xref: /llvm-project/llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll (revision fd6f8b3ce33cc2cbe378f8f1b391f3f40fa7bd54)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,SDAG,GCN-IEEE,SDAG-IEEE %s
3; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GISEL,GCN-IEEE,GISEL-IEEE %s
4
5; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tahiti -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=GCN,SDAG,GCN-DAZ,SDAG-DAZ %s
6; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=tahiti -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=GCN,GISEL,GCN-DAZ,GISEL-DAZ %s
7
; Baseline case: llvm.sqrt.f32 applied directly to the float argument, with no
; fast-math flags on the call. The autogenerated checks cover both instruction
; selectors and both f32 denormal settings from the RUN lines: the IEEE-suffixed
; prefixes expect v_sqrt_f32 followed by a +/-1 integer adjustment chosen via
; v_fma_f32/v_cmp, while the DAZ-suffixed prefixes expect v_rsq_f32 followed by
; a v_fma_f32 sequence.
8define float @v_sqrt_f32(float %x) {
9; SDAG-IEEE-LABEL: v_sqrt_f32:
10; SDAG-IEEE:       ; %bb.0:
11; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0xf800000
13; SDAG-IEEE-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v0
14; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
15; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
16; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
17; SDAG-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
18; SDAG-IEEE-NEXT:    v_fma_f32 v3, -v2, v1, v0
19; SDAG-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v3
20; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, v1, v2, s[4:5]
21; SDAG-IEEE-NEXT:    v_add_i32_e64 v3, s[4:5], 1, v1
22; SDAG-IEEE-NEXT:    v_fma_f32 v1, -v3, v1, v0
23; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v1
24; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, v2, v3, s[4:5]
25; SDAG-IEEE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
26; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
27; SDAG-IEEE-NEXT:    v_mov_b32_e32 v2, 0x260
28; SDAG-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
29; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
30; SDAG-IEEE-NEXT:    s_setpc_b64 s[30:31]
31;
32; GISEL-IEEE-LABEL: v_sqrt_f32:
33; GISEL-IEEE:       ; %bb.0:
34; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
35; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0xf800000
36; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
37; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
38; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
39; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
40; GISEL-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
41; GISEL-IEEE-NEXT:    v_fma_f32 v3, -v2, v1, v0
42; GISEL-IEEE-NEXT:    v_add_i32_e64 v4, s[4:5], 1, v1
43; GISEL-IEEE-NEXT:    v_fma_f32 v5, -v4, v1, v0
44; GISEL-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v3
45; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[4:5]
46; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v5
47; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v4, s[4:5]
48; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
49; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
50; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, 0x260
51; GISEL-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
52; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
53; GISEL-IEEE-NEXT:    s_setpc_b64 s[30:31]
54;
55; SDAG-DAZ-LABEL: v_sqrt_f32:
56; SDAG-DAZ:       ; %bb.0:
57; SDAG-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
58; SDAG-DAZ-NEXT:    s_mov_b32 s4, 0xf800000
59; SDAG-DAZ-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v0
60; SDAG-DAZ-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
61; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
62; SDAG-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
63; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
64; SDAG-DAZ-NEXT:    v_mul_f32_e32 v1, 0.5, v1
65; SDAG-DAZ-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
66; SDAG-DAZ-NEXT:    v_fma_f32 v2, v2, v3, v2
67; SDAG-DAZ-NEXT:    v_fma_f32 v4, -v2, v2, v0
68; SDAG-DAZ-NEXT:    v_fma_f32 v1, v1, v3, v1
69; SDAG-DAZ-NEXT:    v_fma_f32 v1, v4, v1, v2
70; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
71; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
72; SDAG-DAZ-NEXT:    v_mov_b32_e32 v2, 0x260
73; SDAG-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
74; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
75; SDAG-DAZ-NEXT:    s_setpc_b64 s[30:31]
76;
77; GISEL-DAZ-LABEL: v_sqrt_f32:
78; GISEL-DAZ:       ; %bb.0:
79; GISEL-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
80; GISEL-DAZ-NEXT:    v_mov_b32_e32 v1, 0xf800000
81; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
82; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
83; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
84; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
85; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
86; GISEL-DAZ-NEXT:    v_mul_f32_e32 v1, 0.5, v1
87; GISEL-DAZ-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
88; GISEL-DAZ-NEXT:    v_fma_f32 v2, v2, v3, v2
89; GISEL-DAZ-NEXT:    v_fma_f32 v1, v1, v3, v1
90; GISEL-DAZ-NEXT:    v_fma_f32 v3, -v2, v2, v0
91; GISEL-DAZ-NEXT:    v_fma_f32 v1, v3, v1, v2
92; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
93; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
94; GISEL-DAZ-NEXT:    v_mov_b32_e32 v2, 0x260
95; GISEL-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
96; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
97; GISEL-DAZ-NEXT:    s_setpc_b64 s[30:31]
98  %result = call float @llvm.sqrt.f32(float %x)
99  ret float %result
100}
101
; llvm.sqrt.f32 of a negated argument (fneg %x feeds the sqrt call). The
; expected code shows how each selector absorbs the negation: the SelectionDAG
; checks use sign-flipped constants (0x8f800000 / 0xcf800000) plus a -v0 source
; modifier on the select, while the GlobalISel checks keep the positive
; constants and apply -v0 source modifiers on the multiply, compare and select.
102define float @v_sqrt_f32_fneg(float %x) {
103; SDAG-IEEE-LABEL: v_sqrt_f32_fneg:
104; SDAG-IEEE:       ; %bb.0:
105; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
106; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0x8f800000
107; SDAG-IEEE-NEXT:    v_mul_f32_e32 v1, 0xcf800000, v0
108; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, s4, v0
109; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v0, -v0, v1, vcc
110; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
111; SDAG-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
112; SDAG-IEEE-NEXT:    v_fma_f32 v3, -v2, v1, v0
113; SDAG-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v3
114; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, v1, v2, s[4:5]
115; SDAG-IEEE-NEXT:    v_add_i32_e64 v3, s[4:5], 1, v1
116; SDAG-IEEE-NEXT:    v_fma_f32 v1, -v3, v1, v0
117; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v1
118; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, v2, v3, s[4:5]
119; SDAG-IEEE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
120; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
121; SDAG-IEEE-NEXT:    v_mov_b32_e32 v2, 0x260
122; SDAG-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
123; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
124; SDAG-IEEE-NEXT:    s_setpc_b64 s[30:31]
125;
126; GISEL-IEEE-LABEL: v_sqrt_f32_fneg:
127; GISEL-IEEE:       ; %bb.0:
128; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
129; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0xf800000
130; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, 0x4f800000
131; GISEL-IEEE-NEXT:    v_mul_f32_e64 v2, -v0, v2
132; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 vcc, -v0, v1
133; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v0, -v0, v2, vcc
134; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
135; GISEL-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
136; GISEL-IEEE-NEXT:    v_fma_f32 v3, -v2, v1, v0
137; GISEL-IEEE-NEXT:    v_add_i32_e64 v4, s[4:5], 1, v1
138; GISEL-IEEE-NEXT:    v_fma_f32 v5, -v4, v1, v0
139; GISEL-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v3
140; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[4:5]
141; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v5
142; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v4, s[4:5]
143; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
144; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
145; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, 0x260
146; GISEL-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
147; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
148; GISEL-IEEE-NEXT:    s_setpc_b64 s[30:31]
149;
150; SDAG-DAZ-LABEL: v_sqrt_f32_fneg:
151; SDAG-DAZ:       ; %bb.0:
152; SDAG-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
153; SDAG-DAZ-NEXT:    s_mov_b32 s4, 0x8f800000
154; SDAG-DAZ-NEXT:    v_mul_f32_e32 v1, 0xcf800000, v0
155; SDAG-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, s4, v0
156; SDAG-DAZ-NEXT:    v_cndmask_b32_e64 v0, -v0, v1, vcc
157; SDAG-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
158; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
159; SDAG-DAZ-NEXT:    v_mul_f32_e32 v1, 0.5, v1
160; SDAG-DAZ-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
161; SDAG-DAZ-NEXT:    v_fma_f32 v2, v2, v3, v2
162; SDAG-DAZ-NEXT:    v_fma_f32 v4, -v2, v2, v0
163; SDAG-DAZ-NEXT:    v_fma_f32 v1, v1, v3, v1
164; SDAG-DAZ-NEXT:    v_fma_f32 v1, v4, v1, v2
165; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
166; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
167; SDAG-DAZ-NEXT:    v_mov_b32_e32 v2, 0x260
168; SDAG-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
169; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
170; SDAG-DAZ-NEXT:    s_setpc_b64 s[30:31]
171;
172; GISEL-DAZ-LABEL: v_sqrt_f32_fneg:
173; GISEL-DAZ:       ; %bb.0:
174; GISEL-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
175; GISEL-DAZ-NEXT:    v_mov_b32_e32 v1, 0xf800000
176; GISEL-DAZ-NEXT:    v_mov_b32_e32 v2, 0x4f800000
177; GISEL-DAZ-NEXT:    v_mul_f32_e64 v2, -v0, v2
178; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e64 vcc, -v0, v1
179; GISEL-DAZ-NEXT:    v_cndmask_b32_e64 v0, -v0, v2, vcc
180; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
181; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
182; GISEL-DAZ-NEXT:    v_mul_f32_e32 v1, 0.5, v1
183; GISEL-DAZ-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
184; GISEL-DAZ-NEXT:    v_fma_f32 v2, v2, v3, v2
185; GISEL-DAZ-NEXT:    v_fma_f32 v1, v1, v3, v1
186; GISEL-DAZ-NEXT:    v_fma_f32 v3, -v2, v2, v0
187; GISEL-DAZ-NEXT:    v_fma_f32 v1, v3, v1, v2
188; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
189; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
190; GISEL-DAZ-NEXT:    v_mov_b32_e32 v2, 0x260
191; GISEL-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
192; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
193; GISEL-DAZ-NEXT:    s_setpc_b64 s[30:31]
194  %x.neg = fneg float %x
195  %result = call float @llvm.sqrt.f32(float %x.neg)
196  ret float %result
197}
198
; llvm.sqrt.f32 of llvm.fabs.f32(%x). In the expected code for every prefix the
; absolute value appears as a |v0| source modifier on the initial multiply,
; compare and select; no separate instruction computing the fabs is expected.
199define float @v_sqrt_f32_fabs(float %x) {
200; SDAG-IEEE-LABEL: v_sqrt_f32_fabs:
201; SDAG-IEEE:       ; %bb.0:
202; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
203; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0xf800000
204; SDAG-IEEE-NEXT:    s_mov_b32 s5, 0x4f800000
205; SDAG-IEEE-NEXT:    v_mul_f32_e64 v1, |v0|, s5
206; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
207; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v0, |v0|, v1, vcc
208; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
209; SDAG-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
210; SDAG-IEEE-NEXT:    v_fma_f32 v3, -v2, v1, v0
211; SDAG-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v3
212; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, v1, v2, s[4:5]
213; SDAG-IEEE-NEXT:    v_add_i32_e64 v3, s[4:5], 1, v1
214; SDAG-IEEE-NEXT:    v_fma_f32 v1, -v3, v1, v0
215; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v1
216; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, v2, v3, s[4:5]
217; SDAG-IEEE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
218; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
219; SDAG-IEEE-NEXT:    v_mov_b32_e32 v2, 0x260
220; SDAG-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
221; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
222; SDAG-IEEE-NEXT:    s_setpc_b64 s[30:31]
223;
224; GISEL-IEEE-LABEL: v_sqrt_f32_fabs:
225; GISEL-IEEE:       ; %bb.0:
226; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
227; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0xf800000
228; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, 0x4f800000
229; GISEL-IEEE-NEXT:    v_mul_f32_e64 v2, |v0|, v2
230; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, v1
231; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v0, |v0|, v2, vcc
232; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
233; GISEL-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
234; GISEL-IEEE-NEXT:    v_fma_f32 v3, -v2, v1, v0
235; GISEL-IEEE-NEXT:    v_add_i32_e64 v4, s[4:5], 1, v1
236; GISEL-IEEE-NEXT:    v_fma_f32 v5, -v4, v1, v0
237; GISEL-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v3
238; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[4:5]
239; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v5
240; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v4, s[4:5]
241; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
242; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
243; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, 0x260
244; GISEL-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
245; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
246; GISEL-IEEE-NEXT:    s_setpc_b64 s[30:31]
247;
248; SDAG-DAZ-LABEL: v_sqrt_f32_fabs:
249; SDAG-DAZ:       ; %bb.0:
250; SDAG-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
251; SDAG-DAZ-NEXT:    s_mov_b32 s4, 0xf800000
252; SDAG-DAZ-NEXT:    s_mov_b32 s5, 0x4f800000
253; SDAG-DAZ-NEXT:    v_mul_f32_e64 v1, |v0|, s5
254; SDAG-DAZ-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
255; SDAG-DAZ-NEXT:    v_cndmask_b32_e64 v0, |v0|, v1, vcc
256; SDAG-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
257; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
258; SDAG-DAZ-NEXT:    v_mul_f32_e32 v1, 0.5, v1
259; SDAG-DAZ-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
260; SDAG-DAZ-NEXT:    v_fma_f32 v2, v2, v3, v2
261; SDAG-DAZ-NEXT:    v_fma_f32 v4, -v2, v2, v0
262; SDAG-DAZ-NEXT:    v_fma_f32 v1, v1, v3, v1
263; SDAG-DAZ-NEXT:    v_fma_f32 v1, v4, v1, v2
264; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
265; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
266; SDAG-DAZ-NEXT:    v_mov_b32_e32 v2, 0x260
267; SDAG-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
268; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
269; SDAG-DAZ-NEXT:    s_setpc_b64 s[30:31]
270;
271; GISEL-DAZ-LABEL: v_sqrt_f32_fabs:
272; GISEL-DAZ:       ; %bb.0:
273; GISEL-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
274; GISEL-DAZ-NEXT:    v_mov_b32_e32 v1, 0xf800000
275; GISEL-DAZ-NEXT:    v_mov_b32_e32 v2, 0x4f800000
276; GISEL-DAZ-NEXT:    v_mul_f32_e64 v2, |v0|, v2
277; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, v1
278; GISEL-DAZ-NEXT:    v_cndmask_b32_e64 v0, |v0|, v2, vcc
279; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
280; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
281; GISEL-DAZ-NEXT:    v_mul_f32_e32 v1, 0.5, v1
282; GISEL-DAZ-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
283; GISEL-DAZ-NEXT:    v_fma_f32 v2, v2, v3, v2
284; GISEL-DAZ-NEXT:    v_fma_f32 v1, v1, v3, v1
285; GISEL-DAZ-NEXT:    v_fma_f32 v3, -v2, v2, v0
286; GISEL-DAZ-NEXT:    v_fma_f32 v1, v3, v1, v2
287; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
288; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
289; GISEL-DAZ-NEXT:    v_mov_b32_e32 v2, 0x260
290; GISEL-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
291; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
292; GISEL-DAZ-NEXT:    s_setpc_b64 s[30:31]
293  %x.fabs = call float @llvm.fabs.f32(float %x)
294  %result = call float @llvm.sqrt.f32(float %x.fabs)
295  ret float %result
296}
297
; llvm.sqrt.f32 of fneg(llvm.fabs.f32(%x)). The expected code folds both
; operations into source modifiers: the SelectionDAG checks use |v0| with
; sign-flipped constants (0x8f800000 / 0xcf800000) and -|v0| on the select,
; while the GlobalISel checks use -|v0| on the multiply, compare and select
; with the positive constants.
298define float @v_sqrt_f32_fneg_fabs(float %x) {
299; SDAG-IEEE-LABEL: v_sqrt_f32_fneg_fabs:
300; SDAG-IEEE:       ; %bb.0:
301; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
302; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0x8f800000
303; SDAG-IEEE-NEXT:    s_mov_b32 s5, 0xcf800000
304; SDAG-IEEE-NEXT:    v_mul_f32_e64 v1, |v0|, s5
305; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e64 vcc, |v0|, s4
306; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v0, -|v0|, v1, vcc
307; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
308; SDAG-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
309; SDAG-IEEE-NEXT:    v_fma_f32 v3, -v2, v1, v0
310; SDAG-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v3
311; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, v1, v2, s[4:5]
312; SDAG-IEEE-NEXT:    v_add_i32_e64 v3, s[4:5], 1, v1
313; SDAG-IEEE-NEXT:    v_fma_f32 v1, -v3, v1, v0
314; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v1
315; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, v2, v3, s[4:5]
316; SDAG-IEEE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
317; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
318; SDAG-IEEE-NEXT:    v_mov_b32_e32 v2, 0x260
319; SDAG-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
320; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
321; SDAG-IEEE-NEXT:    s_setpc_b64 s[30:31]
322;
323; GISEL-IEEE-LABEL: v_sqrt_f32_fneg_fabs:
324; GISEL-IEEE:       ; %bb.0:
325; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
326; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0xf800000
327; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, 0x4f800000
328; GISEL-IEEE-NEXT:    v_mul_f32_e64 v2, -|v0|, v2
329; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 vcc, -|v0|, v1
330; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v0, -|v0|, v2, vcc
331; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
332; GISEL-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
333; GISEL-IEEE-NEXT:    v_fma_f32 v3, -v2, v1, v0
334; GISEL-IEEE-NEXT:    v_add_i32_e64 v4, s[4:5], 1, v1
335; GISEL-IEEE-NEXT:    v_fma_f32 v5, -v4, v1, v0
336; GISEL-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v3
337; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[4:5]
338; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v5
339; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v4, s[4:5]
340; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
341; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
342; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, 0x260
343; GISEL-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
344; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
345; GISEL-IEEE-NEXT:    s_setpc_b64 s[30:31]
346;
347; SDAG-DAZ-LABEL: v_sqrt_f32_fneg_fabs:
348; SDAG-DAZ:       ; %bb.0:
349; SDAG-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
350; SDAG-DAZ-NEXT:    s_mov_b32 s4, 0x8f800000
351; SDAG-DAZ-NEXT:    s_mov_b32 s5, 0xcf800000
352; SDAG-DAZ-NEXT:    v_mul_f32_e64 v1, |v0|, s5
353; SDAG-DAZ-NEXT:    v_cmp_gt_f32_e64 vcc, |v0|, s4
354; SDAG-DAZ-NEXT:    v_cndmask_b32_e64 v0, -|v0|, v1, vcc
355; SDAG-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
356; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
357; SDAG-DAZ-NEXT:    v_mul_f32_e32 v1, 0.5, v1
358; SDAG-DAZ-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
359; SDAG-DAZ-NEXT:    v_fma_f32 v2, v2, v3, v2
360; SDAG-DAZ-NEXT:    v_fma_f32 v4, -v2, v2, v0
361; SDAG-DAZ-NEXT:    v_fma_f32 v1, v1, v3, v1
362; SDAG-DAZ-NEXT:    v_fma_f32 v1, v4, v1, v2
363; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
364; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
365; SDAG-DAZ-NEXT:    v_mov_b32_e32 v2, 0x260
366; SDAG-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
367; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
368; SDAG-DAZ-NEXT:    s_setpc_b64 s[30:31]
369;
370; GISEL-DAZ-LABEL: v_sqrt_f32_fneg_fabs:
371; GISEL-DAZ:       ; %bb.0:
372; GISEL-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
373; GISEL-DAZ-NEXT:    v_mov_b32_e32 v1, 0xf800000
374; GISEL-DAZ-NEXT:    v_mov_b32_e32 v2, 0x4f800000
375; GISEL-DAZ-NEXT:    v_mul_f32_e64 v2, -|v0|, v2
376; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e64 vcc, -|v0|, v1
377; GISEL-DAZ-NEXT:    v_cndmask_b32_e64 v0, -|v0|, v2, vcc
378; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
379; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
380; GISEL-DAZ-NEXT:    v_mul_f32_e32 v1, 0.5, v1
381; GISEL-DAZ-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
382; GISEL-DAZ-NEXT:    v_fma_f32 v2, v2, v3, v2
383; GISEL-DAZ-NEXT:    v_fma_f32 v1, v1, v3, v1
384; GISEL-DAZ-NEXT:    v_fma_f32 v3, -v2, v2, v0
385; GISEL-DAZ-NEXT:    v_fma_f32 v1, v3, v1, v2
386; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
387; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
388; GISEL-DAZ-NEXT:    v_mov_b32_e32 v2, 0x260
389; GISEL-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
390; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
391; GISEL-DAZ-NEXT:    s_setpc_b64 s[30:31]
392  %x.fabs = call float @llvm.fabs.f32(float %x)
393  %x.fabs.neg = fneg float %x.fabs
394  %result = call float @llvm.sqrt.f32(float %x.fabs.neg)
395  ret float %result
396}
397
; llvm.sqrt.f32 with the ninf fast-math flag on the call itself. The expected
; code for every prefix still ends with the v_cmp_class_f32 test against 0x260
; and the final select, i.e. the checks record no shortening of the sequence
; for this flag.
398define float @v_sqrt_f32_ninf(float %x) {
399; SDAG-IEEE-LABEL: v_sqrt_f32_ninf:
400; SDAG-IEEE:       ; %bb.0:
401; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
402; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0xf800000
403; SDAG-IEEE-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v0
404; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
405; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
406; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
407; SDAG-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
408; SDAG-IEEE-NEXT:    v_fma_f32 v3, -v2, v1, v0
409; SDAG-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v3
410; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, v1, v2, s[4:5]
411; SDAG-IEEE-NEXT:    v_add_i32_e64 v3, s[4:5], 1, v1
412; SDAG-IEEE-NEXT:    v_fma_f32 v1, -v3, v1, v0
413; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v1
414; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, v2, v3, s[4:5]
415; SDAG-IEEE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
416; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
417; SDAG-IEEE-NEXT:    v_mov_b32_e32 v2, 0x260
418; SDAG-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
419; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
420; SDAG-IEEE-NEXT:    s_setpc_b64 s[30:31]
421;
422; GISEL-IEEE-LABEL: v_sqrt_f32_ninf:
423; GISEL-IEEE:       ; %bb.0:
424; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
425; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0xf800000
426; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
427; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
428; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
429; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
430; GISEL-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
431; GISEL-IEEE-NEXT:    v_fma_f32 v3, -v2, v1, v0
432; GISEL-IEEE-NEXT:    v_add_i32_e64 v4, s[4:5], 1, v1
433; GISEL-IEEE-NEXT:    v_fma_f32 v5, -v4, v1, v0
434; GISEL-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v3
435; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[4:5]
436; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v5
437; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v4, s[4:5]
438; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
439; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
440; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, 0x260
441; GISEL-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
442; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
443; GISEL-IEEE-NEXT:    s_setpc_b64 s[30:31]
444;
445; SDAG-DAZ-LABEL: v_sqrt_f32_ninf:
446; SDAG-DAZ:       ; %bb.0:
447; SDAG-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
448; SDAG-DAZ-NEXT:    s_mov_b32 s4, 0xf800000
449; SDAG-DAZ-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v0
450; SDAG-DAZ-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
451; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
452; SDAG-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
453; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
454; SDAG-DAZ-NEXT:    v_mul_f32_e32 v1, 0.5, v1
455; SDAG-DAZ-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
456; SDAG-DAZ-NEXT:    v_fma_f32 v2, v2, v3, v2
457; SDAG-DAZ-NEXT:    v_fma_f32 v4, -v2, v2, v0
458; SDAG-DAZ-NEXT:    v_fma_f32 v1, v1, v3, v1
459; SDAG-DAZ-NEXT:    v_fma_f32 v1, v4, v1, v2
460; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
461; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
462; SDAG-DAZ-NEXT:    v_mov_b32_e32 v2, 0x260
463; SDAG-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
464; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
465; SDAG-DAZ-NEXT:    s_setpc_b64 s[30:31]
466;
467; GISEL-DAZ-LABEL: v_sqrt_f32_ninf:
468; GISEL-DAZ:       ; %bb.0:
469; GISEL-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
470; GISEL-DAZ-NEXT:    v_mov_b32_e32 v1, 0xf800000
471; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
472; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
473; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
474; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
475; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
476; GISEL-DAZ-NEXT:    v_mul_f32_e32 v1, 0.5, v1
477; GISEL-DAZ-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
478; GISEL-DAZ-NEXT:    v_fma_f32 v2, v2, v3, v2
479; GISEL-DAZ-NEXT:    v_fma_f32 v1, v1, v3, v1
480; GISEL-DAZ-NEXT:    v_fma_f32 v3, -v2, v2, v0
481; GISEL-DAZ-NEXT:    v_fma_f32 v1, v3, v1, v2
482; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
483; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
484; GISEL-DAZ-NEXT:    v_mov_b32_e32 v2, 0x260
485; GISEL-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
486; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
487; GISEL-DAZ-NEXT:    s_setpc_b64 s[30:31]
488  %result = call ninf float @llvm.sqrt.f32(float %x)
489  ret float %result
490}
491
; llvm.sqrt.f32 in a function carrying attribute group #5 (the group's
; definition is elsewhere in the file; going by the test name it is presumably
; a no-infs function attribute -- confirm against the attribute list).
; NOTE(review): the call below also carries the ninf flag, so this function
; does not exercise the function attribute on its own; confirm whether the
; call was meant to be unflagged.
492define float @v_sqrt_f32_no_infs_attribute(float %x) #5 {
493; SDAG-IEEE-LABEL: v_sqrt_f32_no_infs_attribute:
494; SDAG-IEEE:       ; %bb.0:
495; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
496; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0xf800000
497; SDAG-IEEE-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v0
498; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
499; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
500; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
501; SDAG-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
502; SDAG-IEEE-NEXT:    v_fma_f32 v3, -v2, v1, v0
503; SDAG-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v3
504; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, v1, v2, s[4:5]
505; SDAG-IEEE-NEXT:    v_add_i32_e64 v3, s[4:5], 1, v1
506; SDAG-IEEE-NEXT:    v_fma_f32 v1, -v3, v1, v0
507; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v1
508; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, v2, v3, s[4:5]
509; SDAG-IEEE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
510; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
511; SDAG-IEEE-NEXT:    v_mov_b32_e32 v2, 0x260
512; SDAG-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
513; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
514; SDAG-IEEE-NEXT:    s_setpc_b64 s[30:31]
515;
516; GISEL-IEEE-LABEL: v_sqrt_f32_no_infs_attribute:
517; GISEL-IEEE:       ; %bb.0:
518; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
519; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0xf800000
520; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
521; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
522; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
523; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
524; GISEL-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
525; GISEL-IEEE-NEXT:    v_fma_f32 v3, -v2, v1, v0
526; GISEL-IEEE-NEXT:    v_add_i32_e64 v4, s[4:5], 1, v1
527; GISEL-IEEE-NEXT:    v_fma_f32 v5, -v4, v1, v0
528; GISEL-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v3
529; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[4:5]
530; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v5
531; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v4, s[4:5]
532; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
533; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
534; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, 0x260
535; GISEL-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
536; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
537; GISEL-IEEE-NEXT:    s_setpc_b64 s[30:31]
538;
539; SDAG-DAZ-LABEL: v_sqrt_f32_no_infs_attribute:
540; SDAG-DAZ:       ; %bb.0:
541; SDAG-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
542; SDAG-DAZ-NEXT:    s_mov_b32 s4, 0xf800000
543; SDAG-DAZ-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v0
544; SDAG-DAZ-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
545; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
546; SDAG-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
547; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
548; SDAG-DAZ-NEXT:    v_mul_f32_e32 v1, 0.5, v1
549; SDAG-DAZ-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
550; SDAG-DAZ-NEXT:    v_fma_f32 v2, v2, v3, v2
551; SDAG-DAZ-NEXT:    v_fma_f32 v4, -v2, v2, v0
552; SDAG-DAZ-NEXT:    v_fma_f32 v1, v1, v3, v1
553; SDAG-DAZ-NEXT:    v_fma_f32 v1, v4, v1, v2
554; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
555; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
556; SDAG-DAZ-NEXT:    v_mov_b32_e32 v2, 0x260
557; SDAG-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
558; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
559; SDAG-DAZ-NEXT:    s_setpc_b64 s[30:31]
560;
561; GISEL-DAZ-LABEL: v_sqrt_f32_no_infs_attribute:
562; GISEL-DAZ:       ; %bb.0:
563; GISEL-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
564; GISEL-DAZ-NEXT:    v_mov_b32_e32 v1, 0xf800000
565; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
566; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
567; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
568; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
569; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
570; GISEL-DAZ-NEXT:    v_mul_f32_e32 v1, 0.5, v1
571; GISEL-DAZ-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
572; GISEL-DAZ-NEXT:    v_fma_f32 v2, v2, v3, v2
573; GISEL-DAZ-NEXT:    v_fma_f32 v1, v1, v3, v1
574; GISEL-DAZ-NEXT:    v_fma_f32 v3, -v2, v2, v0
575; GISEL-DAZ-NEXT:    v_fma_f32 v1, v3, v1, v2
576; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
577; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
578; GISEL-DAZ-NEXT:    v_mov_b32_e32 v2, 0x260
579; GISEL-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
580; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
581; GISEL-DAZ-NEXT:    s_setpc_b64 s[30:31]
582  %result = call ninf float @llvm.sqrt.f32(float %x)
583  ret float %result
584}
585
; llvm.sqrt.f32 with the nnan fast-math flag on the call. As recorded by the
; checks, the expected code keeps the full sequence for every prefix, including
; the trailing v_cmp_class_f32 test against 0x260 and the final select.
586define float @v_sqrt_f32_nnan(float %x) {
587; SDAG-IEEE-LABEL: v_sqrt_f32_nnan:
588; SDAG-IEEE:       ; %bb.0:
589; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
590; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0xf800000
591; SDAG-IEEE-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v0
592; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
593; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
594; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
595; SDAG-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
596; SDAG-IEEE-NEXT:    v_fma_f32 v3, -v2, v1, v0
597; SDAG-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v3
598; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, v1, v2, s[4:5]
599; SDAG-IEEE-NEXT:    v_add_i32_e64 v3, s[4:5], 1, v1
600; SDAG-IEEE-NEXT:    v_fma_f32 v1, -v3, v1, v0
601; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v1
602; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, v2, v3, s[4:5]
603; SDAG-IEEE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
604; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
605; SDAG-IEEE-NEXT:    v_mov_b32_e32 v2, 0x260
606; SDAG-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
607; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
608; SDAG-IEEE-NEXT:    s_setpc_b64 s[30:31]
609;
610; GISEL-IEEE-LABEL: v_sqrt_f32_nnan:
611; GISEL-IEEE:       ; %bb.0:
612; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
613; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0xf800000
614; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
615; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
616; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
617; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
618; GISEL-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
619; GISEL-IEEE-NEXT:    v_fma_f32 v3, -v2, v1, v0
620; GISEL-IEEE-NEXT:    v_add_i32_e64 v4, s[4:5], 1, v1
621; GISEL-IEEE-NEXT:    v_fma_f32 v5, -v4, v1, v0
622; GISEL-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v3
623; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[4:5]
624; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v5
625; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v4, s[4:5]
626; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
627; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
628; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, 0x260
629; GISEL-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
630; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
631; GISEL-IEEE-NEXT:    s_setpc_b64 s[30:31]
632;
633; SDAG-DAZ-LABEL: v_sqrt_f32_nnan:
634; SDAG-DAZ:       ; %bb.0:
635; SDAG-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
636; SDAG-DAZ-NEXT:    s_mov_b32 s4, 0xf800000
637; SDAG-DAZ-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v0
638; SDAG-DAZ-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
639; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
640; SDAG-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
641; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
642; SDAG-DAZ-NEXT:    v_mul_f32_e32 v1, 0.5, v1
643; SDAG-DAZ-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
644; SDAG-DAZ-NEXT:    v_fma_f32 v2, v2, v3, v2
645; SDAG-DAZ-NEXT:    v_fma_f32 v4, -v2, v2, v0
646; SDAG-DAZ-NEXT:    v_fma_f32 v1, v1, v3, v1
647; SDAG-DAZ-NEXT:    v_fma_f32 v1, v4, v1, v2
648; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
649; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
650; SDAG-DAZ-NEXT:    v_mov_b32_e32 v2, 0x260
651; SDAG-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
652; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
653; SDAG-DAZ-NEXT:    s_setpc_b64 s[30:31]
654;
655; GISEL-DAZ-LABEL: v_sqrt_f32_nnan:
656; GISEL-DAZ:       ; %bb.0:
657; GISEL-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
658; GISEL-DAZ-NEXT:    v_mov_b32_e32 v1, 0xf800000
659; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
660; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
661; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
662; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
663; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
664; GISEL-DAZ-NEXT:    v_mul_f32_e32 v1, 0.5, v1
665; GISEL-DAZ-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
666; GISEL-DAZ-NEXT:    v_fma_f32 v2, v2, v3, v2
667; GISEL-DAZ-NEXT:    v_fma_f32 v1, v1, v3, v1
668; GISEL-DAZ-NEXT:    v_fma_f32 v3, -v2, v2, v0
669; GISEL-DAZ-NEXT:    v_fma_f32 v1, v3, v1, v2
670; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
671; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
672; GISEL-DAZ-NEXT:    v_mov_b32_e32 v2, 0x260
673; GISEL-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
674; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
675; GISEL-DAZ-NEXT:    s_setpc_b64 s[30:31]
676  %result = call nnan float @llvm.sqrt.f32(float %x)
677  ret float %result
678}
679
; Uniform-input case with no fast-math flags: llvm.sqrt.f32 is applied to an
; `inreg` argument of an amdgpu_ps function, the float result is bitcast to
; i32 and passed through llvm.amdgcn.readfirstlane before being returned.
; What the generated checks below pin down:
;  - the input is compared against 0xf800000 (2^-96 as an f32 bit pattern);
;    smaller inputs are pre-multiplied by 0x4f800000 (2^32) and the result is
;    multiplied by 0x37800000 (2^-16) afterwards;
;  - the IEEE-denormal runs start from v_sqrt_f32 and then pick between the
;    result and its -1 / +1 integer neighbours using FMA residuals compared
;    against 0;
;  - the preserve-sign (DAZ) runs start from v_rsq_f32 and refine it with a
;    chain of FMAs instead;
;  - a final v_cmp_class_f32 with mask 0x260 selects the (possibly scaled)
;    input itself over the computed value.
;    NOTE(review): 0x260 presumably encodes +/-0 and +inf -- confirm against
;    the ISA class-bit table.
;  - the value ends up in s0 via v_readfirstlane_b32.
680define amdgpu_ps i32 @s_sqrt_f32(float inreg %x) {
681; SDAG-IEEE-LABEL: s_sqrt_f32:
682; SDAG-IEEE:       ; %bb.0:
683; SDAG-IEEE-NEXT:    v_mov_b32_e32 v0, 0xf800000
684; SDAG-IEEE-NEXT:    v_mov_b32_e32 v1, 0x4f800000
685; SDAG-IEEE-NEXT:    v_mul_f32_e32 v1, s0, v1
686; SDAG-IEEE-NEXT:    v_mov_b32_e32 v2, s0
687; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 s[0:1], s0, v0
688; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v0, v2, v1, s[0:1]
689; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
690; SDAG-IEEE-NEXT:    v_add_i32_e32 v2, vcc, -1, v1
691; SDAG-IEEE-NEXT:    v_fma_f32 v3, -v2, v1, v0
692; SDAG-IEEE-NEXT:    v_cmp_ge_f32_e32 vcc, 0, v3
693; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v2, v1, v2, vcc
694; SDAG-IEEE-NEXT:    v_add_i32_e32 v3, vcc, 1, v1
695; SDAG-IEEE-NEXT:    v_fma_f32 v1, -v3, v1, v0
696; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v1
697; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v1, v2, v3, vcc
698; SDAG-IEEE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
699; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[0:1]
700; SDAG-IEEE-NEXT:    v_mov_b32_e32 v2, 0x260
701; SDAG-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
702; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
703; SDAG-IEEE-NEXT:    v_readfirstlane_b32 s0, v0
704; SDAG-IEEE-NEXT:    ; return to shader part epilog
705;
706; GISEL-IEEE-LABEL: s_sqrt_f32:
707; GISEL-IEEE:       ; %bb.0:
708; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0xf800000
709; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, 0x4f800000
710; GISEL-IEEE-NEXT:    v_mov_b32_e32 v0, s0
711; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, s0, v2
712; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v1
713; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
714; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
715; GISEL-IEEE-NEXT:    v_add_i32_e64 v2, s[0:1], -1, v1
716; GISEL-IEEE-NEXT:    v_fma_f32 v3, -v2, v1, v0
717; GISEL-IEEE-NEXT:    v_add_i32_e64 v4, s[0:1], 1, v1
718; GISEL-IEEE-NEXT:    v_fma_f32 v5, -v4, v1, v0
719; GISEL-IEEE-NEXT:    v_cmp_ge_f32_e64 s[0:1], 0, v3
720; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[0:1]
721; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 s[0:1], 0, v5
722; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v4, s[0:1]
723; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
724; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
725; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, 0x260
726; GISEL-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
727; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
728; GISEL-IEEE-NEXT:    v_readfirstlane_b32 s0, v0
729; GISEL-IEEE-NEXT:    ; return to shader part epilog
730;
731; SDAG-DAZ-LABEL: s_sqrt_f32:
732; SDAG-DAZ:       ; %bb.0:
733; SDAG-DAZ-NEXT:    v_mov_b32_e32 v0, 0xf800000
734; SDAG-DAZ-NEXT:    v_mov_b32_e32 v1, 0x4f800000
735; SDAG-DAZ-NEXT:    v_mul_f32_e32 v1, s0, v1
736; SDAG-DAZ-NEXT:    v_mov_b32_e32 v2, s0
737; SDAG-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v0
738; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
739; SDAG-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
740; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
741; SDAG-DAZ-NEXT:    v_mul_f32_e32 v1, 0.5, v1
742; SDAG-DAZ-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
743; SDAG-DAZ-NEXT:    v_fma_f32 v2, v2, v3, v2
744; SDAG-DAZ-NEXT:    v_fma_f32 v4, -v2, v2, v0
745; SDAG-DAZ-NEXT:    v_fma_f32 v1, v1, v3, v1
746; SDAG-DAZ-NEXT:    v_fma_f32 v1, v4, v1, v2
747; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
748; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
749; SDAG-DAZ-NEXT:    v_mov_b32_e32 v2, 0x260
750; SDAG-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
751; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
752; SDAG-DAZ-NEXT:    v_readfirstlane_b32 s0, v0
753; SDAG-DAZ-NEXT:    ; return to shader part epilog
754;
755; GISEL-DAZ-LABEL: s_sqrt_f32:
756; GISEL-DAZ:       ; %bb.0:
757; GISEL-DAZ-NEXT:    v_mov_b32_e32 v1, 0xf800000
758; GISEL-DAZ-NEXT:    v_mov_b32_e32 v2, 0x4f800000
759; GISEL-DAZ-NEXT:    v_mov_b32_e32 v0, s0
760; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, s0, v2
761; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v1
762; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
763; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
764; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
765; GISEL-DAZ-NEXT:    v_mul_f32_e32 v1, 0.5, v1
766; GISEL-DAZ-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
767; GISEL-DAZ-NEXT:    v_fma_f32 v2, v2, v3, v2
768; GISEL-DAZ-NEXT:    v_fma_f32 v1, v1, v3, v1
769; GISEL-DAZ-NEXT:    v_fma_f32 v3, -v2, v2, v0
770; GISEL-DAZ-NEXT:    v_fma_f32 v1, v3, v1, v2
771; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
772; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
773; GISEL-DAZ-NEXT:    v_mov_b32_e32 v2, 0x260
774; GISEL-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
775; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
776; GISEL-DAZ-NEXT:    v_readfirstlane_b32 s0, v0
777; GISEL-DAZ-NEXT:    ; return to shader part epilog
778  %result = call float @llvm.sqrt.f32(float %x)
779  %cast = bitcast float %result to i32
780  %firstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %cast)
781  ret i32 %firstlane
782}
783
; Uniform-input llvm.sqrt.f32 carrying only the `ninf` fast-math flag, returned
; as i32 through bitcast + llvm.amdgcn.readfirstlane.
; The generated checks expect the same full expansion as the unflagged
; s_sqrt_f32 test: scaling around the 0xf800000 threshold, v_sqrt_f32 with the
; +/-1 integer fixup (IEEE runs) or v_rsq_f32 with FMA refinement (DAZ runs),
; and the trailing v_cmp_class_f32 select with mask 0x260. In other words the
; checks record that `ninf` by itself does not currently simplify the sequence.
784define amdgpu_ps i32 @s_sqrt_f32_ninf(float inreg %x) {
785; SDAG-IEEE-LABEL: s_sqrt_f32_ninf:
786; SDAG-IEEE:       ; %bb.0:
787; SDAG-IEEE-NEXT:    v_mov_b32_e32 v0, 0xf800000
788; SDAG-IEEE-NEXT:    v_mov_b32_e32 v1, 0x4f800000
789; SDAG-IEEE-NEXT:    v_mul_f32_e32 v1, s0, v1
790; SDAG-IEEE-NEXT:    v_mov_b32_e32 v2, s0
791; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 s[0:1], s0, v0
792; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v0, v2, v1, s[0:1]
793; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
794; SDAG-IEEE-NEXT:    v_add_i32_e32 v2, vcc, -1, v1
795; SDAG-IEEE-NEXT:    v_fma_f32 v3, -v2, v1, v0
796; SDAG-IEEE-NEXT:    v_cmp_ge_f32_e32 vcc, 0, v3
797; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v2, v1, v2, vcc
798; SDAG-IEEE-NEXT:    v_add_i32_e32 v3, vcc, 1, v1
799; SDAG-IEEE-NEXT:    v_fma_f32 v1, -v3, v1, v0
800; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v1
801; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v1, v2, v3, vcc
802; SDAG-IEEE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
803; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[0:1]
804; SDAG-IEEE-NEXT:    v_mov_b32_e32 v2, 0x260
805; SDAG-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
806; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
807; SDAG-IEEE-NEXT:    v_readfirstlane_b32 s0, v0
808; SDAG-IEEE-NEXT:    ; return to shader part epilog
809;
810; GISEL-IEEE-LABEL: s_sqrt_f32_ninf:
811; GISEL-IEEE:       ; %bb.0:
812; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0xf800000
813; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, 0x4f800000
814; GISEL-IEEE-NEXT:    v_mov_b32_e32 v0, s0
815; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, s0, v2
816; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v1
817; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
818; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
819; GISEL-IEEE-NEXT:    v_add_i32_e64 v2, s[0:1], -1, v1
820; GISEL-IEEE-NEXT:    v_fma_f32 v3, -v2, v1, v0
821; GISEL-IEEE-NEXT:    v_add_i32_e64 v4, s[0:1], 1, v1
822; GISEL-IEEE-NEXT:    v_fma_f32 v5, -v4, v1, v0
823; GISEL-IEEE-NEXT:    v_cmp_ge_f32_e64 s[0:1], 0, v3
824; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[0:1]
825; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 s[0:1], 0, v5
826; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v4, s[0:1]
827; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
828; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
829; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, 0x260
830; GISEL-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
831; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
832; GISEL-IEEE-NEXT:    v_readfirstlane_b32 s0, v0
833; GISEL-IEEE-NEXT:    ; return to shader part epilog
834;
835; SDAG-DAZ-LABEL: s_sqrt_f32_ninf:
836; SDAG-DAZ:       ; %bb.0:
837; SDAG-DAZ-NEXT:    v_mov_b32_e32 v0, 0xf800000
838; SDAG-DAZ-NEXT:    v_mov_b32_e32 v1, 0x4f800000
839; SDAG-DAZ-NEXT:    v_mul_f32_e32 v1, s0, v1
840; SDAG-DAZ-NEXT:    v_mov_b32_e32 v2, s0
841; SDAG-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v0
842; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
843; SDAG-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
844; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
845; SDAG-DAZ-NEXT:    v_mul_f32_e32 v1, 0.5, v1
846; SDAG-DAZ-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
847; SDAG-DAZ-NEXT:    v_fma_f32 v2, v2, v3, v2
848; SDAG-DAZ-NEXT:    v_fma_f32 v4, -v2, v2, v0
849; SDAG-DAZ-NEXT:    v_fma_f32 v1, v1, v3, v1
850; SDAG-DAZ-NEXT:    v_fma_f32 v1, v4, v1, v2
851; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
852; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
853; SDAG-DAZ-NEXT:    v_mov_b32_e32 v2, 0x260
854; SDAG-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
855; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
856; SDAG-DAZ-NEXT:    v_readfirstlane_b32 s0, v0
857; SDAG-DAZ-NEXT:    ; return to shader part epilog
858;
859; GISEL-DAZ-LABEL: s_sqrt_f32_ninf:
860; GISEL-DAZ:       ; %bb.0:
861; GISEL-DAZ-NEXT:    v_mov_b32_e32 v1, 0xf800000
862; GISEL-DAZ-NEXT:    v_mov_b32_e32 v2, 0x4f800000
863; GISEL-DAZ-NEXT:    v_mov_b32_e32 v0, s0
864; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, s0, v2
865; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v1
866; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
867; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
868; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
869; GISEL-DAZ-NEXT:    v_mul_f32_e32 v1, 0.5, v1
870; GISEL-DAZ-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
871; GISEL-DAZ-NEXT:    v_fma_f32 v2, v2, v3, v2
872; GISEL-DAZ-NEXT:    v_fma_f32 v1, v1, v3, v1
873; GISEL-DAZ-NEXT:    v_fma_f32 v3, -v2, v2, v0
874; GISEL-DAZ-NEXT:    v_fma_f32 v1, v3, v1, v2
875; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
876; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
877; GISEL-DAZ-NEXT:    v_mov_b32_e32 v2, 0x260
878; GISEL-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
879; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
880; GISEL-DAZ-NEXT:    v_readfirstlane_b32 s0, v0
881; GISEL-DAZ-NEXT:    ; return to shader part epilog
882  %result = call ninf float @llvm.sqrt.f32(float %x)
883  %cast = bitcast float %result to i32
884  %firstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %cast)
885  ret i32 %firstlane
886}
887
; Uniform-input llvm.sqrt.f32 with the `afn` (approximate functions) flag.
; All four run configurations share one check block: a single v_sqrt_f32 that
; reads s0 directly, followed by v_readfirstlane_b32 to return the bits in s0.
; No input scaling, refinement or class test is expected.
888define amdgpu_ps i32 @s_sqrt_f32_afn(float inreg %x) {
889; GCN-LABEL: s_sqrt_f32_afn:
890; GCN:       ; %bb.0:
891; GCN-NEXT:    v_sqrt_f32_e32 v0, s0
892; GCN-NEXT:    v_readfirstlane_b32 s0, v0
893; GCN-NEXT:    ; return to shader part epilog
894  %result = call afn float @llvm.sqrt.f32(float %x)
895  %cast = bitcast float %result to i32
896  %firstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %cast)
897  ret i32 %firstlane
898}
899
; Uniform-input llvm.sqrt.f32 with `afn nnan ninf`.
; The expected code is the bare v_sqrt_f32 on s0 plus v_readfirstlane_b32,
; identical for SelectionDAG/GlobalISel and for both denormal modes (one shared
; check block covers all run lines).
900define amdgpu_ps i32 @s_sqrt_f32_afn_nnan_ninf(float inreg %x) {
901; GCN-LABEL: s_sqrt_f32_afn_nnan_ninf:
902; GCN:       ; %bb.0:
903; GCN-NEXT:    v_sqrt_f32_e32 v0, s0
904; GCN-NEXT:    v_readfirstlane_b32 s0, v0
905; GCN-NEXT:    ; return to shader part epilog
906  %result = call afn nnan ninf float @llvm.sqrt.f32(float %x)
907  %cast = bitcast float %result to i32
908  %firstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %cast)
909  ret i32 %firstlane
910}
911
; Per-lane (VGPR) llvm.sqrt.f32 carrying only the `nsz` fast-math flag.
; The generated checks expect the complete expansion in every configuration:
;  - inputs below 0xf800000 (2^-96) are multiplied by 0x4f800000 (2^32) first
;    and the result is multiplied by 0x37800000 (2^-16) at the end;
;  - IEEE runs: v_sqrt_f32, then a choice between the result and its -1 / +1
;    integer neighbours driven by FMA residuals;
;  - DAZ runs: v_rsq_f32 refined by a chain of FMAs;
;  - the trailing v_cmp_class_f32 (mask 0x260) select is still present, i.e.
;    the checks record that `nsz` alone does not remove it.
; SelectionDAG keeps the threshold in s4 and uses v_cmp_gt; GlobalISel keeps it
; in a VGPR and uses v_cmp_lt.
912define float @v_sqrt_f32_nsz(float %x) {
913; SDAG-IEEE-LABEL: v_sqrt_f32_nsz:
914; SDAG-IEEE:       ; %bb.0:
915; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
916; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0xf800000
917; SDAG-IEEE-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v0
918; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
919; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
920; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
921; SDAG-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
922; SDAG-IEEE-NEXT:    v_fma_f32 v3, -v2, v1, v0
923; SDAG-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v3
924; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, v1, v2, s[4:5]
925; SDAG-IEEE-NEXT:    v_add_i32_e64 v3, s[4:5], 1, v1
926; SDAG-IEEE-NEXT:    v_fma_f32 v1, -v3, v1, v0
927; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v1
928; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, v2, v3, s[4:5]
929; SDAG-IEEE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
930; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
931; SDAG-IEEE-NEXT:    v_mov_b32_e32 v2, 0x260
932; SDAG-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
933; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
934; SDAG-IEEE-NEXT:    s_setpc_b64 s[30:31]
935;
936; GISEL-IEEE-LABEL: v_sqrt_f32_nsz:
937; GISEL-IEEE:       ; %bb.0:
938; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
939; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0xf800000
940; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
941; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
942; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
943; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
944; GISEL-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
945; GISEL-IEEE-NEXT:    v_fma_f32 v3, -v2, v1, v0
946; GISEL-IEEE-NEXT:    v_add_i32_e64 v4, s[4:5], 1, v1
947; GISEL-IEEE-NEXT:    v_fma_f32 v5, -v4, v1, v0
948; GISEL-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v3
949; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[4:5]
950; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v5
951; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v4, s[4:5]
952; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
953; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
954; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, 0x260
955; GISEL-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
956; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
957; GISEL-IEEE-NEXT:    s_setpc_b64 s[30:31]
958;
959; SDAG-DAZ-LABEL: v_sqrt_f32_nsz:
960; SDAG-DAZ:       ; %bb.0:
961; SDAG-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
962; SDAG-DAZ-NEXT:    s_mov_b32 s4, 0xf800000
963; SDAG-DAZ-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v0
964; SDAG-DAZ-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
965; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
966; SDAG-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
967; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
968; SDAG-DAZ-NEXT:    v_mul_f32_e32 v1, 0.5, v1
969; SDAG-DAZ-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
970; SDAG-DAZ-NEXT:    v_fma_f32 v2, v2, v3, v2
971; SDAG-DAZ-NEXT:    v_fma_f32 v4, -v2, v2, v0
972; SDAG-DAZ-NEXT:    v_fma_f32 v1, v1, v3, v1
973; SDAG-DAZ-NEXT:    v_fma_f32 v1, v4, v1, v2
974; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
975; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
976; SDAG-DAZ-NEXT:    v_mov_b32_e32 v2, 0x260
977; SDAG-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
978; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
979; SDAG-DAZ-NEXT:    s_setpc_b64 s[30:31]
980;
981; GISEL-DAZ-LABEL: v_sqrt_f32_nsz:
982; GISEL-DAZ:       ; %bb.0:
983; GISEL-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
984; GISEL-DAZ-NEXT:    v_mov_b32_e32 v1, 0xf800000
985; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
986; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
987; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
988; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
989; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
990; GISEL-DAZ-NEXT:    v_mul_f32_e32 v1, 0.5, v1
991; GISEL-DAZ-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
992; GISEL-DAZ-NEXT:    v_fma_f32 v2, v2, v3, v2
993; GISEL-DAZ-NEXT:    v_fma_f32 v1, v1, v3, v1
994; GISEL-DAZ-NEXT:    v_fma_f32 v3, -v2, v2, v0
995; GISEL-DAZ-NEXT:    v_fma_f32 v1, v3, v1, v2
996; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
997; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
998; GISEL-DAZ-NEXT:    v_mov_b32_e32 v2, 0x260
999; GISEL-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
1000; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
1001; GISEL-DAZ-NEXT:    s_setpc_b64 s[30:31]
1002  %result = call nsz float @llvm.sqrt.f32(float %x)
1003  ret float %result
1004}
1005
; Per-lane llvm.sqrt.f32 with `nnan ninf` but without `afn`.
; The generated checks still expect the full-precision expansion: threshold
; compare against 0xf800000 with 2^32 pre-scale / 2^-16 post-scale, then
; v_sqrt_f32 plus the +/-1 integer fixup (IEEE runs) or v_rsq_f32 plus FMA
; refinement (DAZ runs). The final v_cmp_class_f32 select with mask 0x260 is
; also still present, so the checks record that these two flags do not
; currently shorten the sequence.
1006define float @v_sqrt_f32_nnan_ninf(float %x) {
1007; SDAG-IEEE-LABEL: v_sqrt_f32_nnan_ninf:
1008; SDAG-IEEE:       ; %bb.0:
1009; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1010; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0xf800000
1011; SDAG-IEEE-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v0
1012; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
1013; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1014; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
1015; SDAG-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
1016; SDAG-IEEE-NEXT:    v_fma_f32 v3, -v2, v1, v0
1017; SDAG-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v3
1018; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, v1, v2, s[4:5]
1019; SDAG-IEEE-NEXT:    v_add_i32_e64 v3, s[4:5], 1, v1
1020; SDAG-IEEE-NEXT:    v_fma_f32 v1, -v3, v1, v0
1021; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v1
1022; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, v2, v3, s[4:5]
1023; SDAG-IEEE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
1024; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
1025; SDAG-IEEE-NEXT:    v_mov_b32_e32 v2, 0x260
1026; SDAG-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
1027; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
1028; SDAG-IEEE-NEXT:    s_setpc_b64 s[30:31]
1029;
1030; GISEL-IEEE-LABEL: v_sqrt_f32_nnan_ninf:
1031; GISEL-IEEE:       ; %bb.0:
1032; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1033; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0xf800000
1034; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
1035; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
1036; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
1037; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
1038; GISEL-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
1039; GISEL-IEEE-NEXT:    v_fma_f32 v3, -v2, v1, v0
1040; GISEL-IEEE-NEXT:    v_add_i32_e64 v4, s[4:5], 1, v1
1041; GISEL-IEEE-NEXT:    v_fma_f32 v5, -v4, v1, v0
1042; GISEL-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v3
1043; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[4:5]
1044; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v5
1045; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v4, s[4:5]
1046; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
1047; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
1048; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, 0x260
1049; GISEL-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
1050; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
1051; GISEL-IEEE-NEXT:    s_setpc_b64 s[30:31]
1052;
1053; SDAG-DAZ-LABEL: v_sqrt_f32_nnan_ninf:
1054; SDAG-DAZ:       ; %bb.0:
1055; SDAG-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1056; SDAG-DAZ-NEXT:    s_mov_b32 s4, 0xf800000
1057; SDAG-DAZ-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v0
1058; SDAG-DAZ-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
1059; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1060; SDAG-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
1061; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
1062; SDAG-DAZ-NEXT:    v_mul_f32_e32 v1, 0.5, v1
1063; SDAG-DAZ-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
1064; SDAG-DAZ-NEXT:    v_fma_f32 v2, v2, v3, v2
1065; SDAG-DAZ-NEXT:    v_fma_f32 v4, -v2, v2, v0
1066; SDAG-DAZ-NEXT:    v_fma_f32 v1, v1, v3, v1
1067; SDAG-DAZ-NEXT:    v_fma_f32 v1, v4, v1, v2
1068; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
1069; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
1070; SDAG-DAZ-NEXT:    v_mov_b32_e32 v2, 0x260
1071; SDAG-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
1072; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
1073; SDAG-DAZ-NEXT:    s_setpc_b64 s[30:31]
1074;
1075; GISEL-DAZ-LABEL: v_sqrt_f32_nnan_ninf:
1076; GISEL-DAZ:       ; %bb.0:
1077; GISEL-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1078; GISEL-DAZ-NEXT:    v_mov_b32_e32 v1, 0xf800000
1079; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
1080; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
1081; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
1082; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
1083; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
1084; GISEL-DAZ-NEXT:    v_mul_f32_e32 v1, 0.5, v1
1085; GISEL-DAZ-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
1086; GISEL-DAZ-NEXT:    v_fma_f32 v2, v2, v3, v2
1087; GISEL-DAZ-NEXT:    v_fma_f32 v1, v1, v3, v1
1088; GISEL-DAZ-NEXT:    v_fma_f32 v3, -v2, v2, v0
1089; GISEL-DAZ-NEXT:    v_fma_f32 v1, v3, v1, v2
1090; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
1091; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
1092; GISEL-DAZ-NEXT:    v_mov_b32_e32 v2, 0x260
1093; GISEL-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
1094; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
1095; GISEL-DAZ-NEXT:    s_setpc_b64 s[30:31]
1096  %result = call nnan ninf float @llvm.sqrt.f32(float %x)
1097  ret float %result
1098}
1099
; Per-lane llvm.sqrt.f32 with `nnan ninf nsz` (every value-class flag, but no
; `afn`). The generated checks expect the same full expansion as without any
; flags: input scaling below 0xf800000, v_sqrt_f32 with the +/-1 integer fixup
; for the IEEE runs or v_rsq_f32 with FMA refinement for the DAZ runs, and the
; trailing v_cmp_class_f32 (mask 0x260) select.
1100define float @v_sqrt_f32_nnan_ninf_nsz(float %x) {
1101; SDAG-IEEE-LABEL: v_sqrt_f32_nnan_ninf_nsz:
1102; SDAG-IEEE:       ; %bb.0:
1103; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1104; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0xf800000
1105; SDAG-IEEE-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v0
1106; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
1107; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1108; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
1109; SDAG-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
1110; SDAG-IEEE-NEXT:    v_fma_f32 v3, -v2, v1, v0
1111; SDAG-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v3
1112; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, v1, v2, s[4:5]
1113; SDAG-IEEE-NEXT:    v_add_i32_e64 v3, s[4:5], 1, v1
1114; SDAG-IEEE-NEXT:    v_fma_f32 v1, -v3, v1, v0
1115; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v1
1116; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, v2, v3, s[4:5]
1117; SDAG-IEEE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
1118; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
1119; SDAG-IEEE-NEXT:    v_mov_b32_e32 v2, 0x260
1120; SDAG-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
1121; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
1122; SDAG-IEEE-NEXT:    s_setpc_b64 s[30:31]
1123;
1124; GISEL-IEEE-LABEL: v_sqrt_f32_nnan_ninf_nsz:
1125; GISEL-IEEE:       ; %bb.0:
1126; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1127; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0xf800000
1128; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
1129; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
1130; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
1131; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
1132; GISEL-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
1133; GISEL-IEEE-NEXT:    v_fma_f32 v3, -v2, v1, v0
1134; GISEL-IEEE-NEXT:    v_add_i32_e64 v4, s[4:5], 1, v1
1135; GISEL-IEEE-NEXT:    v_fma_f32 v5, -v4, v1, v0
1136; GISEL-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v3
1137; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[4:5]
1138; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v5
1139; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v4, s[4:5]
1140; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
1141; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
1142; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, 0x260
1143; GISEL-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
1144; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
1145; GISEL-IEEE-NEXT:    s_setpc_b64 s[30:31]
1146;
1147; SDAG-DAZ-LABEL: v_sqrt_f32_nnan_ninf_nsz:
1148; SDAG-DAZ:       ; %bb.0:
1149; SDAG-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1150; SDAG-DAZ-NEXT:    s_mov_b32 s4, 0xf800000
1151; SDAG-DAZ-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v0
1152; SDAG-DAZ-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
1153; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1154; SDAG-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
1155; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
1156; SDAG-DAZ-NEXT:    v_mul_f32_e32 v1, 0.5, v1
1157; SDAG-DAZ-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
1158; SDAG-DAZ-NEXT:    v_fma_f32 v2, v2, v3, v2
1159; SDAG-DAZ-NEXT:    v_fma_f32 v4, -v2, v2, v0
1160; SDAG-DAZ-NEXT:    v_fma_f32 v1, v1, v3, v1
1161; SDAG-DAZ-NEXT:    v_fma_f32 v1, v4, v1, v2
1162; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
1163; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
1164; SDAG-DAZ-NEXT:    v_mov_b32_e32 v2, 0x260
1165; SDAG-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
1166; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
1167; SDAG-DAZ-NEXT:    s_setpc_b64 s[30:31]
1168;
1169; GISEL-DAZ-LABEL: v_sqrt_f32_nnan_ninf_nsz:
1170; GISEL-DAZ:       ; %bb.0:
1171; GISEL-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1172; GISEL-DAZ-NEXT:    v_mov_b32_e32 v1, 0xf800000
1173; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
1174; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
1175; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
1176; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
1177; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
1178; GISEL-DAZ-NEXT:    v_mul_f32_e32 v1, 0.5, v1
1179; GISEL-DAZ-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
1180; GISEL-DAZ-NEXT:    v_fma_f32 v2, v2, v3, v2
1181; GISEL-DAZ-NEXT:    v_fma_f32 v1, v1, v3, v1
1182; GISEL-DAZ-NEXT:    v_fma_f32 v3, -v2, v2, v0
1183; GISEL-DAZ-NEXT:    v_fma_f32 v1, v3, v1, v2
1184; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
1185; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
1186; GISEL-DAZ-NEXT:    v_mov_b32_e32 v2, 0x260
1187; GISEL-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
1188; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
1189; GISEL-DAZ-NEXT:    s_setpc_b64 s[30:31]
1190  %result = call nnan ninf nsz float @llvm.sqrt.f32(float %x)
1191  ret float %result
1192}
1193
; Per-lane llvm.sqrt.f32 with only the `afn` flag.
; One shared check block covers every run line: the call is expected to become
; a single v_sqrt_f32 on v0 with no scaling, refinement or class test.
1194define float @v_sqrt_f32_afn(float %x) {
1195; GCN-LABEL: v_sqrt_f32_afn:
1196; GCN:       ; %bb.0:
1197; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1198; GCN-NEXT:    v_sqrt_f32_e32 v0, v0
1199; GCN-NEXT:    s_setpc_b64 s[30:31]
1200  %result = call afn float @llvm.sqrt.f32(float %x)
1201  ret float %result
1202}
1203
; Per-lane llvm.sqrt.f32 with `afn nsz`.
; Expected code is the bare v_sqrt_f32 on v0 in every configuration, i.e. the
; same output the checks of the `afn`-only variant record.
1204define float @v_sqrt_f32_afn_nsz(float %x) {
1205; GCN-LABEL: v_sqrt_f32_afn_nsz:
1206; GCN:       ; %bb.0:
1207; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1208; GCN-NEXT:    v_sqrt_f32_e32 v0, v0
1209; GCN-NEXT:    s_setpc_b64 s[30:31]
1210  %result = call afn nsz float @llvm.sqrt.f32(float %x)
1211  ret float %result
1212}
1213
; Vector form: llvm.sqrt.v2f32 with `afn`.
; The checks expect the vector to be split into one bare v_sqrt_f32 per
; element (v0 and v1), in place, with nothing else around them.
1214define <2 x float> @v_sqrt_v2f32_afn(<2 x float> %x) {
1215; GCN-LABEL: v_sqrt_v2f32_afn:
1216; GCN:       ; %bb.0:
1217; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1218; GCN-NEXT:    v_sqrt_f32_e32 v0, v0
1219; GCN-NEXT:    v_sqrt_f32_e32 v1, v1
1220; GCN-NEXT:    s_setpc_b64 s[30:31]
1221  %result = call afn <2 x float> @llvm.sqrt.v2f32(<2 x float> %x)
1222  ret <2 x float> %result
1223}
1224
; Per-lane llvm.sqrt.f32 with `afn nnan`.
; Expected code is a single v_sqrt_f32 on v0 for all run lines.
1225define float @v_sqrt_f32_afn_nnan(float %x) {
1226; GCN-LABEL: v_sqrt_f32_afn_nnan:
1227; GCN:       ; %bb.0:
1228; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1229; GCN-NEXT:    v_sqrt_f32_e32 v0, v0
1230; GCN-NEXT:    s_setpc_b64 s[30:31]
1231  %result = call afn nnan float @llvm.sqrt.f32(float %x)
1232  ret float %result
1233}
1234
; llvm.sqrt.f32 with `afn ninf` applied to the result of llvm.fabs.f32.
; The checks expect the fabs to be folded into the square root as a source
; modifier: a single VOP3-encoded v_sqrt_f32_e64 reading |v0|, with no separate
; instruction for the absolute value.
1235define float @v_sqrt_f32_fabs_afn_ninf(float %x) {
1236; GCN-LABEL: v_sqrt_f32_fabs_afn_ninf:
1237; GCN:       ; %bb.0:
1238; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1239; GCN-NEXT:    v_sqrt_f32_e64 v0, |v0|
1240; GCN-NEXT:    s_setpc_b64 s[30:31]
1241  %fabs = call float @llvm.fabs.f32(float %x)
1242  %result = call afn ninf float @llvm.sqrt.f32(float %fabs)
1243  ret float %result
1244}
1245
; Per-lane llvm.sqrt.f32 with `afn nnan ninf`.
; Expected code is a single v_sqrt_f32 on v0 for all run lines.
1246define float @v_sqrt_f32_afn_nnan_ninf(float %x) {
1247; GCN-LABEL: v_sqrt_f32_afn_nnan_ninf:
1248; GCN:       ; %bb.0:
1249; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1250; GCN-NEXT:    v_sqrt_f32_e32 v0, v0
1251; GCN-NEXT:    s_setpc_b64 s[30:31]
1252  %result = call afn nnan ninf float @llvm.sqrt.f32(float %x)
1253  ret float %result
1254}
1255
; Vector form: llvm.sqrt.v2f32 with `afn nnan ninf`.
; The checks expect one bare v_sqrt_f32 per element (v0, then v1).
1256define <2 x float> @v_sqrt_v2f32_afn_nnan_ninf(<2 x float> %x) {
1257; GCN-LABEL: v_sqrt_v2f32_afn_nnan_ninf:
1258; GCN:       ; %bb.0:
1259; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1260; GCN-NEXT:    v_sqrt_f32_e32 v0, v0
1261; GCN-NEXT:    v_sqrt_f32_e32 v1, v1
1262; GCN-NEXT:    s_setpc_b64 s[30:31]
1263  %result = call afn nnan ninf <2 x float> @llvm.sqrt.v2f32(<2 x float> %x)
1264  ret <2 x float> %result
1265}
1266
; Per-lane llvm.sqrt.f32 with `afn nnan ninf nsz`.
; Expected code is a single v_sqrt_f32 on v0 for all run lines.
1267define float @v_sqrt_f32_afn_nnan_ninf_nsz(float %x) {
1268; GCN-LABEL: v_sqrt_f32_afn_nnan_ninf_nsz:
1269; GCN:       ; %bb.0:
1270; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1271; GCN-NEXT:    v_sqrt_f32_e32 v0, v0
1272; GCN-NEXT:    s_setpc_b64 s[30:31]
1273  %result = call afn nnan ninf nsz float @llvm.sqrt.f32(float %x)
1274  ret float %result
1275}
1276
; Here the call itself only carries `nsz`; any relaxation comes from function
; attribute group #2, which is defined elsewhere in the file.
; NOTE(review): judging by the test name, #2 presumably sets
; "approx-func-fp-math" -- confirm against the attribute definitions.
; The checks expect the bare v_sqrt_f32 on v0 for every run line, the same
; output recorded for calls that carry the `afn` flag directly.
1277define float @v_sqrt_f32__approx_func_fp_math(float %x) #2 {
1278; GCN-LABEL: v_sqrt_f32__approx_func_fp_math:
1279; GCN:       ; %bb.0:
1280; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1281; GCN-NEXT:    v_sqrt_f32_e32 v0, v0
1282; GCN-NEXT:    s_setpc_b64 s[30:31]
1283  %result = call nsz float @llvm.sqrt.f32(float %x)
1284  ret float %result
1285}
1286
; The call only carries `nsz`; function attribute group #3 (defined elsewhere
; in the file) supplies the rest.
; NOTE(review): the test name suggests #3 combines several fast-math function
; attributes that together are sufficient for the approximate lowering --
; confirm which ones against the attribute definitions.
; The checks expect the bare v_sqrt_f32 on v0 for every run line.
1287define float @v_sqrt_f32__enough_unsafe_attrs(float %x) #3 {
1288; GCN-LABEL: v_sqrt_f32__enough_unsafe_attrs:
1289; GCN:       ; %bb.0:
1290; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1291; GCN-NEXT:    v_sqrt_f32_e32 v0, v0
1292; GCN-NEXT:    s_setpc_b64 s[30:31]
1293  %result = call nsz float @llvm.sqrt.f32(float %x)
1294  ret float %result
1295}
1296
; The call only carries `nsz`; function attribute group #4 (defined elsewhere
; in the file) supplies the rest.
; NOTE(review): the test name suggests #4 sets "unsafe-fp-math" -- confirm
; against the attribute definitions.
; The checks expect the bare v_sqrt_f32 on v0 for every run line.
1297define float @v_sqrt_f32__unsafe_attr(float %x) #4 {
1298; GCN-LABEL: v_sqrt_f32__unsafe_attr:
1299; GCN:       ; %bb.0:
1300; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1301; GCN-NEXT:    v_sqrt_f32_e32 v0, v0
1302; GCN-NEXT:    s_setpc_b64 s[30:31]
1303  %result = call nsz float @llvm.sqrt.f32(float %x)
1304  ret float %result
1305}
1306
; <2 x float> sqrt with no fast-math flags and no !fpmath metadata. The vector
; is split and each lane (v0, v1) gets the full expansion. The expected code
; differs per RUN configuration (SelectionDAG vs GlobalISel, default vs
; -denormal-fp-math-f32=preserve-sign):
;  * default-denormal runs: a lane below 0xf800000 (2^-96 as an f32 bit
;    pattern) is pre-multiplied by 0x4f800000 (2^32); the v_sqrt_f32 result is
;    then stepped by -1 / +1 in its integer encoding depending on two fma
;    residuals, and a scaled lane is multiplied back by 0x37800000 (2^-16).
;  * preserve-sign runs: the same pre/post scaling, but v_rsq_f32 seeds a
;    chain of fma refinement steps instead of using v_sqrt_f32.
; In all runs a final v_cmp_class_f32 with mask 0x260 selects the (possibly
; scaled) input register instead of the computed value.
; NOTE(review): 0x260 presumably covers the zero and +inf classes -- confirm
; against the ISA class-mask encoding.
1307define <2 x float> @v_sqrt_v2f32(<2 x float> %x) {
1308; SDAG-IEEE-LABEL: v_sqrt_v2f32:
1309; SDAG-IEEE:       ; %bb.0:
1310; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1311; SDAG-IEEE-NEXT:    s_mov_b32 s6, 0xf800000
1312; SDAG-IEEE-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
1313; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s6, v0
1314; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
1315; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v2, v0
1316; SDAG-IEEE-NEXT:    v_add_i32_e64 v3, s[4:5], -1, v2
1317; SDAG-IEEE-NEXT:    v_fma_f32 v4, -v3, v2, v0
1318; SDAG-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v4
1319; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v3, v2, v3, s[4:5]
1320; SDAG-IEEE-NEXT:    v_add_i32_e64 v4, s[4:5], 1, v2
1321; SDAG-IEEE-NEXT:    v_fma_f32 v2, -v4, v2, v0
1322; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v2
1323; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, v3, v4, s[4:5]
1324; SDAG-IEEE-NEXT:    v_mul_f32_e32 v3, 0x37800000, v2
1325; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1326; SDAG-IEEE-NEXT:    v_mul_f32_e32 v4, 0x4f800000, v1
1327; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s6, v1
1328; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
1329; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v4, v1
1330; SDAG-IEEE-NEXT:    v_mov_b32_e32 v3, 0x260
1331; SDAG-IEEE-NEXT:    v_cmp_class_f32_e64 s[4:5], v0, v3
1332; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v0, v2, v0, s[4:5]
1333; SDAG-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v4
1334; SDAG-IEEE-NEXT:    v_fma_f32 v5, -v2, v4, v1
1335; SDAG-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v5
1336; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, v4, v2, s[4:5]
1337; SDAG-IEEE-NEXT:    v_add_i32_e64 v5, s[4:5], 1, v4
1338; SDAG-IEEE-NEXT:    v_fma_f32 v4, -v5, v4, v1
1339; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v4
1340; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, v2, v5, s[4:5]
1341; SDAG-IEEE-NEXT:    v_mul_f32_e32 v4, 0x37800000, v2
1342; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
1343; SDAG-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v1, v3
1344; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
1345; SDAG-IEEE-NEXT:    s_setpc_b64 s[30:31]
1346;
1347; GISEL-IEEE-LABEL: v_sqrt_v2f32:
1348; GISEL-IEEE:       ; %bb.0:
1349; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1350; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, 0xf800000
1351; GISEL-IEEE-NEXT:    v_mul_f32_e32 v3, 0x4f800000, v0
1352; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
1353; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
1354; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v3, v0
1355; GISEL-IEEE-NEXT:    v_add_i32_e64 v4, s[4:5], -1, v3
1356; GISEL-IEEE-NEXT:    v_fma_f32 v5, -v4, v3, v0
1357; GISEL-IEEE-NEXT:    v_add_i32_e64 v6, s[4:5], 1, v3
1358; GISEL-IEEE-NEXT:    v_fma_f32 v7, -v6, v3, v0
1359; GISEL-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v5
1360; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v3, v3, v4, s[4:5]
1361; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v7
1362; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v3, v3, v6, s[4:5]
1363; GISEL-IEEE-NEXT:    v_mul_f32_e32 v4, 0x37800000, v3
1364; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
1365; GISEL-IEEE-NEXT:    v_mul_f32_e32 v5, 0x4f800000, v1
1366; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v1, v2
1367; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
1368; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v2, v1
1369; GISEL-IEEE-NEXT:    v_mov_b32_e32 v4, 0x260
1370; GISEL-IEEE-NEXT:    v_cmp_class_f32_e64 s[4:5], v0, v4
1371; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v0, v3, v0, s[4:5]
1372; GISEL-IEEE-NEXT:    v_add_i32_e64 v3, s[4:5], -1, v2
1373; GISEL-IEEE-NEXT:    v_fma_f32 v5, -v3, v2, v1
1374; GISEL-IEEE-NEXT:    v_add_i32_e64 v6, s[4:5], 1, v2
1375; GISEL-IEEE-NEXT:    v_fma_f32 v7, -v6, v2, v1
1376; GISEL-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v5
1377; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[4:5]
1378; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v7
1379; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v2, v2, v6, s[4:5]
1380; GISEL-IEEE-NEXT:    v_mul_f32_e32 v3, 0x37800000, v2
1381; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1382; GISEL-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v1, v4
1383; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
1384; GISEL-IEEE-NEXT:    s_setpc_b64 s[30:31]
1385;
1386; SDAG-DAZ-LABEL: v_sqrt_v2f32:
1387; SDAG-DAZ:       ; %bb.0:
1388; SDAG-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1389; SDAG-DAZ-NEXT:    s_mov_b32 s4, 0xf800000
1390; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
1391; SDAG-DAZ-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
1392; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
1393; SDAG-DAZ-NEXT:    v_rsq_f32_e32 v2, v0
1394; SDAG-DAZ-NEXT:    v_mul_f32_e32 v3, v0, v2
1395; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, 0.5, v2
1396; SDAG-DAZ-NEXT:    v_fma_f32 v4, -v2, v3, 0.5
1397; SDAG-DAZ-NEXT:    v_fma_f32 v3, v3, v4, v3
1398; SDAG-DAZ-NEXT:    v_fma_f32 v5, -v3, v3, v0
1399; SDAG-DAZ-NEXT:    v_fma_f32 v2, v2, v4, v2
1400; SDAG-DAZ-NEXT:    v_fma_f32 v2, v5, v2, v3
1401; SDAG-DAZ-NEXT:    v_mul_f32_e32 v3, 0x37800000, v2
1402; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1403; SDAG-DAZ-NEXT:    v_mul_f32_e32 v3, 0x4f800000, v1
1404; SDAG-DAZ-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v1
1405; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
1406; SDAG-DAZ-NEXT:    v_rsq_f32_e32 v3, v1
1407; SDAG-DAZ-NEXT:    v_mov_b32_e32 v4, 0x260
1408; SDAG-DAZ-NEXT:    v_cmp_class_f32_e64 s[4:5], v0, v4
1409; SDAG-DAZ-NEXT:    v_cndmask_b32_e64 v0, v2, v0, s[4:5]
1410; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, v1, v3
1411; SDAG-DAZ-NEXT:    v_mul_f32_e32 v3, 0.5, v3
1412; SDAG-DAZ-NEXT:    v_fma_f32 v5, -v3, v2, 0.5
1413; SDAG-DAZ-NEXT:    v_fma_f32 v2, v2, v5, v2
1414; SDAG-DAZ-NEXT:    v_fma_f32 v6, -v2, v2, v1
1415; SDAG-DAZ-NEXT:    v_fma_f32 v3, v3, v5, v3
1416; SDAG-DAZ-NEXT:    v_fma_f32 v2, v6, v3, v2
1417; SDAG-DAZ-NEXT:    v_mul_f32_e32 v3, 0x37800000, v2
1418; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1419; SDAG-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v1, v4
1420; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
1421; SDAG-DAZ-NEXT:    s_setpc_b64 s[30:31]
1422;
1423; GISEL-DAZ-LABEL: v_sqrt_v2f32:
1424; GISEL-DAZ:       ; %bb.0:
1425; GISEL-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1426; GISEL-DAZ-NEXT:    v_mov_b32_e32 v2, 0xf800000
1427; GISEL-DAZ-NEXT:    v_mul_f32_e32 v3, 0x4f800000, v0
1428; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
1429; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
1430; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v3, v0
1431; GISEL-DAZ-NEXT:    v_mul_f32_e32 v4, v0, v3
1432; GISEL-DAZ-NEXT:    v_mul_f32_e32 v3, 0.5, v3
1433; GISEL-DAZ-NEXT:    v_fma_f32 v5, -v3, v4, 0.5
1434; GISEL-DAZ-NEXT:    v_fma_f32 v4, v4, v5, v4
1435; GISEL-DAZ-NEXT:    v_fma_f32 v3, v3, v5, v3
1436; GISEL-DAZ-NEXT:    v_fma_f32 v5, -v4, v4, v0
1437; GISEL-DAZ-NEXT:    v_fma_f32 v3, v5, v3, v4
1438; GISEL-DAZ-NEXT:    v_mul_f32_e32 v4, 0x37800000, v3
1439; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
1440; GISEL-DAZ-NEXT:    v_mul_f32_e32 v4, 0x4f800000, v1
1441; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, v1, v2
1442; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
1443; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v2, v1
1444; GISEL-DAZ-NEXT:    v_mov_b32_e32 v4, 0x260
1445; GISEL-DAZ-NEXT:    v_cmp_class_f32_e64 s[4:5], v0, v4
1446; GISEL-DAZ-NEXT:    v_cndmask_b32_e64 v0, v3, v0, s[4:5]
1447; GISEL-DAZ-NEXT:    v_mul_f32_e32 v3, v1, v2
1448; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0.5, v2
1449; GISEL-DAZ-NEXT:    v_fma_f32 v5, -v2, v3, 0.5
1450; GISEL-DAZ-NEXT:    v_fma_f32 v3, v3, v5, v3
1451; GISEL-DAZ-NEXT:    v_fma_f32 v2, v2, v5, v2
1452; GISEL-DAZ-NEXT:    v_fma_f32 v5, -v3, v3, v1
1453; GISEL-DAZ-NEXT:    v_fma_f32 v2, v5, v2, v3
1454; GISEL-DAZ-NEXT:    v_mul_f32_e32 v3, 0x37800000, v2
1455; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1456; GISEL-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v1, v4
1457; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
1458; GISEL-DAZ-NEXT:    s_setpc_b64 s[30:31]
1459  %result = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %x)
1460  ret <2 x float> %result
1461}
1462
; <3 x float> sqrt with no fast-math flags and no !fpmath metadata. The odd
; vector width is handled lane by lane (v0, v1, v2), each lane receiving the
; same full expansion the two-lane test shows: conditional pre-scale by
; 0x4f800000 when the lane is below 0xf800000, then either v_sqrt_f32 plus a
; -1 / +1 integer step chosen from fma residuals (default-denormal runs) or
; v_rsq_f32 plus fma refinement (preserve-sign runs), then a conditional
; rescale by 0x37800000 and a final v_cmp_class_f32 / v_cndmask pair using
; mask 0x260.
; In the preserve-sign runs the third lane's scale compare is scheduled early
; and written to s[4:5] rather than vcc, so that lane's final rescale select
; reads s[4:5] as well.
1463define <3 x float> @v_sqrt_v3f32(<3 x float> %x) {
1464; SDAG-IEEE-LABEL: v_sqrt_v3f32:
1465; SDAG-IEEE:       ; %bb.0:
1466; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1467; SDAG-IEEE-NEXT:    s_mov_b32 s6, 0xf800000
1468; SDAG-IEEE-NEXT:    v_mul_f32_e32 v3, 0x4f800000, v0
1469; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s6, v0
1470; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
1471; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v3, v0
1472; SDAG-IEEE-NEXT:    v_add_i32_e64 v4, s[4:5], -1, v3
1473; SDAG-IEEE-NEXT:    v_fma_f32 v5, -v4, v3, v0
1474; SDAG-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v5
1475; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v4, v3, v4, s[4:5]
1476; SDAG-IEEE-NEXT:    v_add_i32_e64 v5, s[4:5], 1, v3
1477; SDAG-IEEE-NEXT:    v_fma_f32 v3, -v5, v3, v0
1478; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v3
1479; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v3, v4, v5, s[4:5]
1480; SDAG-IEEE-NEXT:    v_mul_f32_e32 v4, 0x37800000, v3
1481; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
1482; SDAG-IEEE-NEXT:    v_mul_f32_e32 v5, 0x4f800000, v1
1483; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s6, v1
1484; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
1485; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v5, v1
1486; SDAG-IEEE-NEXT:    v_mov_b32_e32 v4, 0x260
1487; SDAG-IEEE-NEXT:    v_cmp_class_f32_e64 s[4:5], v0, v4
1488; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v0, v3, v0, s[4:5]
1489; SDAG-IEEE-NEXT:    v_add_i32_e64 v3, s[4:5], -1, v5
1490; SDAG-IEEE-NEXT:    v_fma_f32 v6, -v3, v5, v1
1491; SDAG-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v6
1492; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v3, v5, v3, s[4:5]
1493; SDAG-IEEE-NEXT:    v_add_i32_e64 v6, s[4:5], 1, v5
1494; SDAG-IEEE-NEXT:    v_fma_f32 v5, -v6, v5, v1
1495; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v5
1496; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v3, v3, v6, s[4:5]
1497; SDAG-IEEE-NEXT:    v_mul_f32_e32 v5, 0x37800000, v3
1498; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
1499; SDAG-IEEE-NEXT:    v_mul_f32_e32 v5, 0x4f800000, v2
1500; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s6, v2
1501; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
1502; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v5, v2
1503; SDAG-IEEE-NEXT:    v_cmp_class_f32_e64 s[4:5], v1, v4
1504; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, v3, v1, s[4:5]
1505; SDAG-IEEE-NEXT:    v_add_i32_e64 v3, s[4:5], -1, v5
1506; SDAG-IEEE-NEXT:    v_fma_f32 v6, -v3, v5, v2
1507; SDAG-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v6
1508; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v3, v5, v3, s[4:5]
1509; SDAG-IEEE-NEXT:    v_add_i32_e64 v6, s[4:5], 1, v5
1510; SDAG-IEEE-NEXT:    v_fma_f32 v5, -v6, v5, v2
1511; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v5
1512; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v3, v3, v6, s[4:5]
1513; SDAG-IEEE-NEXT:    v_mul_f32_e32 v5, 0x37800000, v3
1514; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
1515; SDAG-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v2, v4
1516; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v2, v3, v2, vcc
1517; SDAG-IEEE-NEXT:    s_setpc_b64 s[30:31]
1518;
1519; GISEL-IEEE-LABEL: v_sqrt_v3f32:
1520; GISEL-IEEE:       ; %bb.0:
1521; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1522; GISEL-IEEE-NEXT:    v_mov_b32_e32 v3, 0xf800000
1523; GISEL-IEEE-NEXT:    v_mul_f32_e32 v4, 0x4f800000, v0
1524; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v3
1525; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
1526; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v4, v0
1527; GISEL-IEEE-NEXT:    v_add_i32_e64 v5, s[4:5], -1, v4
1528; GISEL-IEEE-NEXT:    v_fma_f32 v6, -v5, v4, v0
1529; GISEL-IEEE-NEXT:    v_add_i32_e64 v7, s[4:5], 1, v4
1530; GISEL-IEEE-NEXT:    v_fma_f32 v8, -v7, v4, v0
1531; GISEL-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v6
1532; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v4, v4, v5, s[4:5]
1533; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v8
1534; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v4, v4, v7, s[4:5]
1535; GISEL-IEEE-NEXT:    v_mul_f32_e32 v5, 0x37800000, v4
1536; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
1537; GISEL-IEEE-NEXT:    v_mul_f32_e32 v6, 0x4f800000, v1
1538; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v1, v3
1539; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
1540; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v6, v1
1541; GISEL-IEEE-NEXT:    v_mov_b32_e32 v5, 0x260
1542; GISEL-IEEE-NEXT:    v_cmp_class_f32_e64 s[4:5], v0, v5
1543; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v0, v4, v0, s[4:5]
1544; GISEL-IEEE-NEXT:    v_add_i32_e64 v4, s[4:5], -1, v6
1545; GISEL-IEEE-NEXT:    v_fma_f32 v7, -v4, v6, v1
1546; GISEL-IEEE-NEXT:    v_add_i32_e64 v8, s[4:5], 1, v6
1547; GISEL-IEEE-NEXT:    v_fma_f32 v9, -v8, v6, v1
1548; GISEL-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v7
1549; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v4, v6, v4, s[4:5]
1550; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v9
1551; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v4, v4, v8, s[4:5]
1552; GISEL-IEEE-NEXT:    v_mul_f32_e32 v6, 0x37800000, v4
1553; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
1554; GISEL-IEEE-NEXT:    v_mul_f32_e32 v6, 0x4f800000, v2
1555; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v2, v3
1556; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
1557; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v3, v2
1558; GISEL-IEEE-NEXT:    v_cmp_class_f32_e64 s[4:5], v1, v5
1559; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v4, v1, s[4:5]
1560; GISEL-IEEE-NEXT:    v_add_i32_e64 v4, s[4:5], -1, v3
1561; GISEL-IEEE-NEXT:    v_fma_f32 v6, -v4, v3, v2
1562; GISEL-IEEE-NEXT:    v_add_i32_e64 v7, s[4:5], 1, v3
1563; GISEL-IEEE-NEXT:    v_fma_f32 v8, -v7, v3, v2
1564; GISEL-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v6
1565; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v3, v3, v4, s[4:5]
1566; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v8
1567; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v3, v3, v7, s[4:5]
1568; GISEL-IEEE-NEXT:    v_mul_f32_e32 v4, 0x37800000, v3
1569; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
1570; GISEL-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v2, v5
1571; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v2, v3, v2, vcc
1572; GISEL-IEEE-NEXT:    s_setpc_b64 s[30:31]
1573;
1574; SDAG-DAZ-LABEL: v_sqrt_v3f32:
1575; SDAG-DAZ:       ; %bb.0:
1576; SDAG-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1577; SDAG-DAZ-NEXT:    s_mov_b32 s6, 0xf800000
1578; SDAG-DAZ-NEXT:    v_mul_f32_e32 v3, 0x4f800000, v0
1579; SDAG-DAZ-NEXT:    v_cmp_gt_f32_e32 vcc, s6, v0
1580; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
1581; SDAG-DAZ-NEXT:    v_rsq_f32_e32 v3, v0
1582; SDAG-DAZ-NEXT:    v_mul_f32_e32 v4, v0, v3
1583; SDAG-DAZ-NEXT:    v_mul_f32_e32 v3, 0.5, v3
1584; SDAG-DAZ-NEXT:    v_fma_f32 v5, -v3, v4, 0.5
1585; SDAG-DAZ-NEXT:    v_fma_f32 v4, v4, v5, v4
1586; SDAG-DAZ-NEXT:    v_fma_f32 v6, -v4, v4, v0
1587; SDAG-DAZ-NEXT:    v_fma_f32 v3, v3, v5, v3
1588; SDAG-DAZ-NEXT:    v_fma_f32 v3, v6, v3, v4
1589; SDAG-DAZ-NEXT:    v_mul_f32_e32 v4, 0x37800000, v3
1590; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
1591; SDAG-DAZ-NEXT:    v_mul_f32_e32 v4, 0x4f800000, v1
1592; SDAG-DAZ-NEXT:    v_cmp_gt_f32_e32 vcc, s6, v1
1593; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
1594; SDAG-DAZ-NEXT:    v_rsq_f32_e32 v4, v1
1595; SDAG-DAZ-NEXT:    v_mov_b32_e32 v5, 0x260
1596; SDAG-DAZ-NEXT:    v_cmp_class_f32_e64 s[4:5], v0, v5
1597; SDAG-DAZ-NEXT:    v_cndmask_b32_e64 v0, v3, v0, s[4:5]
1598; SDAG-DAZ-NEXT:    v_mul_f32_e32 v3, v1, v4
1599; SDAG-DAZ-NEXT:    v_mul_f32_e32 v4, 0.5, v4
1600; SDAG-DAZ-NEXT:    v_fma_f32 v6, -v4, v3, 0.5
1601; SDAG-DAZ-NEXT:    v_fma_f32 v3, v3, v6, v3
1602; SDAG-DAZ-NEXT:    v_fma_f32 v4, v4, v6, v4
1603; SDAG-DAZ-NEXT:    v_mul_f32_e32 v6, 0x4f800000, v2
1604; SDAG-DAZ-NEXT:    v_cmp_gt_f32_e64 s[4:5], s6, v2
1605; SDAG-DAZ-NEXT:    v_cndmask_b32_e64 v2, v2, v6, s[4:5]
1606; SDAG-DAZ-NEXT:    v_fma_f32 v7, -v3, v3, v1
1607; SDAG-DAZ-NEXT:    v_rsq_f32_e32 v6, v2
1608; SDAG-DAZ-NEXT:    v_fma_f32 v3, v7, v4, v3
1609; SDAG-DAZ-NEXT:    v_mul_f32_e32 v4, 0x37800000, v3
1610; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
1611; SDAG-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v1, v5
1612; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
1613; SDAG-DAZ-NEXT:    v_mul_f32_e32 v3, v2, v6
1614; SDAG-DAZ-NEXT:    v_mul_f32_e32 v4, 0.5, v6
1615; SDAG-DAZ-NEXT:    v_fma_f32 v6, -v4, v3, 0.5
1616; SDAG-DAZ-NEXT:    v_fma_f32 v3, v3, v6, v3
1617; SDAG-DAZ-NEXT:    v_fma_f32 v7, -v3, v3, v2
1618; SDAG-DAZ-NEXT:    v_fma_f32 v4, v4, v6, v4
1619; SDAG-DAZ-NEXT:    v_fma_f32 v3, v7, v4, v3
1620; SDAG-DAZ-NEXT:    v_mul_f32_e32 v4, 0x37800000, v3
1621; SDAG-DAZ-NEXT:    v_cndmask_b32_e64 v3, v3, v4, s[4:5]
1622; SDAG-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v2, v5
1623; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v2, v3, v2, vcc
1624; SDAG-DAZ-NEXT:    s_setpc_b64 s[30:31]
1625;
1626; GISEL-DAZ-LABEL: v_sqrt_v3f32:
1627; GISEL-DAZ:       ; %bb.0:
1628; GISEL-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1629; GISEL-DAZ-NEXT:    v_mov_b32_e32 v3, 0xf800000
1630; GISEL-DAZ-NEXT:    v_mul_f32_e32 v4, 0x4f800000, v0
1631; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v3
1632; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
1633; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v4, v0
1634; GISEL-DAZ-NEXT:    v_mul_f32_e32 v5, v0, v4
1635; GISEL-DAZ-NEXT:    v_mul_f32_e32 v4, 0.5, v4
1636; GISEL-DAZ-NEXT:    v_fma_f32 v6, -v4, v5, 0.5
1637; GISEL-DAZ-NEXT:    v_fma_f32 v5, v5, v6, v5
1638; GISEL-DAZ-NEXT:    v_fma_f32 v4, v4, v6, v4
1639; GISEL-DAZ-NEXT:    v_fma_f32 v6, -v5, v5, v0
1640; GISEL-DAZ-NEXT:    v_fma_f32 v4, v6, v4, v5
1641; GISEL-DAZ-NEXT:    v_mul_f32_e32 v5, 0x37800000, v4
1642; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
1643; GISEL-DAZ-NEXT:    v_mul_f32_e32 v5, 0x4f800000, v1
1644; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, v1, v3
1645; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
1646; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v5, v1
1647; GISEL-DAZ-NEXT:    v_mov_b32_e32 v6, 0x260
1648; GISEL-DAZ-NEXT:    v_cmp_class_f32_e64 s[4:5], v0, v6
1649; GISEL-DAZ-NEXT:    v_cndmask_b32_e64 v0, v4, v0, s[4:5]
1650; GISEL-DAZ-NEXT:    v_mul_f32_e32 v4, v1, v5
1651; GISEL-DAZ-NEXT:    v_mul_f32_e32 v5, 0.5, v5
1652; GISEL-DAZ-NEXT:    v_fma_f32 v7, -v5, v4, 0.5
1653; GISEL-DAZ-NEXT:    v_fma_f32 v4, v4, v7, v4
1654; GISEL-DAZ-NEXT:    v_fma_f32 v5, v5, v7, v5
1655; GISEL-DAZ-NEXT:    v_fma_f32 v7, -v4, v4, v1
1656; GISEL-DAZ-NEXT:    v_fma_f32 v4, v7, v5, v4
1657; GISEL-DAZ-NEXT:    v_mul_f32_e32 v7, 0x4f800000, v2
1658; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e64 s[4:5], v2, v3
1659; GISEL-DAZ-NEXT:    v_cndmask_b32_e64 v2, v2, v7, s[4:5]
1660; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v3, v2
1661; GISEL-DAZ-NEXT:    v_mul_f32_e32 v5, 0x37800000, v4
1662; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
1663; GISEL-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v1, v6
1664; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
1665; GISEL-DAZ-NEXT:    v_mul_f32_e32 v4, v2, v3
1666; GISEL-DAZ-NEXT:    v_mul_f32_e32 v3, 0.5, v3
1667; GISEL-DAZ-NEXT:    v_fma_f32 v5, -v3, v4, 0.5
1668; GISEL-DAZ-NEXT:    v_fma_f32 v4, v4, v5, v4
1669; GISEL-DAZ-NEXT:    v_fma_f32 v3, v3, v5, v3
1670; GISEL-DAZ-NEXT:    v_fma_f32 v5, -v4, v4, v2
1671; GISEL-DAZ-NEXT:    v_fma_f32 v3, v5, v3, v4
1672; GISEL-DAZ-NEXT:    v_mul_f32_e32 v4, 0x37800000, v3
1673; GISEL-DAZ-NEXT:    v_cndmask_b32_e64 v3, v3, v4, s[4:5]
1674; GISEL-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v2, v6
1675; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v2, v3, v2, vcc
1676; GISEL-DAZ-NEXT:    s_setpc_b64 s[30:31]
1677  %result = call <3 x float> @llvm.sqrt.v3f32(<3 x float> %x)
1678  ret <3 x float> %result
1679}
1680
1681; fpmath should be ignored
; Scalar f32 sqrt annotated with !fpmath !0. The expected code in all four RUN
; configurations is still the full expansion (conditional scale by 0x4f800000,
; v_sqrt_f32 with -1 / +1 correction in the default-denormal runs or v_rsq_f32
; with fma refinement in the preserve-sign runs, rescale by 0x37800000, and
; the closing v_cmp_class_f32 select), i.e. the metadata does not unlock a
; cheaper lowering here.
; NOTE(review): !0 is defined outside this excerpt; the function name implies
; a 0.5 ulp bound -- confirm against the metadata at the end of the file.
1682define float @v_sqrt_f32_ulp05(float %x) {
1683; SDAG-IEEE-LABEL: v_sqrt_f32_ulp05:
1684; SDAG-IEEE:       ; %bb.0:
1685; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1686; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0xf800000
1687; SDAG-IEEE-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v0
1688; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
1689; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1690; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
1691; SDAG-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
1692; SDAG-IEEE-NEXT:    v_fma_f32 v3, -v2, v1, v0
1693; SDAG-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v3
1694; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, v1, v2, s[4:5]
1695; SDAG-IEEE-NEXT:    v_add_i32_e64 v3, s[4:5], 1, v1
1696; SDAG-IEEE-NEXT:    v_fma_f32 v1, -v3, v1, v0
1697; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v1
1698; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, v2, v3, s[4:5]
1699; SDAG-IEEE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
1700; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
1701; SDAG-IEEE-NEXT:    v_mov_b32_e32 v2, 0x260
1702; SDAG-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
1703; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
1704; SDAG-IEEE-NEXT:    s_setpc_b64 s[30:31]
1705;
1706; GISEL-IEEE-LABEL: v_sqrt_f32_ulp05:
1707; GISEL-IEEE:       ; %bb.0:
1708; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1709; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0xf800000
1710; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
1711; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
1712; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
1713; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
1714; GISEL-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
1715; GISEL-IEEE-NEXT:    v_fma_f32 v3, -v2, v1, v0
1716; GISEL-IEEE-NEXT:    v_add_i32_e64 v4, s[4:5], 1, v1
1717; GISEL-IEEE-NEXT:    v_fma_f32 v5, -v4, v1, v0
1718; GISEL-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v3
1719; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[4:5]
1720; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v5
1721; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v4, s[4:5]
1722; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
1723; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
1724; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, 0x260
1725; GISEL-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
1726; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
1727; GISEL-IEEE-NEXT:    s_setpc_b64 s[30:31]
1728;
1729; SDAG-DAZ-LABEL: v_sqrt_f32_ulp05:
1730; SDAG-DAZ:       ; %bb.0:
1731; SDAG-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1732; SDAG-DAZ-NEXT:    s_mov_b32 s4, 0xf800000
1733; SDAG-DAZ-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v0
1734; SDAG-DAZ-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
1735; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1736; SDAG-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
1737; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
1738; SDAG-DAZ-NEXT:    v_mul_f32_e32 v1, 0.5, v1
1739; SDAG-DAZ-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
1740; SDAG-DAZ-NEXT:    v_fma_f32 v2, v2, v3, v2
1741; SDAG-DAZ-NEXT:    v_fma_f32 v4, -v2, v2, v0
1742; SDAG-DAZ-NEXT:    v_fma_f32 v1, v1, v3, v1
1743; SDAG-DAZ-NEXT:    v_fma_f32 v1, v4, v1, v2
1744; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
1745; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
1746; SDAG-DAZ-NEXT:    v_mov_b32_e32 v2, 0x260
1747; SDAG-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
1748; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
1749; SDAG-DAZ-NEXT:    s_setpc_b64 s[30:31]
1750;
1751; GISEL-DAZ-LABEL: v_sqrt_f32_ulp05:
1752; GISEL-DAZ:       ; %bb.0:
1753; GISEL-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1754; GISEL-DAZ-NEXT:    v_mov_b32_e32 v1, 0xf800000
1755; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
1756; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
1757; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
1758; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
1759; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
1760; GISEL-DAZ-NEXT:    v_mul_f32_e32 v1, 0.5, v1
1761; GISEL-DAZ-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
1762; GISEL-DAZ-NEXT:    v_fma_f32 v2, v2, v3, v2
1763; GISEL-DAZ-NEXT:    v_fma_f32 v1, v1, v3, v1
1764; GISEL-DAZ-NEXT:    v_fma_f32 v3, -v2, v2, v0
1765; GISEL-DAZ-NEXT:    v_fma_f32 v1, v3, v1, v2
1766; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
1767; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
1768; GISEL-DAZ-NEXT:    v_mov_b32_e32 v2, 0x260
1769; GISEL-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
1770; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
1771; GISEL-DAZ-NEXT:    s_setpc_b64 s[30:31]
1772  %result = call float @llvm.sqrt.f32(float %x), !fpmath !0
1773  ret float %result
1774}
1775
1776; fpmath should be used with DAZ only
; Scalar f32 sqrt annotated with !fpmath !1. The two default-denormal runs
; still expect the full expansion (scale, v_sqrt_f32, -1 / +1 correction,
; rescale, class select), while both preserve-sign runs share one check block
; that expects nothing but a bare v_sqrt_f32.
; NOTE(review): !1 is defined outside this excerpt; the function name implies
; a 1 ulp bound -- confirm against the metadata at the end of the file.
1777define float @v_sqrt_f32_ulp1(float %x) {
1778; SDAG-IEEE-LABEL: v_sqrt_f32_ulp1:
1779; SDAG-IEEE:       ; %bb.0:
1780; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1781; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0xf800000
1782; SDAG-IEEE-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v0
1783; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
1784; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1785; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
1786; SDAG-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
1787; SDAG-IEEE-NEXT:    v_fma_f32 v3, -v2, v1, v0
1788; SDAG-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v3
1789; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, v1, v2, s[4:5]
1790; SDAG-IEEE-NEXT:    v_add_i32_e64 v3, s[4:5], 1, v1
1791; SDAG-IEEE-NEXT:    v_fma_f32 v1, -v3, v1, v0
1792; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v1
1793; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, v2, v3, s[4:5]
1794; SDAG-IEEE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
1795; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
1796; SDAG-IEEE-NEXT:    v_mov_b32_e32 v2, 0x260
1797; SDAG-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
1798; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
1799; SDAG-IEEE-NEXT:    s_setpc_b64 s[30:31]
1800;
1801; GISEL-IEEE-LABEL: v_sqrt_f32_ulp1:
1802; GISEL-IEEE:       ; %bb.0:
1803; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1804; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0xf800000
1805; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
1806; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
1807; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
1808; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
1809; GISEL-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
1810; GISEL-IEEE-NEXT:    v_fma_f32 v3, -v2, v1, v0
1811; GISEL-IEEE-NEXT:    v_add_i32_e64 v4, s[4:5], 1, v1
1812; GISEL-IEEE-NEXT:    v_fma_f32 v5, -v4, v1, v0
1813; GISEL-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v3
1814; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[4:5]
1815; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v5
1816; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v4, s[4:5]
1817; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
1818; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
1819; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, 0x260
1820; GISEL-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
1821; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
1822; GISEL-IEEE-NEXT:    s_setpc_b64 s[30:31]
1823;
1824; GCN-DAZ-LABEL: v_sqrt_f32_ulp1:
1825; GCN-DAZ:       ; %bb.0:
1826; GCN-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1827; GCN-DAZ-NEXT:    v_sqrt_f32_e32 v0, v0
1828; GCN-DAZ-NEXT:    s_setpc_b64 s[30:31]
1829  %result = call float @llvm.sqrt.f32(float %x), !fpmath !1
1830  ret float %result
1831}
1832
1833; fpmath should always be used
; Scalar f32 sqrt annotated with !fpmath !2. No run expects the correction or
; refinement sequence any more:
;  * default-denormal runs wrap a single v_sqrt_f32 in a range fix-up. If the
;    input is below 0x800000 (bit pattern of the smallest normal f32) it is
;    scaled up with v_ldexp_f32 by 32 (the 0/1 select shifted left by 5), and
;    the result is scaled back with v_ldexp_f32 by -16.
;  * preserve-sign runs share one check block expecting a bare v_sqrt_f32.
; NOTE(review): !2 is defined outside this excerpt; the function name implies
; a 2 ulp bound -- confirm against the metadata at the end of the file.
1834define float @v_sqrt_f32_ulp2(float %x) {
1835; SDAG-IEEE-LABEL: v_sqrt_f32_ulp2:
1836; SDAG-IEEE:       ; %bb.0:
1837; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1838; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0x800000
1839; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
1840; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
1841; SDAG-IEEE-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
1842; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
1843; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
1844; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, -16, vcc
1845; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
1846; SDAG-IEEE-NEXT:    s_setpc_b64 s[30:31]
1847;
1848; GISEL-IEEE-LABEL: v_sqrt_f32_ulp2:
1849; GISEL-IEEE:       ; %bb.0:
1850; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1851; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0x800000
1852; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
1853; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
1854; GISEL-IEEE-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
1855; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
1856; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
1857; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, -16, vcc
1858; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
1859; GISEL-IEEE-NEXT:    s_setpc_b64 s[30:31]
1860;
1861; GCN-DAZ-LABEL: v_sqrt_f32_ulp2:
1862; GCN-DAZ:       ; %bb.0:
1863; GCN-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1864; GCN-DAZ-NEXT:    v_sqrt_f32_e32 v0, v0
1865; GCN-DAZ-NEXT:    s_setpc_b64 s[30:31]
1866  %result = call float @llvm.sqrt.f32(float %x), !fpmath !2
1867  ret float %result
1868}
1869
1870; fpmath should always be used
; Scalar f32 sqrt annotated with !fpmath !3. The expected code matches the
; !fpmath !2 test instruction for instruction. Default-denormal runs scale an
; input below 0x800000 with v_ldexp_f32 by 32 before v_sqrt_f32 and by -16
; afterwards; preserve-sign runs expect a bare v_sqrt_f32.
; NOTE(review): !3 is defined outside this excerpt; the function name implies
; a 2.5 ulp bound -- confirm against the metadata at the end of the file.
1871define float @v_sqrt_f32_ulp25(float %x) {
1872; SDAG-IEEE-LABEL: v_sqrt_f32_ulp25:
1873; SDAG-IEEE:       ; %bb.0:
1874; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1875; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0x800000
1876; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
1877; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
1878; SDAG-IEEE-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
1879; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
1880; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
1881; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, -16, vcc
1882; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
1883; SDAG-IEEE-NEXT:    s_setpc_b64 s[30:31]
1884;
1885; GISEL-IEEE-LABEL: v_sqrt_f32_ulp25:
1886; GISEL-IEEE:       ; %bb.0:
1887; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1888; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0x800000
1889; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
1890; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
1891; GISEL-IEEE-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
1892; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
1893; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
1894; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, -16, vcc
1895; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
1896; GISEL-IEEE-NEXT:    s_setpc_b64 s[30:31]
1897;
1898; GCN-DAZ-LABEL: v_sqrt_f32_ulp25:
1899; GCN-DAZ:       ; %bb.0:
1900; GCN-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1901; GCN-DAZ-NEXT:    v_sqrt_f32_e32 v0, v0
1902; GCN-DAZ-NEXT:    s_setpc_b64 s[30:31]
1903  %result = call float @llvm.sqrt.f32(float %x), !fpmath !3
1904  ret float %result
1905}
1906
1907; fpmath should always be used
; Scalar f32 sqrt annotated with !fpmath !4. Expected code is again the cheap
; form. Default-denormal runs emit v_sqrt_f32 bracketed by the v_ldexp_f32
; scale-up (by 32 when the input is below 0x800000) and scale-down (by -16);
; preserve-sign runs expect a bare v_sqrt_f32.
; NOTE(review): !4 is defined outside this excerpt; the function name implies
; a 3 ulp bound -- confirm against the metadata at the end of the file.
1908define float @v_sqrt_f32_ulp3(float %x) {
1909; SDAG-IEEE-LABEL: v_sqrt_f32_ulp3:
1910; SDAG-IEEE:       ; %bb.0:
1911; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1912; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0x800000
1913; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
1914; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
1915; SDAG-IEEE-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
1916; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
1917; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
1918; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, -16, vcc
1919; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
1920; SDAG-IEEE-NEXT:    s_setpc_b64 s[30:31]
1921;
1922; GISEL-IEEE-LABEL: v_sqrt_f32_ulp3:
1923; GISEL-IEEE:       ; %bb.0:
1924; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1925; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0x800000
1926; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
1927; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
1928; GISEL-IEEE-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
1929; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
1930; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
1931; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, -16, vcc
1932; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
1933; GISEL-IEEE-NEXT:    s_setpc_b64 s[30:31]
1934;
1935; GCN-DAZ-LABEL: v_sqrt_f32_ulp3:
1936; GCN-DAZ:       ; %bb.0:
1937; GCN-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1938; GCN-DAZ-NEXT:    v_sqrt_f32_e32 v0, v0
1939; GCN-DAZ-NEXT:    s_setpc_b64 s[30:31]
1940  %result = call float @llvm.sqrt.f32(float %x), !fpmath !4
1941  ret float %result
1942}
1943
; sqrt of fabs(%x) with !fpmath !2. Checks that the fabs is folded into a
; source modifier rather than emitted as its own instruction:
;  * default-denormal runs use |v0| on both the v_cmp_lt_f32 range test
;    against 0x800000 and the first v_ldexp_f32, then follow the same
;    v_sqrt_f32 / v_ldexp_f32 by -16 sequence as the plain !fpmath !2 test.
;    The compare result lives in s[4:5] instead of vcc here.
;  * preserve-sign runs expect a single v_sqrt_f32_e64 reading |v0|.
; NOTE(review): !2 is defined outside this excerpt; presumably a 2 ulp bound
; as the function name suggests -- confirm against the file's metadata.
1944define float @v_sqrt_f32_ulp2_fabs(float %x) {
1945; SDAG-IEEE-LABEL: v_sqrt_f32_ulp2_fabs:
1946; SDAG-IEEE:       ; %bb.0:
1947; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1948; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0x800000
1949; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], |v0|, s4
1950; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[4:5]
1951; SDAG-IEEE-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
1952; SDAG-IEEE-NEXT:    v_ldexp_f32_e64 v0, |v0|, v1
1953; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
1954; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, -16, s[4:5]
1955; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
1956; SDAG-IEEE-NEXT:    s_setpc_b64 s[30:31]
1957;
1958; GISEL-IEEE-LABEL: v_sqrt_f32_ulp2_fabs:
1959; GISEL-IEEE:       ; %bb.0:
1960; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1961; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0x800000
1962; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], |v0|, v1
1963; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[4:5]
1964; GISEL-IEEE-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
1965; GISEL-IEEE-NEXT:    v_ldexp_f32_e64 v0, |v0|, v1
1966; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
1967; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, -16, s[4:5]
1968; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
1969; GISEL-IEEE-NEXT:    s_setpc_b64 s[30:31]
1970;
1971; GCN-DAZ-LABEL: v_sqrt_f32_ulp2_fabs:
1972; GCN-DAZ:       ; %bb.0:
1973; GCN-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1974; GCN-DAZ-NEXT:    v_sqrt_f32_e64 v0, |v0|
1975; GCN-DAZ-NEXT:    s_setpc_b64 s[30:31]
1976  %x.fabs = call float @llvm.fabs.f32(float %x)
1977  %result = call float @llvm.sqrt.f32(float %x.fabs), !fpmath !2
1978  ret float %result
1979}
1980
; <2 x float> sqrt carrying !fpmath !1 (the node is defined outside this
; excerpt). Under the IEEE prefixes each element is expected to go through the
; long expansion: conditional pre-scale by 0x4f800000 when the input is below
; 0xf800000, v_sqrt_f32, two v_fma_f32 residual tests that choose between the
; result and its integer -1/+1 neighbours, conditional post-scale by
; 0x37800000, and a final v_cmp_class_f32 (mask 0x260) select that yields the
; (possibly pre-scaled) operand instead of the refined result.
; Under the DAZ prefix one bare v_sqrt_f32 per element is expected.
1981define <2 x float> @v_sqrt_v2f32_ulp1(<2 x float> %x) {
1982; SDAG-IEEE-LABEL: v_sqrt_v2f32_ulp1:
1983; SDAG-IEEE:       ; %bb.0:
1984; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1985; SDAG-IEEE-NEXT:    s_mov_b32 s6, 0xf800000
1986; SDAG-IEEE-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
1987; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s6, v0
1988; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
1989; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v2, v0
1990; SDAG-IEEE-NEXT:    v_add_i32_e64 v3, s[4:5], -1, v2
1991; SDAG-IEEE-NEXT:    v_fma_f32 v4, -v3, v2, v0
1992; SDAG-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v4
1993; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v3, v2, v3, s[4:5]
1994; SDAG-IEEE-NEXT:    v_add_i32_e64 v4, s[4:5], 1, v2
1995; SDAG-IEEE-NEXT:    v_fma_f32 v2, -v4, v2, v0
1996; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v2
1997; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, v3, v4, s[4:5]
1998; SDAG-IEEE-NEXT:    v_mul_f32_e32 v3, 0x37800000, v2
1999; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
2000; SDAG-IEEE-NEXT:    v_mul_f32_e32 v4, 0x4f800000, v1
2001; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s6, v1
2002; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
2003; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v4, v1
2004; SDAG-IEEE-NEXT:    v_mov_b32_e32 v3, 0x260
2005; SDAG-IEEE-NEXT:    v_cmp_class_f32_e64 s[4:5], v0, v3
2006; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v0, v2, v0, s[4:5]
2007; SDAG-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v4
2008; SDAG-IEEE-NEXT:    v_fma_f32 v5, -v2, v4, v1
2009; SDAG-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v5
2010; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, v4, v2, s[4:5]
2011; SDAG-IEEE-NEXT:    v_add_i32_e64 v5, s[4:5], 1, v4
2012; SDAG-IEEE-NEXT:    v_fma_f32 v4, -v5, v4, v1
2013; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v4
2014; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, v2, v5, s[4:5]
2015; SDAG-IEEE-NEXT:    v_mul_f32_e32 v4, 0x37800000, v2
2016; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
2017; SDAG-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v1, v3
2018; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
2019; SDAG-IEEE-NEXT:    s_setpc_b64 s[30:31]
2020;
2021; GISEL-IEEE-LABEL: v_sqrt_v2f32_ulp1:
2022; GISEL-IEEE:       ; %bb.0:
2023; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2024; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, 0xf800000
2025; GISEL-IEEE-NEXT:    v_mul_f32_e32 v3, 0x4f800000, v0
2026; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
2027; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
2028; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v3, v0
2029; GISEL-IEEE-NEXT:    v_add_i32_e64 v4, s[4:5], -1, v3
2030; GISEL-IEEE-NEXT:    v_fma_f32 v5, -v4, v3, v0
2031; GISEL-IEEE-NEXT:    v_add_i32_e64 v6, s[4:5], 1, v3
2032; GISEL-IEEE-NEXT:    v_fma_f32 v7, -v6, v3, v0
2033; GISEL-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v5
2034; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v3, v3, v4, s[4:5]
2035; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v7
2036; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v3, v3, v6, s[4:5]
2037; GISEL-IEEE-NEXT:    v_mul_f32_e32 v4, 0x37800000, v3
2038; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
2039; GISEL-IEEE-NEXT:    v_mul_f32_e32 v5, 0x4f800000, v1
2040; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v1, v2
2041; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
2042; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v2, v1
2043; GISEL-IEEE-NEXT:    v_mov_b32_e32 v4, 0x260
2044; GISEL-IEEE-NEXT:    v_cmp_class_f32_e64 s[4:5], v0, v4
2045; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v0, v3, v0, s[4:5]
2046; GISEL-IEEE-NEXT:    v_add_i32_e64 v3, s[4:5], -1, v2
2047; GISEL-IEEE-NEXT:    v_fma_f32 v5, -v3, v2, v1
2048; GISEL-IEEE-NEXT:    v_add_i32_e64 v6, s[4:5], 1, v2
2049; GISEL-IEEE-NEXT:    v_fma_f32 v7, -v6, v2, v1
2050; GISEL-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v5
2051; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[4:5]
2052; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v7
2053; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v2, v2, v6, s[4:5]
2054; GISEL-IEEE-NEXT:    v_mul_f32_e32 v3, 0x37800000, v2
2055; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
2056; GISEL-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v1, v4
2057; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
2058; GISEL-IEEE-NEXT:    s_setpc_b64 s[30:31]
2059;
2060; GCN-DAZ-LABEL: v_sqrt_v2f32_ulp1:
2061; GCN-DAZ:       ; %bb.0:
2062; GCN-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2063; GCN-DAZ-NEXT:    v_sqrt_f32_e32 v0, v0
2064; GCN-DAZ-NEXT:    v_sqrt_f32_e32 v1, v1
2065; GCN-DAZ-NEXT:    s_setpc_b64 s[30:31]
2066  %result = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !1
2067  ret <2 x float> %result
2068}
2069
2070; fpmath should always be used
; <2 x float> sqrt carrying !fpmath !2 (the node is defined outside this
; excerpt). Under the IEEE prefixes both elements are expected to use the short
; v_ldexp_f32 / v_sqrt_f32 / v_ldexp_f32 scaling sequence (threshold 0x800000,
; exponents 32 and -16) rather than the fma-based expansion checked for
; @v_sqrt_v2f32_ulp1. Under the DAZ prefix one bare v_sqrt_f32 per element is
; expected.
2071define <2 x float> @v_sqrt_v2f32_ulp2(<2 x float> %x) {
2072; SDAG-IEEE-LABEL: v_sqrt_v2f32_ulp2:
2073; SDAG-IEEE:       ; %bb.0:
2074; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2075; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0x800000
2076; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
2077; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
2078; SDAG-IEEE-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
2079; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e64 s[4:5], s4, v1
2080; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v2
2081; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[4:5]
2082; SDAG-IEEE-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
2083; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
2084; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v1, v1, v2
2085; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v1, v1
2086; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, -16, vcc
2087; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v2
2088; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, -16, s[4:5]
2089; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v1, v1, v2
2090; SDAG-IEEE-NEXT:    s_setpc_b64 s[30:31]
2091;
2092; GISEL-IEEE-LABEL: v_sqrt_v2f32_ulp2:
2093; GISEL-IEEE:       ; %bb.0:
2094; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2095; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, 0x800000
2096; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
2097; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
2098; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], v1, v2
2099; GISEL-IEEE-NEXT:    v_lshlrev_b32_e32 v3, 5, v3
2100; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[4:5]
2101; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v3
2102; GISEL-IEEE-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
2103; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
2104; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v1, v1, v2
2105; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v1, v1
2106; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, -16, vcc
2107; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v2
2108; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, -16, s[4:5]
2109; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v1, v1, v2
2110; GISEL-IEEE-NEXT:    s_setpc_b64 s[30:31]
2111;
2112; GCN-DAZ-LABEL: v_sqrt_v2f32_ulp2:
2113; GCN-DAZ:       ; %bb.0:
2114; GCN-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2115; GCN-DAZ-NEXT:    v_sqrt_f32_e32 v0, v0
2116; GCN-DAZ-NEXT:    v_sqrt_f32_e32 v1, v1
2117; GCN-DAZ-NEXT:    s_setpc_b64 s[30:31]
2118  %result = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !2
2119  ret <2 x float> %result
2120}
2121
; <2 x float> sqrt of fabs(%x) carrying !fpmath !1 (the node is defined
; outside this excerpt). The fabs is expected to appear only as |v0| / |v1|
; source modifiers. Under the IEEE prefixes each element goes through the long
; expansion (pre-scale by 0x4f800000 below 0xf800000, v_sqrt_f32, two
; v_fma_f32 residual tests against the integer -1/+1 neighbours, post-scale by
; 0x37800000, v_cmp_class_f32 mask 0x260 select). Under the DAZ prefix one
; v_sqrt_f32 with an |vN| modifier per element is expected.
2122define <2 x float> @v_sqrt_v2f32_ulp1_fabs(<2 x float> %x) {
2123; SDAG-IEEE-LABEL: v_sqrt_v2f32_ulp1_fabs:
2124; SDAG-IEEE:       ; %bb.0:
2125; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2126; SDAG-IEEE-NEXT:    s_mov_b32 s6, 0xf800000
2127; SDAG-IEEE-NEXT:    s_mov_b32 s7, 0x4f800000
2128; SDAG-IEEE-NEXT:    v_mul_f32_e64 v2, |v0|, s7
2129; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s6
2130; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v0, |v0|, v2, vcc
2131; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v2, v0
2132; SDAG-IEEE-NEXT:    v_add_i32_e64 v3, s[4:5], -1, v2
2133; SDAG-IEEE-NEXT:    v_fma_f32 v4, -v3, v2, v0
2134; SDAG-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v4
2135; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v3, v2, v3, s[4:5]
2136; SDAG-IEEE-NEXT:    v_add_i32_e64 v4, s[4:5], 1, v2
2137; SDAG-IEEE-NEXT:    v_fma_f32 v2, -v4, v2, v0
2138; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v2
2139; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, v3, v4, s[4:5]
2140; SDAG-IEEE-NEXT:    v_mul_f32_e32 v3, 0x37800000, v2
2141; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
2142; SDAG-IEEE-NEXT:    v_mul_f32_e64 v4, |v1|, s7
2143; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 vcc, |v1|, s6
2144; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, |v1|, v4, vcc
2145; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v4, v1
2146; SDAG-IEEE-NEXT:    v_mov_b32_e32 v3, 0x260
2147; SDAG-IEEE-NEXT:    v_cmp_class_f32_e64 s[4:5], v0, v3
2148; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v0, v2, v0, s[4:5]
2149; SDAG-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v4
2150; SDAG-IEEE-NEXT:    v_fma_f32 v5, -v2, v4, v1
2151; SDAG-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v5
2152; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, v4, v2, s[4:5]
2153; SDAG-IEEE-NEXT:    v_add_i32_e64 v5, s[4:5], 1, v4
2154; SDAG-IEEE-NEXT:    v_fma_f32 v4, -v5, v4, v1
2155; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v4
2156; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, v2, v5, s[4:5]
2157; SDAG-IEEE-NEXT:    v_mul_f32_e32 v4, 0x37800000, v2
2158; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
2159; SDAG-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v1, v3
2160; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
2161; SDAG-IEEE-NEXT:    s_setpc_b64 s[30:31]
2162;
2163; GISEL-IEEE-LABEL: v_sqrt_v2f32_ulp1_fabs:
2164; GISEL-IEEE:       ; %bb.0:
2165; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2166; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, 0xf800000
2167; GISEL-IEEE-NEXT:    v_mov_b32_e32 v3, 0x4f800000
2168; GISEL-IEEE-NEXT:    v_mul_f32_e64 v4, |v0|, v3
2169; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, v2
2170; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v0, |v0|, v4, vcc
2171; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v4, v0
2172; GISEL-IEEE-NEXT:    v_mul_f32_e64 v3, |v1|, v3
2173; GISEL-IEEE-NEXT:    v_add_i32_e64 v5, s[4:5], -1, v4
2174; GISEL-IEEE-NEXT:    v_fma_f32 v6, -v5, v4, v0
2175; GISEL-IEEE-NEXT:    v_add_i32_e64 v7, s[4:5], 1, v4
2176; GISEL-IEEE-NEXT:    v_fma_f32 v8, -v7, v4, v0
2177; GISEL-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v6
2178; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v4, v4, v5, s[4:5]
2179; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v8
2180; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v4, v4, v7, s[4:5]
2181; GISEL-IEEE-NEXT:    v_mul_f32_e32 v5, 0x37800000, v4
2182; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
2183; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 vcc, |v1|, v2
2184; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, |v1|, v3, vcc
2185; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v2, v1
2186; GISEL-IEEE-NEXT:    v_mov_b32_e32 v5, 0x260
2187; GISEL-IEEE-NEXT:    v_cmp_class_f32_e64 s[4:5], v0, v5
2188; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v0, v4, v0, s[4:5]
2189; GISEL-IEEE-NEXT:    v_add_i32_e64 v3, s[4:5], -1, v2
2190; GISEL-IEEE-NEXT:    v_fma_f32 v4, -v3, v2, v1
2191; GISEL-IEEE-NEXT:    v_add_i32_e64 v6, s[4:5], 1, v2
2192; GISEL-IEEE-NEXT:    v_fma_f32 v7, -v6, v2, v1
2193; GISEL-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v4
2194; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[4:5]
2195; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v7
2196; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v2, v2, v6, s[4:5]
2197; GISEL-IEEE-NEXT:    v_mul_f32_e32 v3, 0x37800000, v2
2198; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
2199; GISEL-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v1, v5
2200; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
2201; GISEL-IEEE-NEXT:    s_setpc_b64 s[30:31]
2202;
2203; GCN-DAZ-LABEL: v_sqrt_v2f32_ulp1_fabs:
2204; GCN-DAZ:       ; %bb.0:
2205; GCN-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2206; GCN-DAZ-NEXT:    v_sqrt_f32_e64 v0, |v0|
2207; GCN-DAZ-NEXT:    v_sqrt_f32_e64 v1, |v1|
2208; GCN-DAZ-NEXT:    s_setpc_b64 s[30:31]
2209  %x.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %x)
2210  %result = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %x.fabs), !fpmath !1
2211  ret <2 x float> %result
2212}
2213
2214; fpmath should always be used
; <2 x float> sqrt of fabs(%x) carrying !fpmath !2 (the node is defined
; outside this excerpt). The fabs is expected to appear only as |v0| / |v1|
; source modifiers. Under the IEEE prefixes each element uses the short
; v_ldexp_f32 / v_sqrt_f32 / v_ldexp_f32 scaling sequence (threshold 0x800000,
; exponents 32 and -16), with the two compare results kept in s[4:5] and
; s[6:7]. Under the DAZ prefix one v_sqrt_f32 with an |vN| modifier per
; element is expected.
2215define <2 x float> @v_sqrt_v2f32_ulp2_fabs(<2 x float> %x) {
2216; SDAG-IEEE-LABEL: v_sqrt_v2f32_ulp2_fabs:
2217; SDAG-IEEE:       ; %bb.0:
2218; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2219; SDAG-IEEE-NEXT:    s_mov_b32 s6, 0x800000
2220; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], |v0|, s6
2221; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[4:5]
2222; SDAG-IEEE-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
2223; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 s[6:7], |v1|, s6
2224; SDAG-IEEE-NEXT:    v_ldexp_f32_e64 v0, |v0|, v2
2225; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[6:7]
2226; SDAG-IEEE-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
2227; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
2228; SDAG-IEEE-NEXT:    v_ldexp_f32_e64 v1, |v1|, v2
2229; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v1, v1
2230; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, -16, s[4:5]
2231; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v2
2232; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, -16, s[6:7]
2233; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v1, v1, v2
2234; SDAG-IEEE-NEXT:    s_setpc_b64 s[30:31]
2235;
2236; GISEL-IEEE-LABEL: v_sqrt_v2f32_ulp2_fabs:
2237; GISEL-IEEE:       ; %bb.0:
2238; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2239; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, 0x800000
2240; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], |v0|, v2
2241; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v3, 0, 1, s[4:5]
2242; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 s[6:7], |v1|, v2
2243; GISEL-IEEE-NEXT:    v_lshlrev_b32_e32 v3, 5, v3
2244; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[6:7]
2245; GISEL-IEEE-NEXT:    v_ldexp_f32_e64 v0, |v0|, v3
2246; GISEL-IEEE-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
2247; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
2248; GISEL-IEEE-NEXT:    v_ldexp_f32_e64 v1, |v1|, v2
2249; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v1, v1
2250; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, -16, s[4:5]
2251; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v2
2252; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, -16, s[6:7]
2253; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v1, v1, v2
2254; GISEL-IEEE-NEXT:    s_setpc_b64 s[30:31]
2255;
2256; GCN-DAZ-LABEL: v_sqrt_v2f32_ulp2_fabs:
2257; GCN-DAZ:       ; %bb.0:
2258; GCN-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2259; GCN-DAZ-NEXT:    v_sqrt_f32_e64 v0, |v0|
2260; GCN-DAZ-NEXT:    v_sqrt_f32_e64 v1, |v1|
2261; GCN-DAZ-NEXT:    s_setpc_b64 s[30:31]
2262  %x.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %x)
2263  %result = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %x.fabs), !fpmath !2
2264  ret <2 x float> %result
2265}
2266
2267; afn is stronger than the fpmath
; Scalar sqrt with the afn fast-math flag plus !fpmath !1 (the node is defined
; outside this excerpt). One shared prefix covers all four RUN configurations:
; a single v_sqrt_f32 with no scaling or refinement code around it.
2268define float @v_sqrt_f32_afn_ulp1(float %x) {
2269; GCN-LABEL: v_sqrt_f32_afn_ulp1:
2270; GCN:       ; %bb.0:
2271; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2272; GCN-NEXT:    v_sqrt_f32_e32 v0, v0
2273; GCN-NEXT:    s_setpc_b64 s[30:31]
2274  %result = call afn float @llvm.sqrt.f32(float %x), !fpmath !1
2275  ret float %result
2276}
2277
2278; afn is stronger than the fpmath
; Scalar sqrt with the afn fast-math flag plus !fpmath !2 (the node is defined
; outside this excerpt). One shared prefix covers all four RUN configurations:
; a single v_sqrt_f32, i.e. the same expected code as @v_sqrt_f32_afn_ulp1.
2279define float @v_sqrt_f32_afn_ulp2(float %x) {
2280; GCN-LABEL: v_sqrt_f32_afn_ulp2:
2281; GCN:       ; %bb.0:
2282; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2283; GCN-NEXT:    v_sqrt_f32_e32 v0, v0
2284; GCN-NEXT:    s_setpc_b64 s[30:31]
2285  %result = call afn float @llvm.sqrt.f32(float %x), !fpmath !2
2286  ret float %result
2287}
2288
; <2 x float> sqrt with the afn fast-math flag plus !fpmath !1 (the node is
; defined outside this excerpt). One shared prefix covers all four RUN
; configurations: one bare v_sqrt_f32 per element.
2289define <2 x float> @v_sqrt_v2f32_afn_ulp1(<2 x float> %x) {
2290; GCN-LABEL: v_sqrt_v2f32_afn_ulp1:
2291; GCN:       ; %bb.0:
2292; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2293; GCN-NEXT:    v_sqrt_f32_e32 v0, v0
2294; GCN-NEXT:    v_sqrt_f32_e32 v1, v1
2295; GCN-NEXT:    s_setpc_b64 s[30:31]
2296  %result = call afn <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !1
2297  ret <2 x float> %result
2298}
2299
2300; fpmath should always be used
; <2 x float> sqrt with the afn fast-math flag plus !fpmath !2 (the node is
; defined outside this excerpt). One shared prefix covers all four RUN
; configurations: one bare v_sqrt_f32 per element, i.e. the same expected code
; as @v_sqrt_v2f32_afn_ulp1.
2301define <2 x float> @v_sqrt_v2f32_afn_ulp2(<2 x float> %x) {
2302; GCN-LABEL: v_sqrt_v2f32_afn_ulp2:
2303; GCN:       ; %bb.0:
2304; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2305; GCN-NEXT:    v_sqrt_f32_e32 v0, v0
2306; GCN-NEXT:    v_sqrt_f32_e32 v1, v1
2307; GCN-NEXT:    s_setpc_b64 s[30:31]
2308  %result = call afn <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !2
2309  ret <2 x float> %result
2310}
2311
; 1.0 / sqrt(%x) where only the sqrt call has the contract flag. The sqrt
; carries !fpmath !4 and the fdiv !fpmath !3 (both nodes are defined outside
; this excerpt, so their ulp values are not visible here).
; No v_rsq_f32 appears in any expected sequence. The IEEE prefixes expect the
; ldexp-scaled v_sqrt_f32 followed by a reciprocal assembled from
; v_frexp_mant_f32, v_rcp_f32, v_frexp_exp_i32_f32 and v_ldexp_f32; the DAZ
; prefix expects v_sqrt_f32 followed by v_rcp_f32.
2312define float @v_sqrt_f32_ulp2_noncontractable_rcp(float %x) {
2313; SDAG-IEEE-LABEL: v_sqrt_f32_ulp2_noncontractable_rcp:
2314; SDAG-IEEE:       ; %bb.0:
2315; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2316; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0x800000
2317; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
2318; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
2319; SDAG-IEEE-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
2320; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
2321; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
2322; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, -16, vcc
2323; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0x7f800000
2324; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
2325; SDAG-IEEE-NEXT:    v_frexp_mant_f32_e32 v1, v0
2326; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
2327; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v1, v0, v1, vcc
2328; SDAG-IEEE-NEXT:    v_rcp_f32_e32 v1, v1
2329; SDAG-IEEE-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
2330; SDAG-IEEE-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
2331; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v1, v0
2332; SDAG-IEEE-NEXT:    s_setpc_b64 s[30:31]
2333;
2334; GISEL-IEEE-LABEL: v_sqrt_f32_ulp2_noncontractable_rcp:
2335; GISEL-IEEE:       ; %bb.0:
2336; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2337; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0x800000
2338; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
2339; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
2340; GISEL-IEEE-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
2341; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
2342; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
2343; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, -16, vcc
2344; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, 0x7f800000
2345; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
2346; GISEL-IEEE-NEXT:    v_frexp_mant_f32_e32 v1, v0
2347; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, v2
2348; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v1, v0, v1, vcc
2349; GISEL-IEEE-NEXT:    v_rcp_f32_e32 v1, v1
2350; GISEL-IEEE-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
2351; GISEL-IEEE-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
2352; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v0, v1, v0
2353; GISEL-IEEE-NEXT:    s_setpc_b64 s[30:31]
2354;
2355; GCN-DAZ-LABEL: v_sqrt_f32_ulp2_noncontractable_rcp:
2356; GCN-DAZ:       ; %bb.0:
2357; GCN-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2358; GCN-DAZ-NEXT:    v_sqrt_f32_e32 v0, v0
2359; GCN-DAZ-NEXT:    v_rcp_f32_e32 v0, v0
2360; GCN-DAZ-NEXT:    s_setpc_b64 s[30:31]
2361  %sqrt = call contract float @llvm.sqrt.f32(float %x), !fpmath !4
2362  %result = fdiv float 1.0, %sqrt, !fpmath !3
2363  ret float %result
2364}
2365
; 1.0 / sqrt(%x) with the contract flag on both the sqrt call and the fdiv
; (sqrt carries !fpmath !4, fdiv !fpmath !3; both nodes are defined outside
; this excerpt). Every expected sequence uses v_rsq_f32 in place of a separate
; sqrt and reciprocal. The IEEE prefixes expect v_ldexp_f32 by 24 when the
; input is below 0x800000 (else 0), v_rsq_f32, then v_ldexp_f32 by 12 (else 0);
; the DAZ prefix expects a lone v_rsq_f32.
2366define float @v_sqrt_f32_ulp2_contractable_rcp(float %x) {
2367; SDAG-IEEE-LABEL: v_sqrt_f32_ulp2_contractable_rcp:
2368; SDAG-IEEE:       ; %bb.0:
2369; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2370; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0x800000
2371; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
2372; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 24, vcc
2373; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
2374; SDAG-IEEE-NEXT:    v_rsq_f32_e32 v0, v0
2375; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 12, vcc
2376; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
2377; SDAG-IEEE-NEXT:    s_setpc_b64 s[30:31]
2378;
2379; GISEL-IEEE-LABEL: v_sqrt_f32_ulp2_contractable_rcp:
2380; GISEL-IEEE:       ; %bb.0:
2381; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2382; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0x800000
2383; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
2384; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 24, vcc
2385; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
2386; GISEL-IEEE-NEXT:    v_rsq_f32_e32 v0, v0
2387; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 12, vcc
2388; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
2389; GISEL-IEEE-NEXT:    s_setpc_b64 s[30:31]
2390;
2391; GCN-DAZ-LABEL: v_sqrt_f32_ulp2_contractable_rcp:
2392; GCN-DAZ:       ; %bb.0:
2393; GCN-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2394; GCN-DAZ-NEXT:    v_rsq_f32_e32 v0, v0
2395; GCN-DAZ-NEXT:    s_setpc_b64 s[30:31]
2396  %sqrt = call contract float @llvm.sqrt.f32(float %x), !fpmath !4
2397  %result = fdiv contract float 1.0, %sqrt, !fpmath !3
2398  ret float %result
2399}
2400
; %y / sqrt(%x) where only the sqrt call has the contract flag (sqrt carries
; !fpmath !4, fdiv !fpmath !3; both nodes are defined outside this excerpt).
; The IEEE prefixes expect the ldexp-scaled v_sqrt_f32, then a division built
; from the v_frexp_mant_f32 of both operands, v_rcp_f32, v_mul_f32, a
; v_sub_i32 of the two v_frexp_exp_i32_f32 results and a final v_ldexp_f32.
; The preserve-sign runs are checked per selector: v_sqrt_f32, the divisor
; multiplied by 0x2f800000 (else 1.0) when its magnitude exceeds 0x6f800000,
; v_rcp_f32, and two v_mul_f32. The two selectors differ only in which
; registers hold the constants.
2401define float @v_sqrt_f32_ulp2_noncontractable_fdiv(float %x, float %y) {
2402; SDAG-IEEE-LABEL: v_sqrt_f32_ulp2_noncontractable_fdiv:
2403; SDAG-IEEE:       ; %bb.0:
2404; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2405; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0x800000
2406; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
2407; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
2408; SDAG-IEEE-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
2409; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v2
2410; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
2411; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, -16, vcc
2412; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0x7f800000
2413; SDAG-IEEE-NEXT:    v_frexp_mant_f32_e32 v3, v1
2414; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v2
2415; SDAG-IEEE-NEXT:    v_frexp_mant_f32_e32 v2, v0
2416; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
2417; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc
2418; SDAG-IEEE-NEXT:    v_rcp_f32_e32 v2, v2
2419; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 vcc, |v1|, s4
2420; SDAG-IEEE-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
2421; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v3, v1, v3, vcc
2422; SDAG-IEEE-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
2423; SDAG-IEEE-NEXT:    v_mul_f32_e32 v2, v3, v2
2424; SDAG-IEEE-NEXT:    v_sub_i32_e32 v0, vcc, v1, v0
2425; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v2, v0
2426; SDAG-IEEE-NEXT:    s_setpc_b64 s[30:31]
2427;
2428; GISEL-IEEE-LABEL: v_sqrt_f32_ulp2_noncontractable_fdiv:
2429; GISEL-IEEE:       ; %bb.0:
2430; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2431; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, 0x800000
2432; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
2433; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
2434; GISEL-IEEE-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
2435; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v2
2436; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
2437; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, -16, vcc
2438; GISEL-IEEE-NEXT:    v_mov_b32_e32 v3, 0x7f800000
2439; GISEL-IEEE-NEXT:    v_frexp_mant_f32_e32 v4, v1
2440; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v2
2441; GISEL-IEEE-NEXT:    v_frexp_mant_f32_e32 v2, v0
2442; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, v3
2443; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc
2444; GISEL-IEEE-NEXT:    v_rcp_f32_e32 v2, v2
2445; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 vcc, |v1|, v3
2446; GISEL-IEEE-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
2447; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v3, v1, v4, vcc
2448; GISEL-IEEE-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
2449; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, v3, v2
2450; GISEL-IEEE-NEXT:    v_sub_i32_e32 v0, vcc, v1, v0
2451; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v0, v2, v0
2452; GISEL-IEEE-NEXT:    s_setpc_b64 s[30:31]
2453;
2454; SDAG-DAZ-LABEL: v_sqrt_f32_ulp2_noncontractable_fdiv:
2455; SDAG-DAZ:       ; %bb.0:
2456; SDAG-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2457; SDAG-DAZ-NEXT:    v_sqrt_f32_e32 v0, v0
2458; SDAG-DAZ-NEXT:    s_mov_b32 s4, 0x6f800000
2459; SDAG-DAZ-NEXT:    v_mov_b32_e32 v2, 0x2f800000
2460; SDAG-DAZ-NEXT:    v_cmp_gt_f32_e64 vcc, |v0|, s4
2461; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v2, 1.0, v2, vcc
2462; SDAG-DAZ-NEXT:    v_mul_f32_e32 v0, v0, v2
2463; SDAG-DAZ-NEXT:    v_rcp_f32_e32 v0, v0
2464; SDAG-DAZ-NEXT:    v_mul_f32_e32 v0, v1, v0
2465; SDAG-DAZ-NEXT:    v_mul_f32_e32 v0, v2, v0
2466; SDAG-DAZ-NEXT:    s_setpc_b64 s[30:31]
2467;
2468; GISEL-DAZ-LABEL: v_sqrt_f32_ulp2_noncontractable_fdiv:
2469; GISEL-DAZ:       ; %bb.0:
2470; GISEL-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2471; GISEL-DAZ-NEXT:    v_sqrt_f32_e32 v0, v0
2472; GISEL-DAZ-NEXT:    v_mov_b32_e32 v2, 0x6f800000
2473; GISEL-DAZ-NEXT:    v_mov_b32_e32 v3, 0x2f800000
2474; GISEL-DAZ-NEXT:    v_cmp_gt_f32_e64 vcc, |v0|, v2
2475; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v2, 1.0, v3, vcc
2476; GISEL-DAZ-NEXT:    v_mul_f32_e32 v0, v0, v2
2477; GISEL-DAZ-NEXT:    v_rcp_f32_e32 v0, v0
2478; GISEL-DAZ-NEXT:    v_mul_f32_e32 v0, v1, v0
2479; GISEL-DAZ-NEXT:    v_mul_f32_e32 v0, v2, v0
2480; GISEL-DAZ-NEXT:    s_setpc_b64 s[30:31]
2481  %sqrt = call contract float @llvm.sqrt.f32(float %x), !fpmath !4
2482  %result = fdiv float %y, %sqrt, !fpmath !3
2483  ret float %result
2484}
2485
; %y / sqrt(%x) with the contract flag on both the sqrt call and the fdiv
; (sqrt carries !fpmath !4, fdiv !fpmath !3; both nodes are defined outside
; this excerpt). The expected instruction sequences are the same as for
; @v_sqrt_f32_ulp2_noncontractable_fdiv: no v_rsq_f32 is formed when the
; numerator is a variable, and v_sqrt_f32 is followed by a v_rcp_f32-based
; division under every prefix.
2486define float @v_sqrt_f32_ulp2_contractable_fdiv(float %x, float %y) {
2487; SDAG-IEEE-LABEL: v_sqrt_f32_ulp2_contractable_fdiv:
2488; SDAG-IEEE:       ; %bb.0:
2489; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2490; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0x800000
2491; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
2492; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
2493; SDAG-IEEE-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
2494; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v2
2495; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
2496; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, -16, vcc
2497; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0x7f800000
2498; SDAG-IEEE-NEXT:    v_frexp_mant_f32_e32 v3, v1
2499; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v2
2500; SDAG-IEEE-NEXT:    v_frexp_mant_f32_e32 v2, v0
2501; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
2502; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc
2503; SDAG-IEEE-NEXT:    v_rcp_f32_e32 v2, v2
2504; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 vcc, |v1|, s4
2505; SDAG-IEEE-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
2506; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v3, v1, v3, vcc
2507; SDAG-IEEE-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
2508; SDAG-IEEE-NEXT:    v_mul_f32_e32 v2, v3, v2
2509; SDAG-IEEE-NEXT:    v_sub_i32_e32 v0, vcc, v1, v0
2510; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v2, v0
2511; SDAG-IEEE-NEXT:    s_setpc_b64 s[30:31]
2512;
2513; GISEL-IEEE-LABEL: v_sqrt_f32_ulp2_contractable_fdiv:
2514; GISEL-IEEE:       ; %bb.0:
2515; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2516; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, 0x800000
2517; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
2518; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
2519; GISEL-IEEE-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
2520; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v2
2521; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
2522; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, -16, vcc
2523; GISEL-IEEE-NEXT:    v_mov_b32_e32 v3, 0x7f800000
2524; GISEL-IEEE-NEXT:    v_frexp_mant_f32_e32 v4, v1
2525; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v2
2526; GISEL-IEEE-NEXT:    v_frexp_mant_f32_e32 v2, v0
2527; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, v3
2528; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc
2529; GISEL-IEEE-NEXT:    v_rcp_f32_e32 v2, v2
2530; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 vcc, |v1|, v3
2531; GISEL-IEEE-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
2532; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v3, v1, v4, vcc
2533; GISEL-IEEE-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
2534; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, v3, v2
2535; GISEL-IEEE-NEXT:    v_sub_i32_e32 v0, vcc, v1, v0
2536; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v0, v2, v0
2537; GISEL-IEEE-NEXT:    s_setpc_b64 s[30:31]
2538;
2539; SDAG-DAZ-LABEL: v_sqrt_f32_ulp2_contractable_fdiv:
2540; SDAG-DAZ:       ; %bb.0:
2541; SDAG-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2542; SDAG-DAZ-NEXT:    v_sqrt_f32_e32 v0, v0
2543; SDAG-DAZ-NEXT:    s_mov_b32 s4, 0x6f800000
2544; SDAG-DAZ-NEXT:    v_mov_b32_e32 v2, 0x2f800000
2545; SDAG-DAZ-NEXT:    v_cmp_gt_f32_e64 vcc, |v0|, s4
2546; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v2, 1.0, v2, vcc
2547; SDAG-DAZ-NEXT:    v_mul_f32_e32 v0, v0, v2
2548; SDAG-DAZ-NEXT:    v_rcp_f32_e32 v0, v0
2549; SDAG-DAZ-NEXT:    v_mul_f32_e32 v0, v1, v0
2550; SDAG-DAZ-NEXT:    v_mul_f32_e32 v0, v2, v0
2551; SDAG-DAZ-NEXT:    s_setpc_b64 s[30:31]
2552;
2553; GISEL-DAZ-LABEL: v_sqrt_f32_ulp2_contractable_fdiv:
2554; GISEL-DAZ:       ; %bb.0:
2555; GISEL-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2556; GISEL-DAZ-NEXT:    v_sqrt_f32_e32 v0, v0
2557; GISEL-DAZ-NEXT:    v_mov_b32_e32 v2, 0x6f800000
2558; GISEL-DAZ-NEXT:    v_mov_b32_e32 v3, 0x2f800000
2559; GISEL-DAZ-NEXT:    v_cmp_gt_f32_e64 vcc, |v0|, v2
2560; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v2, 1.0, v3, vcc
2561; GISEL-DAZ-NEXT:    v_mul_f32_e32 v0, v0, v2
2562; GISEL-DAZ-NEXT:    v_rcp_f32_e32 v0, v0
2563; GISEL-DAZ-NEXT:    v_mul_f32_e32 v0, v1, v0
2564; GISEL-DAZ-NEXT:    v_mul_f32_e32 v0, v2, v0
2565; GISEL-DAZ-NEXT:    s_setpc_b64 s[30:31]
2566  %sqrt = call contract float @llvm.sqrt.f32(float %x), !fpmath !4
2567  %result = fdiv contract float %y, %sqrt, !fpmath !3
2568  ret float %result
2569}
2570
; %y / sqrt(%x) with contract on the sqrt call and arcp + contract on the fdiv
; (sqrt carries !fpmath !4, fdiv !fpmath !3; both nodes are defined outside
; this excerpt). With arcp the division is expected as a reciprocal of the
; sqrt followed by one v_mul_f32 with %y. The IEEE prefixes expect the
; ldexp-scaled v_sqrt_f32, a reciprocal assembled from v_frexp_mant_f32,
; v_rcp_f32, v_frexp_exp_i32_f32 and v_ldexp_f32, then the multiply; the DAZ
; prefix expects v_sqrt_f32, v_rcp_f32, v_mul_f32. No v_rsq_f32 is expected.
2571define float @v_sqrt_f32_ulp2_contractable_fdiv_arcp(float %x, float %y) {
2572; SDAG-IEEE-LABEL: v_sqrt_f32_ulp2_contractable_fdiv_arcp:
2573; SDAG-IEEE:       ; %bb.0:
2574; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2575; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0x800000
2576; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
2577; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
2578; SDAG-IEEE-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
2579; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v2
2580; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
2581; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, -16, vcc
2582; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0x7f800000
2583; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v2
2584; SDAG-IEEE-NEXT:    v_frexp_mant_f32_e32 v2, v0
2585; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
2586; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc
2587; SDAG-IEEE-NEXT:    v_rcp_f32_e32 v2, v2
2588; SDAG-IEEE-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
2589; SDAG-IEEE-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
2590; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v2, v0
2591; SDAG-IEEE-NEXT:    v_mul_f32_e32 v0, v1, v0
2592; SDAG-IEEE-NEXT:    s_setpc_b64 s[30:31]
2593;
2594; GISEL-IEEE-LABEL: v_sqrt_f32_ulp2_contractable_fdiv_arcp:
2595; GISEL-IEEE:       ; %bb.0:
2596; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2597; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, 0x800000
2598; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
2599; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
2600; GISEL-IEEE-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
2601; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v2
2602; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
2603; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, -16, vcc
2604; GISEL-IEEE-NEXT:    v_mov_b32_e32 v3, 0x7f800000
2605; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v2
2606; GISEL-IEEE-NEXT:    v_frexp_mant_f32_e32 v2, v0
2607; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, v3
2608; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc
2609; GISEL-IEEE-NEXT:    v_rcp_f32_e32 v2, v2
2610; GISEL-IEEE-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
2611; GISEL-IEEE-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
2612; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v0, v2, v0
2613; GISEL-IEEE-NEXT:    v_mul_f32_e32 v0, v1, v0
2614; GISEL-IEEE-NEXT:    s_setpc_b64 s[30:31]
2615;
2616; GCN-DAZ-LABEL: v_sqrt_f32_ulp2_contractable_fdiv_arcp:
2617; GCN-DAZ:       ; %bb.0:
2618; GCN-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2619; GCN-DAZ-NEXT:    v_sqrt_f32_e32 v0, v0
2620; GCN-DAZ-NEXT:    v_rcp_f32_e32 v0, v0
2621; GCN-DAZ-NEXT:    v_mul_f32_e32 v0, v1, v0
2622; GCN-DAZ-NEXT:    s_setpc_b64 s[30:31]
2623  %sqrt = call contract float @llvm.sqrt.f32(float %x), !fpmath !4
2624  %result = fdiv arcp contract float %y, %sqrt, !fpmath !3
2625  ret float %result
2626}
2627
; <2 x float> test for 1.0 / sqrt(%x) where only the sqrt call has the
; 'contract' flag; the fdiv has no fast-math flags (sqrt: !fpmath !4,
; fdiv: !fpmath !3).
; Expected output keeps a separate v_sqrt_f32 and v_rcp_f32 for each element
; in every run configuration (no v_rsq_f32 appears). The default-denormal
; (IEEE) runs also expect v_ldexp_f32 rescaling around each sqrt and a
; frexp/rcp/ldexp expansion of each reciprocal.
2628define <2 x float> @v_sqrt_v2f32_ulp2_noncontractable_rcp(<2 x float> %x) {
2629; SDAG-IEEE-LABEL: v_sqrt_v2f32_ulp2_noncontractable_rcp:
2630; SDAG-IEEE:       ; %bb.0:
2631; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2632; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0x800000
2633; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
2634; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
2635; SDAG-IEEE-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
2636; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e64 s[4:5], s4, v1
2637; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v2
2638; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[4:5]
2639; SDAG-IEEE-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
2640; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
2641; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v1, v1, v2
2642; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v1, v1
2643; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, -16, vcc
2644; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v2
2645; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, -16, s[4:5]
2646; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0x7f800000
2647; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v1, v1, v2
2648; SDAG-IEEE-NEXT:    v_frexp_mant_f32_e32 v2, v0
2649; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
2650; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc
2651; SDAG-IEEE-NEXT:    v_rcp_f32_e32 v2, v2
2652; SDAG-IEEE-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
2653; SDAG-IEEE-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
2654; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v2, v0
2655; SDAG-IEEE-NEXT:    v_frexp_mant_f32_e32 v2, v1
2656; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 vcc, |v1|, s4
2657; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v2, v1, v2, vcc
2658; SDAG-IEEE-NEXT:    v_rcp_f32_e32 v2, v2
2659; SDAG-IEEE-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
2660; SDAG-IEEE-NEXT:    v_sub_i32_e32 v1, vcc, 0, v1
2661; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v1, v2, v1
2662; SDAG-IEEE-NEXT:    s_setpc_b64 s[30:31]
2663;
2664; GISEL-IEEE-LABEL: v_sqrt_v2f32_ulp2_noncontractable_rcp:
2665; GISEL-IEEE:       ; %bb.0:
2666; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2667; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, 0x800000
2668; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
2669; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
2670; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], v1, v2
2671; GISEL-IEEE-NEXT:    v_lshlrev_b32_e32 v3, 5, v3
2672; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[4:5]
2673; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v3
2674; GISEL-IEEE-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
2675; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
2676; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v1, v1, v2
2677; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v1, v1
2678; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, -16, vcc
2679; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v2
2680; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, -16, s[4:5]
2681; GISEL-IEEE-NEXT:    v_mov_b32_e32 v3, 0x7f800000
2682; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v1, v1, v2
2683; GISEL-IEEE-NEXT:    v_frexp_mant_f32_e32 v2, v0
2684; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, v3
2685; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc
2686; GISEL-IEEE-NEXT:    v_rcp_f32_e32 v2, v2
2687; GISEL-IEEE-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
2688; GISEL-IEEE-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
2689; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v0, v2, v0
2690; GISEL-IEEE-NEXT:    v_frexp_mant_f32_e32 v2, v1
2691; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 vcc, |v1|, v3
2692; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v2, v1, v2, vcc
2693; GISEL-IEEE-NEXT:    v_rcp_f32_e32 v2, v2
2694; GISEL-IEEE-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
2695; GISEL-IEEE-NEXT:    v_sub_i32_e32 v1, vcc, 0, v1
2696; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v1, v2, v1
2697; GISEL-IEEE-NEXT:    s_setpc_b64 s[30:31]
2698;
2699; GCN-DAZ-LABEL: v_sqrt_v2f32_ulp2_noncontractable_rcp:
2700; GCN-DAZ:       ; %bb.0:
2701; GCN-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2702; GCN-DAZ-NEXT:    v_sqrt_f32_e32 v0, v0
2703; GCN-DAZ-NEXT:    v_sqrt_f32_e32 v1, v1
2704; GCN-DAZ-NEXT:    v_rcp_f32_e32 v0, v0
2705; GCN-DAZ-NEXT:    v_rcp_f32_e32 v1, v1
2706; GCN-DAZ-NEXT:    s_setpc_b64 s[30:31]
2707  %sqrt = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !4
2708  %result = fdiv <2 x float> <float 1.0, float 1.0>, %sqrt, !fpmath !3
2709  ret <2 x float> %result
2710}
2711
; <2 x float> test for 1.0 / sqrt(%x) where both the sqrt call and the fdiv
; carry the 'contract' flag (sqrt: !fpmath !4, fdiv: !fpmath !3).
; Expected output uses one v_rsq_f32 per element instead of a sqrt/rcp pair.
; The default-denormal (IEEE) runs additionally expect each input to be scaled
; by v_ldexp_f32 with exponent 24 when it compares below 0x800000, and the
; v_rsq_f32 result to be scaled by v_ldexp_f32 with exponent 12 under the same
; condition. The preserve-sign (DAZ) runs expect the bare v_rsq_f32.
2712define <2 x float> @v_sqrt_v2f32_ulp2_contractable_rcp(<2 x float> %x) {
2713; SDAG-IEEE-LABEL: v_sqrt_v2f32_ulp2_contractable_rcp:
2714; SDAG-IEEE:       ; %bb.0:
2715; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2716; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0x800000
2717; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
2718; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, 24, vcc
2719; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e64 s[4:5], s4, v1
2720; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v2
2721; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, 24, s[4:5]
2722; SDAG-IEEE-NEXT:    v_rsq_f32_e32 v0, v0
2723; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v1, v1, v2
2724; SDAG-IEEE-NEXT:    v_rsq_f32_e32 v1, v1
2725; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, 12, vcc
2726; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v2
2727; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, 12, s[4:5]
2728; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v1, v1, v2
2729; SDAG-IEEE-NEXT:    s_setpc_b64 s[30:31]
2730;
2731; GISEL-IEEE-LABEL: v_sqrt_v2f32_ulp2_contractable_rcp:
2732; GISEL-IEEE:       ; %bb.0:
2733; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2734; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, 0x800000
2735; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
2736; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v3, 0, 24, vcc
2737; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], v1, v2
2738; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v3
2739; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, 24, s[4:5]
2740; GISEL-IEEE-NEXT:    v_rsq_f32_e32 v0, v0
2741; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v1, v1, v2
2742; GISEL-IEEE-NEXT:    v_rsq_f32_e32 v1, v1
2743; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, 12, vcc
2744; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v2
2745; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, 12, s[4:5]
2746; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v1, v1, v2
2747; GISEL-IEEE-NEXT:    s_setpc_b64 s[30:31]
2748;
2749; GCN-DAZ-LABEL: v_sqrt_v2f32_ulp2_contractable_rcp:
2750; GCN-DAZ:       ; %bb.0:
2751; GCN-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2752; GCN-DAZ-NEXT:    v_rsq_f32_e32 v0, v0
2753; GCN-DAZ-NEXT:    v_rsq_f32_e32 v1, v1
2754; GCN-DAZ-NEXT:    s_setpc_b64 s[30:31]
2755  %sqrt = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !4
2756  %result = fdiv contract <2 x float> <float 1.0, float 1.0>, %sqrt, !fpmath !3
2757  ret <2 x float> %result
2758}
2759
; <2 x float> test for %y / sqrt(%x) with 'contract' on both the sqrt call and
; the fdiv, but without 'arcp' (sqrt: !fpmath !4, fdiv: !fpmath !3).
; Expected output keeps v_sqrt_f32 followed by an expanded division for each
; element; no v_rsq_f32 appears.
;  * Default-denormal (IEEE) runs: ldexp rescaling around each sqrt, then the
;    division is done on frexp mantissas (v_rcp_f32 + v_mul_f32) and the
;    exponent difference is reapplied with v_ldexp_f32.
;  * Preserve-sign (DAZ) runs: the denominator is multiplied by 0x2f800000
;    when its magnitude exceeds 0x6f800000 (1.0 otherwise) before v_rcp_f32,
;    and the quotient is multiplied by the same factor afterwards.
2760define <2 x float> @v_sqrt_v2f32_ulp2_contractable_fdiv(<2 x float> %x, <2 x float> %y) {
2761; SDAG-IEEE-LABEL: v_sqrt_v2f32_ulp2_contractable_fdiv:
2762; SDAG-IEEE:       ; %bb.0:
2763; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2764; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0x800000
2765; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
2766; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
2767; SDAG-IEEE-NEXT:    v_lshlrev_b32_e32 v4, 5, v4
2768; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e64 s[4:5], s4, v1
2769; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v4
2770; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s[4:5]
2771; SDAG-IEEE-NEXT:    v_lshlrev_b32_e32 v4, 5, v4
2772; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
2773; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v1, v1, v4
2774; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v1, v1
2775; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v4, 0, -16, vcc
2776; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v4
2777; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v4, 0, -16, s[4:5]
2778; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0x7f800000
2779; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v1, v1, v4
2780; SDAG-IEEE-NEXT:    v_frexp_mant_f32_e32 v4, v0
2781; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
2782; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v4, v0, v4, vcc
2783; SDAG-IEEE-NEXT:    v_frexp_mant_f32_e32 v5, v2
2784; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 vcc, |v2|, s4
2785; SDAG-IEEE-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
2786; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v5, v2, v5, vcc
2787; SDAG-IEEE-NEXT:    v_frexp_exp_i32_f32_e32 v2, v2
2788; SDAG-IEEE-NEXT:    v_rcp_f32_e32 v4, v4
2789; SDAG-IEEE-NEXT:    v_sub_i32_e32 v0, vcc, v2, v0
2790; SDAG-IEEE-NEXT:    v_frexp_mant_f32_e32 v2, v1
2791; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 vcc, |v1|, s4
2792; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v2, v1, v2, vcc
2793; SDAG-IEEE-NEXT:    v_rcp_f32_e32 v2, v2
2794; SDAG-IEEE-NEXT:    v_mul_f32_e32 v4, v5, v4
2795; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v4, v0
2796; SDAG-IEEE-NEXT:    v_frexp_mant_f32_e32 v4, v3
2797; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 vcc, |v3|, s4
2798; SDAG-IEEE-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
2799; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v4, v3, v4, vcc
2800; SDAG-IEEE-NEXT:    v_frexp_exp_i32_f32_e32 v3, v3
2801; SDAG-IEEE-NEXT:    v_mul_f32_e32 v2, v4, v2
2802; SDAG-IEEE-NEXT:    v_sub_i32_e32 v1, vcc, v3, v1
2803; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v1, v2, v1
2804; SDAG-IEEE-NEXT:    s_setpc_b64 s[30:31]
2805;
2806; GISEL-IEEE-LABEL: v_sqrt_v2f32_ulp2_contractable_fdiv:
2807; GISEL-IEEE:       ; %bb.0:
2808; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2809; GISEL-IEEE-NEXT:    v_mov_b32_e32 v4, 0x800000
2810; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v4
2811; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
2812; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], v1, v4
2813; GISEL-IEEE-NEXT:    v_lshlrev_b32_e32 v5, 5, v5
2814; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s[4:5]
2815; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v5
2816; GISEL-IEEE-NEXT:    v_lshlrev_b32_e32 v4, 5, v4
2817; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
2818; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v1, v1, v4
2819; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v1, v1
2820; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v4, 0, -16, vcc
2821; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v4
2822; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v4, 0, -16, s[4:5]
2823; GISEL-IEEE-NEXT:    v_mov_b32_e32 v5, 0x7f800000
2824; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v1, v1, v4
2825; GISEL-IEEE-NEXT:    v_frexp_mant_f32_e32 v4, v0
2826; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, v5
2827; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v4, v0, v4, vcc
2828; GISEL-IEEE-NEXT:    v_frexp_mant_f32_e32 v6, v2
2829; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 vcc, |v2|, v5
2830; GISEL-IEEE-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
2831; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v6, v2, v6, vcc
2832; GISEL-IEEE-NEXT:    v_frexp_exp_i32_f32_e32 v2, v2
2833; GISEL-IEEE-NEXT:    v_rcp_f32_e32 v4, v4
2834; GISEL-IEEE-NEXT:    v_sub_i32_e32 v0, vcc, v2, v0
2835; GISEL-IEEE-NEXT:    v_frexp_mant_f32_e32 v2, v1
2836; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 vcc, |v1|, v5
2837; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v2, v1, v2, vcc
2838; GISEL-IEEE-NEXT:    v_rcp_f32_e32 v2, v2
2839; GISEL-IEEE-NEXT:    v_mul_f32_e32 v4, v6, v4
2840; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v0, v4, v0
2841; GISEL-IEEE-NEXT:    v_frexp_mant_f32_e32 v4, v3
2842; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 vcc, |v3|, v5
2843; GISEL-IEEE-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
2844; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v4, v3, v4, vcc
2845; GISEL-IEEE-NEXT:    v_frexp_exp_i32_f32_e32 v3, v3
2846; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, v4, v2
2847; GISEL-IEEE-NEXT:    v_sub_i32_e32 v1, vcc, v3, v1
2848; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v1, v2, v1
2849; GISEL-IEEE-NEXT:    s_setpc_b64 s[30:31]
2850;
2851; SDAG-DAZ-LABEL: v_sqrt_v2f32_ulp2_contractable_fdiv:
2852; SDAG-DAZ:       ; %bb.0:
2853; SDAG-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2854; SDAG-DAZ-NEXT:    v_sqrt_f32_e32 v0, v0
2855; SDAG-DAZ-NEXT:    v_sqrt_f32_e32 v1, v1
2856; SDAG-DAZ-NEXT:    s_mov_b32 s4, 0x6f800000
2857; SDAG-DAZ-NEXT:    v_mov_b32_e32 v4, 0x2f800000
2858; SDAG-DAZ-NEXT:    v_cmp_gt_f32_e64 vcc, |v0|, s4
2859; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v5, 1.0, v4, vcc
2860; SDAG-DAZ-NEXT:    v_cmp_gt_f32_e64 vcc, |v1|, s4
2861; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v4, 1.0, v4, vcc
2862; SDAG-DAZ-NEXT:    v_mul_f32_e32 v0, v0, v5
2863; SDAG-DAZ-NEXT:    v_mul_f32_e32 v1, v1, v4
2864; SDAG-DAZ-NEXT:    v_rcp_f32_e32 v0, v0
2865; SDAG-DAZ-NEXT:    v_rcp_f32_e32 v1, v1
2866; SDAG-DAZ-NEXT:    v_mul_f32_e32 v0, v2, v0
2867; SDAG-DAZ-NEXT:    v_mul_f32_e32 v1, v3, v1
2868; SDAG-DAZ-NEXT:    v_mul_f32_e32 v0, v5, v0
2869; SDAG-DAZ-NEXT:    v_mul_f32_e32 v1, v4, v1
2870; SDAG-DAZ-NEXT:    s_setpc_b64 s[30:31]
2871;
2872; GISEL-DAZ-LABEL: v_sqrt_v2f32_ulp2_contractable_fdiv:
2873; GISEL-DAZ:       ; %bb.0:
2874; GISEL-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2875; GISEL-DAZ-NEXT:    v_sqrt_f32_e32 v0, v0
2876; GISEL-DAZ-NEXT:    v_sqrt_f32_e32 v1, v1
2877; GISEL-DAZ-NEXT:    v_mov_b32_e32 v4, 0x6f800000
2878; GISEL-DAZ-NEXT:    v_mov_b32_e32 v5, 0x2f800000
2879; GISEL-DAZ-NEXT:    v_cmp_gt_f32_e64 vcc, |v0|, v4
2880; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v6, 1.0, v5, vcc
2881; GISEL-DAZ-NEXT:    v_cmp_gt_f32_e64 vcc, |v1|, v4
2882; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v4, 1.0, v5, vcc
2883; GISEL-DAZ-NEXT:    v_mul_f32_e32 v0, v0, v6
2884; GISEL-DAZ-NEXT:    v_mul_f32_e32 v1, v1, v4
2885; GISEL-DAZ-NEXT:    v_rcp_f32_e32 v0, v0
2886; GISEL-DAZ-NEXT:    v_rcp_f32_e32 v1, v1
2887; GISEL-DAZ-NEXT:    v_mul_f32_e32 v0, v2, v0
2888; GISEL-DAZ-NEXT:    v_mul_f32_e32 v1, v3, v1
2889; GISEL-DAZ-NEXT:    v_mul_f32_e32 v0, v6, v0
2890; GISEL-DAZ-NEXT:    v_mul_f32_e32 v1, v4, v1
2891; GISEL-DAZ-NEXT:    s_setpc_b64 s[30:31]
2892  %sqrt = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !4
2893  %result = fdiv contract <2 x float> %y, %sqrt, !fpmath !3
2894  ret <2 x float> %result
2895}
2896
; <2 x float> test for %y / sqrt(%x): the sqrt call has 'contract' and
; !fpmath !4, the fdiv has 'arcp contract' and !fpmath !3.
; With 'arcp' the expected output computes a reciprocal of each sqrt result
; and multiplies it by the matching %y element:
;  * Preserve-sign (DAZ) runs: v_sqrt_f32, v_rcp_f32, v_mul_f32 per element.
;  * Default-denormal (IEEE) runs: the same shape, plus v_ldexp_f32 rescaling
;    around each sqrt and a frexp/rcp/ldexp expansion of each reciprocal.
2897define <2 x float> @v_sqrt_v2f32_ulp2_contractable_fdiv_arcp(<2 x float> %x, <2 x float> %y) {
2898; SDAG-IEEE-LABEL: v_sqrt_v2f32_ulp2_contractable_fdiv_arcp:
2899; SDAG-IEEE:       ; %bb.0:
2900; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2901; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0x800000
2902; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
2903; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
2904; SDAG-IEEE-NEXT:    v_lshlrev_b32_e32 v4, 5, v4
2905; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e64 s[4:5], s4, v1
2906; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v4
2907; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s[4:5]
2908; SDAG-IEEE-NEXT:    v_lshlrev_b32_e32 v4, 5, v4
2909; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
2910; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v1, v1, v4
2911; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v1, v1
2912; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v4, 0, -16, vcc
2913; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v4
2914; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v4, 0, -16, s[4:5]
2915; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0x7f800000
2916; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v1, v1, v4
2917; SDAG-IEEE-NEXT:    v_frexp_mant_f32_e32 v4, v0
2918; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
2919; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v4, v0, v4, vcc
2920; SDAG-IEEE-NEXT:    v_rcp_f32_e32 v4, v4
2921; SDAG-IEEE-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
2922; SDAG-IEEE-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
2923; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v4, v0
2924; SDAG-IEEE-NEXT:    v_mul_f32_e32 v0, v2, v0
2925; SDAG-IEEE-NEXT:    v_frexp_mant_f32_e32 v2, v1
2926; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 vcc, |v1|, s4
2927; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v2, v1, v2, vcc
2928; SDAG-IEEE-NEXT:    v_rcp_f32_e32 v2, v2
2929; SDAG-IEEE-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
2930; SDAG-IEEE-NEXT:    v_sub_i32_e32 v1, vcc, 0, v1
2931; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v1, v2, v1
2932; SDAG-IEEE-NEXT:    v_mul_f32_e32 v1, v3, v1
2933; SDAG-IEEE-NEXT:    s_setpc_b64 s[30:31]
2934;
2935; GISEL-IEEE-LABEL: v_sqrt_v2f32_ulp2_contractable_fdiv_arcp:
2936; GISEL-IEEE:       ; %bb.0:
2937; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2938; GISEL-IEEE-NEXT:    v_mov_b32_e32 v4, 0x800000
2939; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v4
2940; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
2941; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], v1, v4
2942; GISEL-IEEE-NEXT:    v_lshlrev_b32_e32 v5, 5, v5
2943; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s[4:5]
2944; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v5
2945; GISEL-IEEE-NEXT:    v_lshlrev_b32_e32 v4, 5, v4
2946; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
2947; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v1, v1, v4
2948; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v1, v1
2949; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v4, 0, -16, vcc
2950; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v4
2951; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v4, 0, -16, s[4:5]
2952; GISEL-IEEE-NEXT:    v_mov_b32_e32 v5, 0x7f800000
2953; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v1, v1, v4
2954; GISEL-IEEE-NEXT:    v_frexp_mant_f32_e32 v4, v0
2955; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, v5
2956; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v4, v0, v4, vcc
2957; GISEL-IEEE-NEXT:    v_rcp_f32_e32 v4, v4
2958; GISEL-IEEE-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
2959; GISEL-IEEE-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
2960; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v0, v4, v0
2961; GISEL-IEEE-NEXT:    v_mul_f32_e32 v0, v2, v0
2962; GISEL-IEEE-NEXT:    v_frexp_mant_f32_e32 v2, v1
2963; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 vcc, |v1|, v5
2964; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v2, v1, v2, vcc
2965; GISEL-IEEE-NEXT:    v_rcp_f32_e32 v2, v2
2966; GISEL-IEEE-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
2967; GISEL-IEEE-NEXT:    v_sub_i32_e32 v1, vcc, 0, v1
2968; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v1, v2, v1
2969; GISEL-IEEE-NEXT:    v_mul_f32_e32 v1, v3, v1
2970; GISEL-IEEE-NEXT:    s_setpc_b64 s[30:31]
2971;
2972; GCN-DAZ-LABEL: v_sqrt_v2f32_ulp2_contractable_fdiv_arcp:
2973; GCN-DAZ:       ; %bb.0:
2974; GCN-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2975; GCN-DAZ-NEXT:    v_sqrt_f32_e32 v0, v0
2976; GCN-DAZ-NEXT:    v_sqrt_f32_e32 v1, v1
2977; GCN-DAZ-NEXT:    v_rcp_f32_e32 v0, v0
2978; GCN-DAZ-NEXT:    v_rcp_f32_e32 v1, v1
2979; GCN-DAZ-NEXT:    v_mul_f32_e32 v0, v2, v0
2980; GCN-DAZ-NEXT:    v_mul_f32_e32 v1, v3, v1
2981; GCN-DAZ-NEXT:    s_setpc_b64 s[30:31]
2982  %sqrt = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !4
2983  %result = fdiv arcp contract <2 x float> %y, %sqrt, !fpmath !3
2984  ret <2 x float> %result
2985}
2986
; amdgpu_ps test with an 'inreg' float argument: an 'afn' sqrt call tagged
; !fpmath !1, whose result is bitcast to i32 and passed through
; llvm.amdgcn.readfirstlane before being returned.
; All four run configurations share one expectation: a single v_sqrt_f32
; reading s0 directly, followed by v_readfirstlane_b32.
2987define amdgpu_ps i32 @s_sqrt_f32_ulp1(float inreg %x) {
2988; GCN-LABEL: s_sqrt_f32_ulp1:
2989; GCN:       ; %bb.0:
2990; GCN-NEXT:    v_sqrt_f32_e32 v0, s0
2991; GCN-NEXT:    v_readfirstlane_b32 s0, v0
2992; GCN-NEXT:    ; return to shader part epilog
2993  %result = call afn float @llvm.sqrt.f32(float %x), !fpmath !1
2994  %cast = bitcast float %result to i32
2995  %firstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %cast)
2996  ret i32 %firstlane
2997}
2998
; amdgpu_ps test with an 'inreg' float argument: an 'afn' sqrt call tagged
; !fpmath !2, whose result is bitcast to i32 and passed through
; llvm.amdgcn.readfirstlane before being returned.
; All four run configurations share one expectation: a single v_sqrt_f32
; reading s0 directly, followed by v_readfirstlane_b32.
2999define amdgpu_ps i32 @s_sqrt_f32_ulp2(float inreg %x) {
3000; GCN-LABEL: s_sqrt_f32_ulp2:
3001; GCN:       ; %bb.0:
3002; GCN-NEXT:    v_sqrt_f32_e32 v0, s0
3003; GCN-NEXT:    v_readfirstlane_b32 s0, v0
3004; GCN-NEXT:    ; return to shader part epilog
3005  %result = call afn float @llvm.sqrt.f32(float %x), !fpmath !2
3006  %cast = bitcast float %result to i32
3007  %firstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %cast)
3008  ret i32 %firstlane
3009}
3010
; amdgpu_ps test with an 'inreg' float argument: an 'afn' sqrt call tagged
; !fpmath !4, whose result is bitcast to i32 and passed through
; llvm.amdgcn.readfirstlane before being returned.
; All four run configurations share one expectation: a single v_sqrt_f32
; reading s0 directly, followed by v_readfirstlane_b32.
; NOTE(review): the ulp1/ulp2 siblings use !fpmath !1 / !2, while this "ulp3"
; test uses !4 rather than !3 - confirm against the !fpmath definitions that
; !4 is the intended accuracy node.
3011define amdgpu_ps i32 @s_sqrt_f32_ulp3(float inreg %x) {
3012; GCN-LABEL: s_sqrt_f32_ulp3:
3013; GCN:       ; %bb.0:
3014; GCN-NEXT:    v_sqrt_f32_e32 v0, s0
3015; GCN-NEXT:    v_readfirstlane_b32 s0, v0
3016; GCN-NEXT:    ; return to shader part epilog
3017  %result = call afn float @llvm.sqrt.f32(float %x), !fpmath !4
3018  %cast = bitcast float %result to i32
3019  %firstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %cast)
3020  ret i32 %firstlane
3021}
3022
; sqrt with !fpmath !2 and no fast-math flags on an argument marked
; nofpclass(psub) (positive subnormals excluded).
; The default-denormal (IEEE) runs still expect the input-scaling sequence
; around v_sqrt_f32: compare against 0x800000, v_ldexp_f32 by 32 (1 << 5)
; before the sqrt and by -16 after it. The preserve-sign (DAZ) runs expect a
; bare v_sqrt_f32.
3023define float @v_sqrt_f32_known_never_posdenormal_ulp2(float nofpclass(psub) %x) {
3024; SDAG-IEEE-LABEL: v_sqrt_f32_known_never_posdenormal_ulp2:
3025; SDAG-IEEE:       ; %bb.0:
3026; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3027; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0x800000
3028; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
3029; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
3030; SDAG-IEEE-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
3031; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
3032; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
3033; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, -16, vcc
3034; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
3035; SDAG-IEEE-NEXT:    s_setpc_b64 s[30:31]
3036;
3037; GISEL-IEEE-LABEL: v_sqrt_f32_known_never_posdenormal_ulp2:
3038; GISEL-IEEE:       ; %bb.0:
3039; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3040; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0x800000
3041; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
3042; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
3043; GISEL-IEEE-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
3044; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
3045; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
3046; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, -16, vcc
3047; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
3048; GISEL-IEEE-NEXT:    s_setpc_b64 s[30:31]
3049;
3050; GCN-DAZ-LABEL: v_sqrt_f32_known_never_posdenormal_ulp2:
3051; GCN-DAZ:       ; %bb.0:
3052; GCN-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3053; GCN-DAZ-NEXT:    v_sqrt_f32_e32 v0, v0
3054; GCN-DAZ-NEXT:    s_setpc_b64 s[30:31]
3055  %result = call float @llvm.sqrt.f32(float %x), !fpmath !2
3056  ret float %result
3057}
3058
; Variant of the nofpclass(psub) test in which the sqrt call also carries
; the 'nsz' flag (still !fpmath !2).
; Expected output matches the version without 'nsz': the default-denormal
; (IEEE) runs keep the v_ldexp_f32 scaling around v_sqrt_f32, and the
; preserve-sign (DAZ) runs expect a bare v_sqrt_f32.
3059define float @v_sqrt_f32_nsz_known_never_posdenormal_ulp2(float nofpclass(psub) %x) {
3060; SDAG-IEEE-LABEL: v_sqrt_f32_nsz_known_never_posdenormal_ulp2:
3061; SDAG-IEEE:       ; %bb.0:
3062; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3063; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0x800000
3064; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
3065; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
3066; SDAG-IEEE-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
3067; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
3068; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
3069; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, -16, vcc
3070; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
3071; SDAG-IEEE-NEXT:    s_setpc_b64 s[30:31]
3072;
3073; GISEL-IEEE-LABEL: v_sqrt_f32_nsz_known_never_posdenormal_ulp2:
3074; GISEL-IEEE:       ; %bb.0:
3075; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3076; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0x800000
3077; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
3078; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
3079; GISEL-IEEE-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
3080; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
3081; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
3082; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, -16, vcc
3083; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
3084; GISEL-IEEE-NEXT:    s_setpc_b64 s[30:31]
3085;
3086; GCN-DAZ-LABEL: v_sqrt_f32_nsz_known_never_posdenormal_ulp2:
3087; GCN-DAZ:       ; %bb.0:
3088; GCN-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3089; GCN-DAZ-NEXT:    v_sqrt_f32_e32 v0, v0
3090; GCN-DAZ-NEXT:    s_setpc_b64 s[30:31]
3091  %result = call nsz float @llvm.sqrt.f32(float %x), !fpmath !2
3092  ret float %result
3093}
3094
; sqrt with !fpmath !2 and no fast-math flags on an argument marked
; nofpclass(nsub) (negative subnormals excluded).
; The default-denormal (IEEE) runs still expect the v_ldexp_f32 scaling
; around v_sqrt_f32 (scale up by 32 when the input compares below 0x800000,
; scale the result by -16). The preserve-sign (DAZ) runs expect a bare
; v_sqrt_f32.
3095define float @v_sqrt_f32_known_never_negdenormal(float nofpclass(nsub) %x) {
3096; SDAG-IEEE-LABEL: v_sqrt_f32_known_never_negdenormal:
3097; SDAG-IEEE:       ; %bb.0:
3098; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3099; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0x800000
3100; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
3101; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
3102; SDAG-IEEE-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
3103; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
3104; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
3105; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, -16, vcc
3106; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
3107; SDAG-IEEE-NEXT:    s_setpc_b64 s[30:31]
3108;
3109; GISEL-IEEE-LABEL: v_sqrt_f32_known_never_negdenormal:
3110; GISEL-IEEE:       ; %bb.0:
3111; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3112; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0x800000
3113; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
3114; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
3115; GISEL-IEEE-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
3116; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
3117; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
3118; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, -16, vcc
3119; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
3120; GISEL-IEEE-NEXT:    s_setpc_b64 s[30:31]
3121;
3122; GCN-DAZ-LABEL: v_sqrt_f32_known_never_negdenormal:
3123; GCN-DAZ:       ; %bb.0:
3124; GCN-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3125; GCN-DAZ-NEXT:    v_sqrt_f32_e32 v0, v0
3126; GCN-DAZ-NEXT:    s_setpc_b64 s[30:31]
3127  %result = call float @llvm.sqrt.f32(float %x), !fpmath !2
3128  ret float %result
3129}
3130
; sqrt with !fpmath !2 on an argument marked nofpclass(sub) (no subnormals of
; either sign). Unlike the psub-only and nsub-only variants, every run
; configuration shares one expectation here: a bare v_sqrt_f32 with no
; v_ldexp_f32 scaling around it.
3131define float @v_sqrt_f32_known_never_denormal(float nofpclass(sub) %x) {
3132; GCN-LABEL: v_sqrt_f32_known_never_denormal:
3133; GCN:       ; %bb.0:
3134; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3135; GCN-NEXT:    v_sqrt_f32_e32 v0, v0
3136; GCN-NEXT:    s_setpc_b64 s[30:31]
3137  %result = call float @llvm.sqrt.f32(float %x), !fpmath !2
3138  ret float %result
3139}
3140
; 'ninf' sqrt with no !fpmath metadata on an argument marked nofpclass(zero).
; Expected output is the long expansion in every configuration:
;  * The input is multiplied by 0x4f800000 when it compares below 0xf800000,
;    and the result is multiplied by 0x37800000 under the same condition.
;  * Default-denormal (IEEE) runs: v_sqrt_f32, then the result's bit pattern
;    is tried at -1 and +1 (integer add) and chosen via v_fma_f32 residual
;    comparisons against 0.
;  * Preserve-sign (DAZ) runs: v_rsq_f32 followed by a v_fma_f32 refinement
;    sequence.
;  * A final v_cmp_class_f32 with mask 0x260 selects the (possibly scaled)
;    input instead of the computed value; the check lines show it is still
;    emitted with 'ninf' and nofpclass(zero) present.
3141define float @v_sqrt_f32_ninf_known_never_zero(float nofpclass(zero) %x) {
3142; SDAG-IEEE-LABEL: v_sqrt_f32_ninf_known_never_zero:
3143; SDAG-IEEE:       ; %bb.0:
3144; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3145; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0xf800000
3146; SDAG-IEEE-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v0
3147; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
3148; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
3149; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
3150; SDAG-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
3151; SDAG-IEEE-NEXT:    v_fma_f32 v3, -v2, v1, v0
3152; SDAG-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v3
3153; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, v1, v2, s[4:5]
3154; SDAG-IEEE-NEXT:    v_add_i32_e64 v3, s[4:5], 1, v1
3155; SDAG-IEEE-NEXT:    v_fma_f32 v1, -v3, v1, v0
3156; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v1
3157; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, v2, v3, s[4:5]
3158; SDAG-IEEE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
3159; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
3160; SDAG-IEEE-NEXT:    v_mov_b32_e32 v2, 0x260
3161; SDAG-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
3162; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
3163; SDAG-IEEE-NEXT:    s_setpc_b64 s[30:31]
3164;
3165; GISEL-IEEE-LABEL: v_sqrt_f32_ninf_known_never_zero:
3166; GISEL-IEEE:       ; %bb.0:
3167; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3168; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0xf800000
3169; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
3170; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
3171; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
3172; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
3173; GISEL-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
3174; GISEL-IEEE-NEXT:    v_fma_f32 v3, -v2, v1, v0
3175; GISEL-IEEE-NEXT:    v_add_i32_e64 v4, s[4:5], 1, v1
3176; GISEL-IEEE-NEXT:    v_fma_f32 v5, -v4, v1, v0
3177; GISEL-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v3
3178; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[4:5]
3179; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v5
3180; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v4, s[4:5]
3181; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
3182; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
3183; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, 0x260
3184; GISEL-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
3185; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
3186; GISEL-IEEE-NEXT:    s_setpc_b64 s[30:31]
3187;
3188; SDAG-DAZ-LABEL: v_sqrt_f32_ninf_known_never_zero:
3189; SDAG-DAZ:       ; %bb.0:
3190; SDAG-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3191; SDAG-DAZ-NEXT:    s_mov_b32 s4, 0xf800000
3192; SDAG-DAZ-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v0
3193; SDAG-DAZ-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
3194; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
3195; SDAG-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
3196; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
3197; SDAG-DAZ-NEXT:    v_mul_f32_e32 v1, 0.5, v1
3198; SDAG-DAZ-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
3199; SDAG-DAZ-NEXT:    v_fma_f32 v2, v2, v3, v2
3200; SDAG-DAZ-NEXT:    v_fma_f32 v4, -v2, v2, v0
3201; SDAG-DAZ-NEXT:    v_fma_f32 v1, v1, v3, v1
3202; SDAG-DAZ-NEXT:    v_fma_f32 v1, v4, v1, v2
3203; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
3204; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
3205; SDAG-DAZ-NEXT:    v_mov_b32_e32 v2, 0x260
3206; SDAG-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
3207; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
3208; SDAG-DAZ-NEXT:    s_setpc_b64 s[30:31]
3209;
3210; GISEL-DAZ-LABEL: v_sqrt_f32_ninf_known_never_zero:
3211; GISEL-DAZ:       ; %bb.0:
3212; GISEL-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3213; GISEL-DAZ-NEXT:    v_mov_b32_e32 v1, 0xf800000
3214; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
3215; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
3216; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
3217; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
3218; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
3219; GISEL-DAZ-NEXT:    v_mul_f32_e32 v1, 0.5, v1
3220; GISEL-DAZ-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
3221; GISEL-DAZ-NEXT:    v_fma_f32 v2, v2, v3, v2
3222; GISEL-DAZ-NEXT:    v_fma_f32 v1, v1, v3, v1
3223; GISEL-DAZ-NEXT:    v_fma_f32 v3, -v2, v2, v0
3224; GISEL-DAZ-NEXT:    v_fma_f32 v1, v3, v1, v2
3225; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
3226; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
3227; GISEL-DAZ-NEXT:    v_mov_b32_e32 v2, 0x260
3228; GISEL-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
3229; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
3230; GISEL-DAZ-NEXT:    s_setpc_b64 s[30:31]
3231  %result = call ninf float @llvm.sqrt.f32(float %x)
3232  ret float %result
3233}
3234
; Full-precision f32 sqrt of an argument annotated nofpclass(zero). The call
; has no fast-math flags and no !fpmath metadata.
;
; Expected code is recorded for both instruction selectors (SDAG and GISEL
; prefixes) in both f32 denormal modes set up by the RUN lines at the top of
; the file (IEEE is the default mode, DAZ is preserve-sign).
;
; What the recorded sequences show
;  - All variants multiply the input by 0x4f800000 (2^32 as an f32 bit
;    pattern) when it compares below 0xf800000 (2^-96), and multiply the
;    result by 0x37800000 (2^-16) under the same condition.
;  - The IEEE variants start from v_sqrt_f32 and then pick between the
;    result, result-1 and result+1 (integer adds on the bit pattern) using
;    the signs of two fma residuals.
;  - The DAZ variants start from v_rsq_f32 and refine with a v_mul/v_fma chain.
;  - A final v_cmp_class with mask 0x260 returns the (possibly scaled) input
;    itself instead of the computed value. Presumably the mask selects the
;    zero and +inf classes - confirm against the ISA documentation.
;
; The SDAG-IEEE sequence is the same as the one recorded for the unannotated
; v_sqrt_f32 at the top of the file, so the nofpclass(zero) hint does not
; change that expansion.
3235define float @v_sqrt_f32_known_never_zero(float nofpclass(zero) %x) {
3236; SDAG-IEEE-LABEL: v_sqrt_f32_known_never_zero:
3237; SDAG-IEEE:       ; %bb.0:
3238; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3239; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0xf800000
3240; SDAG-IEEE-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v0
3241; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
3242; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
3243; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
3244; SDAG-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
3245; SDAG-IEEE-NEXT:    v_fma_f32 v3, -v2, v1, v0
3246; SDAG-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v3
3247; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, v1, v2, s[4:5]
3248; SDAG-IEEE-NEXT:    v_add_i32_e64 v3, s[4:5], 1, v1
3249; SDAG-IEEE-NEXT:    v_fma_f32 v1, -v3, v1, v0
3250; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v1
3251; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, v2, v3, s[4:5]
3252; SDAG-IEEE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
3253; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
3254; SDAG-IEEE-NEXT:    v_mov_b32_e32 v2, 0x260
3255; SDAG-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
3256; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
3257; SDAG-IEEE-NEXT:    s_setpc_b64 s[30:31]
3258;
3259; GISEL-IEEE-LABEL: v_sqrt_f32_known_never_zero:
3260; GISEL-IEEE:       ; %bb.0:
3261; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3262; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0xf800000
3263; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
3264; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
3265; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
3266; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
3267; GISEL-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
3268; GISEL-IEEE-NEXT:    v_fma_f32 v3, -v2, v1, v0
3269; GISEL-IEEE-NEXT:    v_add_i32_e64 v4, s[4:5], 1, v1
3270; GISEL-IEEE-NEXT:    v_fma_f32 v5, -v4, v1, v0
3271; GISEL-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v3
3272; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[4:5]
3273; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v5
3274; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v4, s[4:5]
3275; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
3276; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
3277; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, 0x260
3278; GISEL-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
3279; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
3280; GISEL-IEEE-NEXT:    s_setpc_b64 s[30:31]
3281;
3282; SDAG-DAZ-LABEL: v_sqrt_f32_known_never_zero:
3283; SDAG-DAZ:       ; %bb.0:
3284; SDAG-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3285; SDAG-DAZ-NEXT:    s_mov_b32 s4, 0xf800000
3286; SDAG-DAZ-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v0
3287; SDAG-DAZ-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
3288; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
3289; SDAG-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
3290; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
3291; SDAG-DAZ-NEXT:    v_mul_f32_e32 v1, 0.5, v1
3292; SDAG-DAZ-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
3293; SDAG-DAZ-NEXT:    v_fma_f32 v2, v2, v3, v2
3294; SDAG-DAZ-NEXT:    v_fma_f32 v4, -v2, v2, v0
3295; SDAG-DAZ-NEXT:    v_fma_f32 v1, v1, v3, v1
3296; SDAG-DAZ-NEXT:    v_fma_f32 v1, v4, v1, v2
3297; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
3298; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
3299; SDAG-DAZ-NEXT:    v_mov_b32_e32 v2, 0x260
3300; SDAG-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
3301; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
3302; SDAG-DAZ-NEXT:    s_setpc_b64 s[30:31]
3303;
3304; GISEL-DAZ-LABEL: v_sqrt_f32_known_never_zero:
3305; GISEL-DAZ:       ; %bb.0:
3306; GISEL-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3307; GISEL-DAZ-NEXT:    v_mov_b32_e32 v1, 0xf800000
3308; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
3309; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
3310; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
3311; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
3312; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
3313; GISEL-DAZ-NEXT:    v_mul_f32_e32 v1, 0.5, v1
3314; GISEL-DAZ-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
3315; GISEL-DAZ-NEXT:    v_fma_f32 v2, v2, v3, v2
3316; GISEL-DAZ-NEXT:    v_fma_f32 v1, v1, v3, v1
3317; GISEL-DAZ-NEXT:    v_fma_f32 v3, -v2, v2, v0
3318; GISEL-DAZ-NEXT:    v_fma_f32 v1, v3, v1, v2
3319; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
3320; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
3321; GISEL-DAZ-NEXT:    v_mov_b32_e32 v2, 0x260
3322; GISEL-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
3323; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
3324; GISEL-DAZ-NEXT:    s_setpc_b64 s[30:31]
  ; Plain intrinsic call, so the only extra information available to the
  ; backend is the nofpclass(zero) attribute on %x.
3325  %result = call float @llvm.sqrt.f32(float %x)
3326  ret float %result
3327}
3328
; Full-precision f32 sqrt of an argument annotated nofpclass(zero inf), i.e.
; neither zero nor either infinity. The call has no fast-math flags and no
; !fpmath metadata.
;
; Expected code is recorded per selector (SDAG, GISEL) and per f32 denormal
; mode (IEEE default, DAZ preserve-sign), matching the RUN lines at the top of
; the file.
;
; All four recorded sequences are the same as those of
; v_sqrt_f32_known_never_zero. In particular the trailing v_cmp_class with
; mask 0x260 and the select that follows it are still expected, even though
; the argument is declared to be neither zero nor infinite.
3329define float @v_sqrt_f32_known_never_zero_never_inf(float nofpclass(zero inf) %x) {
3330; SDAG-IEEE-LABEL: v_sqrt_f32_known_never_zero_never_inf:
3331; SDAG-IEEE:       ; %bb.0:
3332; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3333; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0xf800000
3334; SDAG-IEEE-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v0
3335; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
3336; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
3337; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
3338; SDAG-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
3339; SDAG-IEEE-NEXT:    v_fma_f32 v3, -v2, v1, v0
3340; SDAG-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v3
3341; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, v1, v2, s[4:5]
3342; SDAG-IEEE-NEXT:    v_add_i32_e64 v3, s[4:5], 1, v1
3343; SDAG-IEEE-NEXT:    v_fma_f32 v1, -v3, v1, v0
3344; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v1
3345; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, v2, v3, s[4:5]
3346; SDAG-IEEE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
3347; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
3348; SDAG-IEEE-NEXT:    v_mov_b32_e32 v2, 0x260
3349; SDAG-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
3350; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
3351; SDAG-IEEE-NEXT:    s_setpc_b64 s[30:31]
3352;
3353; GISEL-IEEE-LABEL: v_sqrt_f32_known_never_zero_never_inf:
3354; GISEL-IEEE:       ; %bb.0:
3355; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3356; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0xf800000
3357; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
3358; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
3359; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
3360; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
3361; GISEL-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
3362; GISEL-IEEE-NEXT:    v_fma_f32 v3, -v2, v1, v0
3363; GISEL-IEEE-NEXT:    v_add_i32_e64 v4, s[4:5], 1, v1
3364; GISEL-IEEE-NEXT:    v_fma_f32 v5, -v4, v1, v0
3365; GISEL-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v3
3366; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[4:5]
3367; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v5
3368; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v4, s[4:5]
3369; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
3370; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
3371; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, 0x260
3372; GISEL-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
3373; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
3374; GISEL-IEEE-NEXT:    s_setpc_b64 s[30:31]
3375;
3376; SDAG-DAZ-LABEL: v_sqrt_f32_known_never_zero_never_inf:
3377; SDAG-DAZ:       ; %bb.0:
3378; SDAG-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3379; SDAG-DAZ-NEXT:    s_mov_b32 s4, 0xf800000
3380; SDAG-DAZ-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v0
3381; SDAG-DAZ-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
3382; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
3383; SDAG-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
3384; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
3385; SDAG-DAZ-NEXT:    v_mul_f32_e32 v1, 0.5, v1
3386; SDAG-DAZ-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
3387; SDAG-DAZ-NEXT:    v_fma_f32 v2, v2, v3, v2
3388; SDAG-DAZ-NEXT:    v_fma_f32 v4, -v2, v2, v0
3389; SDAG-DAZ-NEXT:    v_fma_f32 v1, v1, v3, v1
3390; SDAG-DAZ-NEXT:    v_fma_f32 v1, v4, v1, v2
3391; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
3392; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
3393; SDAG-DAZ-NEXT:    v_mov_b32_e32 v2, 0x260
3394; SDAG-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
3395; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
3396; SDAG-DAZ-NEXT:    s_setpc_b64 s[30:31]
3397;
3398; GISEL-DAZ-LABEL: v_sqrt_f32_known_never_zero_never_inf:
3399; GISEL-DAZ:       ; %bb.0:
3400; GISEL-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3401; GISEL-DAZ-NEXT:    v_mov_b32_e32 v1, 0xf800000
3402; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
3403; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
3404; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
3405; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
3406; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
3407; GISEL-DAZ-NEXT:    v_mul_f32_e32 v1, 0.5, v1
3408; GISEL-DAZ-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
3409; GISEL-DAZ-NEXT:    v_fma_f32 v2, v2, v3, v2
3410; GISEL-DAZ-NEXT:    v_fma_f32 v1, v1, v3, v1
3411; GISEL-DAZ-NEXT:    v_fma_f32 v3, -v2, v2, v0
3412; GISEL-DAZ-NEXT:    v_fma_f32 v1, v3, v1, v2
3413; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
3414; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
3415; GISEL-DAZ-NEXT:    v_mov_b32_e32 v2, 0x260
3416; GISEL-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
3417; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
3418; GISEL-DAZ-NEXT:    s_setpc_b64 s[30:31]
  ; Plain intrinsic call, so the only extra information available to the
  ; backend is the nofpclass(zero inf) attribute on %x.
3419  %result = call float @llvm.sqrt.f32(float %x)
3420  ret float %result
3421}
3422
; Full-precision f32 sqrt of an argument annotated nofpclass(zero ninf), i.e.
; never zero and never negative infinity (positive infinity is still
; allowed). The call has no fast-math flags and no !fpmath metadata.
;
; Expected code is recorded per selector (SDAG, GISEL) and per f32 denormal
; mode (IEEE default, DAZ preserve-sign), matching the RUN lines at the top of
; the file.
;
; All four recorded sequences are the same as those of
; v_sqrt_f32_known_never_zero, including the trailing v_cmp_class with mask
; 0x260 and the select that follows it.
3423define float @v_sqrt_f32_known_never_zero_never_ninf(float nofpclass(zero ninf) %x) {
3424; SDAG-IEEE-LABEL: v_sqrt_f32_known_never_zero_never_ninf:
3425; SDAG-IEEE:       ; %bb.0:
3426; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3427; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0xf800000
3428; SDAG-IEEE-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v0
3429; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
3430; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
3431; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
3432; SDAG-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
3433; SDAG-IEEE-NEXT:    v_fma_f32 v3, -v2, v1, v0
3434; SDAG-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v3
3435; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, v1, v2, s[4:5]
3436; SDAG-IEEE-NEXT:    v_add_i32_e64 v3, s[4:5], 1, v1
3437; SDAG-IEEE-NEXT:    v_fma_f32 v1, -v3, v1, v0
3438; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v1
3439; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, v2, v3, s[4:5]
3440; SDAG-IEEE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
3441; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
3442; SDAG-IEEE-NEXT:    v_mov_b32_e32 v2, 0x260
3443; SDAG-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
3444; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
3445; SDAG-IEEE-NEXT:    s_setpc_b64 s[30:31]
3446;
3447; GISEL-IEEE-LABEL: v_sqrt_f32_known_never_zero_never_ninf:
3448; GISEL-IEEE:       ; %bb.0:
3449; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3450; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0xf800000
3451; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
3452; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
3453; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
3454; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
3455; GISEL-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
3456; GISEL-IEEE-NEXT:    v_fma_f32 v3, -v2, v1, v0
3457; GISEL-IEEE-NEXT:    v_add_i32_e64 v4, s[4:5], 1, v1
3458; GISEL-IEEE-NEXT:    v_fma_f32 v5, -v4, v1, v0
3459; GISEL-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v3
3460; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[4:5]
3461; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v5
3462; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v4, s[4:5]
3463; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
3464; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
3465; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, 0x260
3466; GISEL-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
3467; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
3468; GISEL-IEEE-NEXT:    s_setpc_b64 s[30:31]
3469;
3470; SDAG-DAZ-LABEL: v_sqrt_f32_known_never_zero_never_ninf:
3471; SDAG-DAZ:       ; %bb.0:
3472; SDAG-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3473; SDAG-DAZ-NEXT:    s_mov_b32 s4, 0xf800000
3474; SDAG-DAZ-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v0
3475; SDAG-DAZ-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
3476; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
3477; SDAG-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
3478; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
3479; SDAG-DAZ-NEXT:    v_mul_f32_e32 v1, 0.5, v1
3480; SDAG-DAZ-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
3481; SDAG-DAZ-NEXT:    v_fma_f32 v2, v2, v3, v2
3482; SDAG-DAZ-NEXT:    v_fma_f32 v4, -v2, v2, v0
3483; SDAG-DAZ-NEXT:    v_fma_f32 v1, v1, v3, v1
3484; SDAG-DAZ-NEXT:    v_fma_f32 v1, v4, v1, v2
3485; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
3486; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
3487; SDAG-DAZ-NEXT:    v_mov_b32_e32 v2, 0x260
3488; SDAG-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
3489; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
3490; SDAG-DAZ-NEXT:    s_setpc_b64 s[30:31]
3491;
3492; GISEL-DAZ-LABEL: v_sqrt_f32_known_never_zero_never_ninf:
3493; GISEL-DAZ:       ; %bb.0:
3494; GISEL-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3495; GISEL-DAZ-NEXT:    v_mov_b32_e32 v1, 0xf800000
3496; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
3497; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
3498; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
3499; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
3500; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
3501; GISEL-DAZ-NEXT:    v_mul_f32_e32 v1, 0.5, v1
3502; GISEL-DAZ-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
3503; GISEL-DAZ-NEXT:    v_fma_f32 v2, v2, v3, v2
3504; GISEL-DAZ-NEXT:    v_fma_f32 v1, v1, v3, v1
3505; GISEL-DAZ-NEXT:    v_fma_f32 v3, -v2, v2, v0
3506; GISEL-DAZ-NEXT:    v_fma_f32 v1, v3, v1, v2
3507; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
3508; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
3509; GISEL-DAZ-NEXT:    v_mov_b32_e32 v2, 0x260
3510; GISEL-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
3511; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
3512; GISEL-DAZ-NEXT:    s_setpc_b64 s[30:31]
  ; Plain intrinsic call, so the only extra information available to the
  ; backend is the nofpclass(zero ninf) attribute on %x.
3513  %result = call float @llvm.sqrt.f32(float %x)
3514  ret float %result
3515}
3516
; Full-precision f32 sqrt of an argument annotated nofpclass(zero pinf), i.e.
; never zero and never positive infinity (negative infinity is still
; allowed). The call has no fast-math flags and no !fpmath metadata.
;
; Expected code is recorded per selector (SDAG, GISEL) and per f32 denormal
; mode (IEEE default, DAZ preserve-sign), matching the RUN lines at the top of
; the file.
;
; All four recorded sequences are the same as those of
; v_sqrt_f32_known_never_zero, including the trailing v_cmp_class with mask
; 0x260 and the select that follows it.
3517define float @v_sqrt_f32_known_never_zero_never_pinf(float nofpclass(zero pinf) %x) {
3518; SDAG-IEEE-LABEL: v_sqrt_f32_known_never_zero_never_pinf:
3519; SDAG-IEEE:       ; %bb.0:
3520; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3521; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0xf800000
3522; SDAG-IEEE-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v0
3523; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
3524; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
3525; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
3526; SDAG-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
3527; SDAG-IEEE-NEXT:    v_fma_f32 v3, -v2, v1, v0
3528; SDAG-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v3
3529; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, v1, v2, s[4:5]
3530; SDAG-IEEE-NEXT:    v_add_i32_e64 v3, s[4:5], 1, v1
3531; SDAG-IEEE-NEXT:    v_fma_f32 v1, -v3, v1, v0
3532; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v1
3533; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, v2, v3, s[4:5]
3534; SDAG-IEEE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
3535; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
3536; SDAG-IEEE-NEXT:    v_mov_b32_e32 v2, 0x260
3537; SDAG-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
3538; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
3539; SDAG-IEEE-NEXT:    s_setpc_b64 s[30:31]
3540;
3541; GISEL-IEEE-LABEL: v_sqrt_f32_known_never_zero_never_pinf:
3542; GISEL-IEEE:       ; %bb.0:
3543; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3544; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0xf800000
3545; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
3546; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
3547; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
3548; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
3549; GISEL-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
3550; GISEL-IEEE-NEXT:    v_fma_f32 v3, -v2, v1, v0
3551; GISEL-IEEE-NEXT:    v_add_i32_e64 v4, s[4:5], 1, v1
3552; GISEL-IEEE-NEXT:    v_fma_f32 v5, -v4, v1, v0
3553; GISEL-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v3
3554; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[4:5]
3555; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v5
3556; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v4, s[4:5]
3557; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
3558; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
3559; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, 0x260
3560; GISEL-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
3561; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
3562; GISEL-IEEE-NEXT:    s_setpc_b64 s[30:31]
3563;
3564; SDAG-DAZ-LABEL: v_sqrt_f32_known_never_zero_never_pinf:
3565; SDAG-DAZ:       ; %bb.0:
3566; SDAG-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3567; SDAG-DAZ-NEXT:    s_mov_b32 s4, 0xf800000
3568; SDAG-DAZ-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v0
3569; SDAG-DAZ-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
3570; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
3571; SDAG-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
3572; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
3573; SDAG-DAZ-NEXT:    v_mul_f32_e32 v1, 0.5, v1
3574; SDAG-DAZ-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
3575; SDAG-DAZ-NEXT:    v_fma_f32 v2, v2, v3, v2
3576; SDAG-DAZ-NEXT:    v_fma_f32 v4, -v2, v2, v0
3577; SDAG-DAZ-NEXT:    v_fma_f32 v1, v1, v3, v1
3578; SDAG-DAZ-NEXT:    v_fma_f32 v1, v4, v1, v2
3579; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
3580; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
3581; SDAG-DAZ-NEXT:    v_mov_b32_e32 v2, 0x260
3582; SDAG-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
3583; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
3584; SDAG-DAZ-NEXT:    s_setpc_b64 s[30:31]
3585;
3586; GISEL-DAZ-LABEL: v_sqrt_f32_known_never_zero_never_pinf:
3587; GISEL-DAZ:       ; %bb.0:
3588; GISEL-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3589; GISEL-DAZ-NEXT:    v_mov_b32_e32 v1, 0xf800000
3590; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
3591; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
3592; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
3593; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
3594; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
3595; GISEL-DAZ-NEXT:    v_mul_f32_e32 v1, 0.5, v1
3596; GISEL-DAZ-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
3597; GISEL-DAZ-NEXT:    v_fma_f32 v2, v2, v3, v2
3598; GISEL-DAZ-NEXT:    v_fma_f32 v1, v1, v3, v1
3599; GISEL-DAZ-NEXT:    v_fma_f32 v3, -v2, v2, v0
3600; GISEL-DAZ-NEXT:    v_fma_f32 v1, v3, v1, v2
3601; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
3602; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
3603; GISEL-DAZ-NEXT:    v_mov_b32_e32 v2, 0x260
3604; GISEL-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
3605; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
3606; GISEL-DAZ-NEXT:    s_setpc_b64 s[30:31]
  ; Plain intrinsic call, so the only extra information available to the
  ; backend is the nofpclass(zero pinf) attribute on %x.
3607  %result = call float @llvm.sqrt.f32(float %x)
3608  ret float %result
3609}
3610
; Full-precision f32 sqrt whose operand is the mantissa result of
; llvm.frexp.f32.i32 (element 0 of the returned pair). The sqrt call has no
; fast-math flags and no !fpmath metadata.
;
; The checks use the shared SDAG and GISEL prefixes, which the RUN lines at
; the top of the file pass to both the default and the preserve-sign denormal
; runs, so one sequence per selector has to hold in both modes.
;
; What the recorded sequences show
;  - frexp becomes v_frexp_mant_f32 plus a select that keeps the original
;    input unless |x| compares below 0x7f800000 (the f32 +inf bit pattern).
;  - The sqrt then uses the v_rsq_f32 based expansion with a v_mul/v_fma
;    refinement chain.
;  - The input scaling (compare against 0xf800000, multiply by 0x4f800000,
;    rescale by 0x37800000) and the final v_cmp_class with mask 0x260 are
;    still expected.
3611define float @v_sqrt_f32_frexp_src(float %x) {
3612; SDAG-LABEL: v_sqrt_f32_frexp_src:
3613; SDAG:       ; %bb.0:
3614; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3615; SDAG-NEXT:    s_mov_b32 s4, 0x7f800000
3616; SDAG-NEXT:    v_frexp_mant_f32_e32 v1, v0
3617; SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
3618; SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
3619; SDAG-NEXT:    s_mov_b32 s4, 0xf800000
3620; SDAG-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v0
3621; SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
3622; SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
3623; SDAG-NEXT:    v_rsq_f32_e32 v1, v0
3624; SDAG-NEXT:    v_mul_f32_e32 v2, v0, v1
3625; SDAG-NEXT:    v_mul_f32_e32 v1, 0.5, v1
3626; SDAG-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
3627; SDAG-NEXT:    v_fma_f32 v2, v2, v3, v2
3628; SDAG-NEXT:    v_fma_f32 v4, -v2, v2, v0
3629; SDAG-NEXT:    v_fma_f32 v1, v1, v3, v1
3630; SDAG-NEXT:    v_fma_f32 v1, v4, v1, v2
3631; SDAG-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
3632; SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
3633; SDAG-NEXT:    v_mov_b32_e32 v2, 0x260
3634; SDAG-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
3635; SDAG-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
3636; SDAG-NEXT:    s_setpc_b64 s[30:31]
3637;
3638; GISEL-LABEL: v_sqrt_f32_frexp_src:
3639; GISEL:       ; %bb.0:
3640; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3641; GISEL-NEXT:    v_mov_b32_e32 v2, 0x7f800000
3642; GISEL-NEXT:    v_frexp_mant_f32_e32 v1, v0
3643; GISEL-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, v2
3644; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
3645; GISEL-NEXT:    v_mov_b32_e32 v1, 0xf800000
3646; GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
3647; GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
3648; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
3649; GISEL-NEXT:    v_rsq_f32_e32 v1, v0
3650; GISEL-NEXT:    v_mul_f32_e32 v2, v0, v1
3651; GISEL-NEXT:    v_mul_f32_e32 v1, 0.5, v1
3652; GISEL-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
3653; GISEL-NEXT:    v_fma_f32 v2, v2, v3, v2
3654; GISEL-NEXT:    v_fma_f32 v1, v1, v3, v1
3655; GISEL-NEXT:    v_fma_f32 v3, -v2, v2, v0
3656; GISEL-NEXT:    v_fma_f32 v1, v3, v1, v2
3657; GISEL-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
3658; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
3659; GISEL-NEXT:    v_mov_b32_e32 v2, 0x260
3660; GISEL-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
3661; GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
3662; GISEL-NEXT:    s_setpc_b64 s[30:31]
  ; Only the mantissa half of the frexp result is used; the exponent is dead.
3663  %frexp = call { float, i32 } @llvm.frexp.f32.i32(float %x)
3664  %frexp.mant = extractvalue { float, i32 } %frexp, 0
3665  %result = call float @llvm.sqrt.f32(float %frexp.mant)
3666  ret float %result
3667}
3668
; Relaxed-accuracy variant of v_sqrt_f32_frexp_src. The sqrt call carries
; !fpmath !4; going by the function name this presumably allows 3 ulp of
; error, but the metadata node is defined outside this part of the file -
; confirm there.
;
; The checks use the shared SDAG and GISEL prefixes, so the same sequence is
; expected in both denormal modes exercised by the RUN lines at the top of the
; file.
;
; What the recorded sequences show
;  - frexp is still v_frexp_mant_f32 plus a select on |x| against 0x7f800000.
;  - The sqrt itself is a single v_sqrt_f32 with no input scaling, no
;    refinement and no class check.
3669define float @v_sqrt_f32_ulp3_frexp_src(float %x) {
3670; SDAG-LABEL: v_sqrt_f32_ulp3_frexp_src:
3671; SDAG:       ; %bb.0:
3672; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3673; SDAG-NEXT:    s_mov_b32 s4, 0x7f800000
3674; SDAG-NEXT:    v_frexp_mant_f32_e32 v1, v0
3675; SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
3676; SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
3677; SDAG-NEXT:    v_sqrt_f32_e32 v0, v0
3678; SDAG-NEXT:    s_setpc_b64 s[30:31]
3679;
3680; GISEL-LABEL: v_sqrt_f32_ulp3_frexp_src:
3681; GISEL:       ; %bb.0:
3682; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3683; GISEL-NEXT:    v_mov_b32_e32 v2, 0x7f800000
3684; GISEL-NEXT:    v_frexp_mant_f32_e32 v1, v0
3685; GISEL-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, v2
3686; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
3687; GISEL-NEXT:    v_sqrt_f32_e32 v0, v0
3688; GISEL-NEXT:    s_setpc_b64 s[30:31]
  ; Only the mantissa half of the frexp result is used; the exponent is dead.
3689  %frexp = call { float, i32 } @llvm.frexp.f32.i32(float %x)
3690  %frexp.mant = extractvalue { float, i32 } %frexp, 0
3691  %result = call float @llvm.sqrt.f32(float %frexp.mant), !fpmath !4
3692  ret float %result
3693}
3694
; Relaxed-accuracy f32 sqrt of an argument annotated nofpclass(zero ninf).
; The call carries !fpmath !2; going by the function name this presumably
; allows 2 ulp of error, but the metadata node is defined outside this part of
; the file - confirm there.
;
; What the recorded sequences show
;  - IEEE runs (separate SDAG and GISEL sequences): the input is compared
;    against 0x800000 (2^-126 as an f32 bit pattern). When it is below that,
;    v_ldexp_f32 applies an exponent of 32 (the 0/1 select shifted left by 5)
;    before v_sqrt_f32 and an exponent of -16 afterwards; otherwise both
;    exponents are 0.
;  - DAZ runs: both selectors share the GCN-DAZ prefix and expect a bare
;    v_sqrt_f32 with no scaling.
3695define float @v_sqrt_f32_known_never_zero_never_ninf_ulp2(float nofpclass(zero ninf) %x) {
3696; SDAG-IEEE-LABEL: v_sqrt_f32_known_never_zero_never_ninf_ulp2:
3697; SDAG-IEEE:       ; %bb.0:
3698; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3699; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0x800000
3700; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
3701; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
3702; SDAG-IEEE-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
3703; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
3704; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
3705; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, -16, vcc
3706; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
3707; SDAG-IEEE-NEXT:    s_setpc_b64 s[30:31]
3708;
3709; GISEL-IEEE-LABEL: v_sqrt_f32_known_never_zero_never_ninf_ulp2:
3710; GISEL-IEEE:       ; %bb.0:
3711; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3712; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0x800000
3713; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
3714; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
3715; GISEL-IEEE-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
3716; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
3717; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
3718; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, -16, vcc
3719; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
3720; GISEL-IEEE-NEXT:    s_setpc_b64 s[30:31]
3721;
3722; GCN-DAZ-LABEL: v_sqrt_f32_known_never_zero_never_ninf_ulp2:
3723; GCN-DAZ:       ; %bb.0:
3724; GCN-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3725; GCN-DAZ-NEXT:    v_sqrt_f32_e32 v0, v0
3726; GCN-DAZ-NEXT:    s_setpc_b64 s[30:31]
  ; No fast-math flags on the call; only !fpmath and the argument attribute
  ; distinguish this from the full-precision tests.
3727  %result = call float @llvm.sqrt.f32(float %x), !fpmath !2
3728  ret float %result
3729}
3730
; Relaxed-accuracy f32 sqrt of an argument annotated nofpclass(ninf) only, so
; zero is still a possible input. The call carries !fpmath !2; going by the
; function name this presumably allows 2 ulp of error, but the metadata node
; is defined outside this part of the file - confirm there.
;
; What the recorded sequences show
;  - IEEE runs (separate SDAG and GISEL sequences): inputs below 0x800000
;    (2^-126) are scaled with v_ldexp_f32 by 32 before v_sqrt_f32 and by -16
;    afterwards.
;  - DAZ runs: both selectors share the GCN-DAZ prefix and expect a bare
;    v_sqrt_f32.
;
; The instruction sequences are the same as those of
; v_sqrt_f32_known_never_zero_never_ninf_ulp2.
3731define float @v_sqrt_f32_known_never_ninf_ulp2(float nofpclass(ninf) %x) {
3732; SDAG-IEEE-LABEL: v_sqrt_f32_known_never_ninf_ulp2:
3733; SDAG-IEEE:       ; %bb.0:
3734; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3735; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0x800000
3736; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
3737; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
3738; SDAG-IEEE-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
3739; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
3740; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
3741; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, -16, vcc
3742; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
3743; SDAG-IEEE-NEXT:    s_setpc_b64 s[30:31]
3744;
3745; GISEL-IEEE-LABEL: v_sqrt_f32_known_never_ninf_ulp2:
3746; GISEL-IEEE:       ; %bb.0:
3747; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3748; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0x800000
3749; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
3750; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
3751; GISEL-IEEE-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
3752; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
3753; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
3754; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, -16, vcc
3755; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
3756; GISEL-IEEE-NEXT:    s_setpc_b64 s[30:31]
3757;
3758; GCN-DAZ-LABEL: v_sqrt_f32_known_never_ninf_ulp2:
3759; GCN-DAZ:       ; %bb.0:
3760; GCN-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3761; GCN-DAZ-NEXT:    v_sqrt_f32_e32 v0, v0
3762; GCN-DAZ-NEXT:    s_setpc_b64 s[30:31]
  ; No fast-math flags on the call; only !fpmath and the argument attribute
  ; distinguish this from the full-precision tests.
3763  %result = call float @llvm.sqrt.f32(float %x), !fpmath !2
3764  ret float %result
3765}
3766
; Same setup as v_sqrt_f32_known_never_ninf_ulp2 (argument annotated
; nofpclass(ninf), call carrying !fpmath !2), with the nsz fast-math flag
; added on the sqrt call. The !fpmath node is defined outside this part of the
; file; the name suggests a 2 ulp bound - confirm there.
;
; What the recorded sequences show
;  - IEEE runs (separate SDAG and GISEL sequences): inputs below 0x800000
;    (2^-126) are scaled with v_ldexp_f32 by 32 before v_sqrt_f32 and by -16
;    afterwards.
;  - DAZ runs: both selectors share the GCN-DAZ prefix and expect a bare
;    v_sqrt_f32.
;
; The instruction sequences are the same as those of the variant without nsz,
; so the flag does not change the expected code here.
3767define float @v_sqrt_f32_nsz_known_never_ninf_ulp2(float nofpclass(ninf) %x) {
3768; SDAG-IEEE-LABEL: v_sqrt_f32_nsz_known_never_ninf_ulp2:
3769; SDAG-IEEE:       ; %bb.0:
3770; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3771; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0x800000
3772; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
3773; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
3774; SDAG-IEEE-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
3775; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
3776; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
3777; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, -16, vcc
3778; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
3779; SDAG-IEEE-NEXT:    s_setpc_b64 s[30:31]
3780;
3781; GISEL-IEEE-LABEL: v_sqrt_f32_nsz_known_never_ninf_ulp2:
3782; GISEL-IEEE:       ; %bb.0:
3783; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3784; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0x800000
3785; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
3786; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
3787; GISEL-IEEE-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
3788; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
3789; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
3790; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, -16, vcc
3791; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
3792; GISEL-IEEE-NEXT:    s_setpc_b64 s[30:31]
3793;
3794; GCN-DAZ-LABEL: v_sqrt_f32_nsz_known_never_ninf_ulp2:
3795; GCN-DAZ:       ; %bb.0:
3796; GCN-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3797; GCN-DAZ-NEXT:    v_sqrt_f32_e32 v0, v0
3798; GCN-DAZ-NEXT:    s_setpc_b64 s[30:31]
  ; nsz is the only fast-math flag on the call.
3799  %result = call nsz float @llvm.sqrt.f32(float %x), !fpmath !2
3800  ret float %result
3801}
3802
; Full-precision f32 sqrt followed by an explicit guard: the result is
; replaced by a quiet NaN when the input compares unordered-or-less-than -0.0.
; Judging by the function name, the guard is meant to be recognised as
; redundant with the sqrt and removed.
;
; Expected code is recorded per selector (SDAG, GISEL) and per f32 denormal
; mode (IEEE default, DAZ preserve-sign), matching the RUN lines at the top of
; the file.
;
; What the recorded sequences show
;  - SDAG sequences contain only the sqrt expansion (v_sqrt_f32 based for
;    IEEE, v_rsq_f32 based for DAZ) with no extra compare or select, so the
;    guard is gone.
;  - GISEL sequences keep the original input in v0, run the sqrt expansion on
;    a copy, and then still expect v_bfrev_b32 of 1 (presumably the
;    0x80000000 pattern for -0.0), v_cmp_nge_f32 and a select of 0x7fc00000.
;    The guard is therefore not eliminated on that path.
3803define float @v_elim_redun_check_ult_sqrt(float %in) {
3804; SDAG-IEEE-LABEL: v_elim_redun_check_ult_sqrt:
3805; SDAG-IEEE:       ; %bb.0:
3806; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3807; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0xf800000
3808; SDAG-IEEE-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v0
3809; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
3810; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
3811; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
3812; SDAG-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
3813; SDAG-IEEE-NEXT:    v_fma_f32 v3, -v2, v1, v0
3814; SDAG-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v3
3815; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, v1, v2, s[4:5]
3816; SDAG-IEEE-NEXT:    v_add_i32_e64 v3, s[4:5], 1, v1
3817; SDAG-IEEE-NEXT:    v_fma_f32 v1, -v3, v1, v0
3818; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v1
3819; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, v2, v3, s[4:5]
3820; SDAG-IEEE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
3821; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
3822; SDAG-IEEE-NEXT:    v_mov_b32_e32 v2, 0x260
3823; SDAG-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
3824; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
3825; SDAG-IEEE-NEXT:    s_setpc_b64 s[30:31]
3826;
3827; GISEL-IEEE-LABEL: v_elim_redun_check_ult_sqrt:
3828; GISEL-IEEE:       ; %bb.0:
3829; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3830; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0xf800000
3831; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
3832; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
3833; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v1, v0, v2, vcc
3834; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v2, v1
3835; GISEL-IEEE-NEXT:    v_add_i32_e64 v3, s[4:5], -1, v2
3836; GISEL-IEEE-NEXT:    v_fma_f32 v4, -v3, v2, v1
3837; GISEL-IEEE-NEXT:    v_add_i32_e64 v5, s[4:5], 1, v2
3838; GISEL-IEEE-NEXT:    v_fma_f32 v6, -v5, v2, v1
3839; GISEL-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v4
3840; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[4:5]
3841; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v6
3842; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v2, v2, v5, s[4:5]
3843; GISEL-IEEE-NEXT:    v_mul_f32_e32 v3, 0x37800000, v2
3844; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
3845; GISEL-IEEE-NEXT:    v_mov_b32_e32 v3, 0x260
3846; GISEL-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v1, v3
3847; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
3848; GISEL-IEEE-NEXT:    v_bfrev_b32_e32 v2, 1
3849; GISEL-IEEE-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
3850; GISEL-IEEE-NEXT:    v_cmp_nge_f32_e32 vcc, v0, v2
3851; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
3852; GISEL-IEEE-NEXT:    s_setpc_b64 s[30:31]
3853;
3854; SDAG-DAZ-LABEL: v_elim_redun_check_ult_sqrt:
3855; SDAG-DAZ:       ; %bb.0:
3856; SDAG-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3857; SDAG-DAZ-NEXT:    s_mov_b32 s4, 0xf800000
3858; SDAG-DAZ-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v0
3859; SDAG-DAZ-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
3860; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
3861; SDAG-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
3862; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
3863; SDAG-DAZ-NEXT:    v_mul_f32_e32 v1, 0.5, v1
3864; SDAG-DAZ-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
3865; SDAG-DAZ-NEXT:    v_fma_f32 v2, v2, v3, v2
3866; SDAG-DAZ-NEXT:    v_fma_f32 v4, -v2, v2, v0
3867; SDAG-DAZ-NEXT:    v_fma_f32 v1, v1, v3, v1
3868; SDAG-DAZ-NEXT:    v_fma_f32 v1, v4, v1, v2
3869; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
3870; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
3871; SDAG-DAZ-NEXT:    v_mov_b32_e32 v2, 0x260
3872; SDAG-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
3873; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
3874; SDAG-DAZ-NEXT:    s_setpc_b64 s[30:31]
3875;
3876; GISEL-DAZ-LABEL: v_elim_redun_check_ult_sqrt:
3877; GISEL-DAZ:       ; %bb.0:
3878; GISEL-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3879; GISEL-DAZ-NEXT:    v_mov_b32_e32 v1, 0xf800000
3880; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
3881; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
3882; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v1, v0, v2, vcc
3883; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v2, v1
3884; GISEL-DAZ-NEXT:    v_mul_f32_e32 v3, v1, v2
3885; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0.5, v2
3886; GISEL-DAZ-NEXT:    v_fma_f32 v4, -v2, v3, 0.5
3887; GISEL-DAZ-NEXT:    v_fma_f32 v3, v3, v4, v3
3888; GISEL-DAZ-NEXT:    v_fma_f32 v2, v2, v4, v2
3889; GISEL-DAZ-NEXT:    v_fma_f32 v4, -v3, v3, v1
3890; GISEL-DAZ-NEXT:    v_fma_f32 v2, v4, v2, v3
3891; GISEL-DAZ-NEXT:    v_mul_f32_e32 v3, 0x37800000, v2
3892; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
3893; GISEL-DAZ-NEXT:    v_mov_b32_e32 v3, 0x260
3894; GISEL-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v1, v3
3895; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
3896; GISEL-DAZ-NEXT:    v_bfrev_b32_e32 v2, 1
3897; GISEL-DAZ-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
3898; GISEL-DAZ-NEXT:    v_cmp_nge_f32_e32 vcc, v0, v2
3899; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
3900; GISEL-DAZ-NEXT:    s_setpc_b64 s[30:31]
  ; ult is true for NaN inputs and for inputs below -0.0. 0x7FF8000000000000
  ; is the IR hexadecimal spelling of the quiet NaN returned in that case (it
  ; shows up as 0x7fc00000 in the GISEL checks).
3901  %sqrt = call float @llvm.sqrt.f32(float %in)
3902  %cmp = fcmp ult float %in, -0.000000e+00
3903  %res = select i1 %cmp, float 0x7FF8000000000000, float %sqrt
3904  ret float %res
3905}
3906
; Returns a quiet NaN when %in is unordered-or-less-than -0.0 and
; llvm.sqrt.f32(%in) otherwise; the sqrt call carries !fpmath !4.
; NOTE(review): !4 is defined outside this part of the file; the "_ulp3"
; suffix suggests a relaxed-accuracy annotation -- confirm against the
; metadata definitions.
; In all four run configurations the expected code keeps the explicit compare
; against -0.0 and the select of 0x7fc00000. With the default denormal mode
; the input is conditionally scaled up with v_ldexp_f32 before v_sqrt_f32 and
; the result scaled back down; with -denormal-fp-math-f32=preserve-sign a bare
; v_sqrt_f32 is expected.
3907define float @v_elim_redun_check_ult_sqrt_ulp3(float %in) {
3908; SDAG-IEEE-LABEL: v_elim_redun_check_ult_sqrt_ulp3:
3909; SDAG-IEEE:       ; %bb.0:
3910; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3911; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0x800000
3912; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
3913; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
3914; SDAG-IEEE-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
3915; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v1, v0, v1
3916; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v1, v1
3917; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, -16, vcc
3918; SDAG-IEEE-NEXT:    s_brev_b32 s4, 1
3919; SDAG-IEEE-NEXT:    v_cmp_le_f32_e32 vcc, s4, v0
3920; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v1, v1, v2
3921; SDAG-IEEE-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
3922; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
3923; SDAG-IEEE-NEXT:    s_setpc_b64 s[30:31]
3924;
3925; GISEL-IEEE-LABEL: v_elim_redun_check_ult_sqrt_ulp3:
3926; GISEL-IEEE:       ; %bb.0:
3927; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3928; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0x800000
3929; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
3930; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
3931; GISEL-IEEE-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
3932; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v1, v0, v1
3933; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v1, v1
3934; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, -16, vcc
3935; GISEL-IEEE-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
3936; GISEL-IEEE-NEXT:    v_ldexp_f32_e32 v1, v1, v2
3937; GISEL-IEEE-NEXT:    v_bfrev_b32_e32 v2, 1
3938; GISEL-IEEE-NEXT:    v_cmp_nge_f32_e32 vcc, v0, v2
3939; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
3940; GISEL-IEEE-NEXT:    s_setpc_b64 s[30:31]
3941;
3942; SDAG-DAZ-LABEL: v_elim_redun_check_ult_sqrt_ulp3:
3943; SDAG-DAZ:       ; %bb.0:
3944; SDAG-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3945; SDAG-DAZ-NEXT:    v_sqrt_f32_e32 v1, v0
3946; SDAG-DAZ-NEXT:    s_brev_b32 s4, 1
3947; SDAG-DAZ-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
3948; SDAG-DAZ-NEXT:    v_cmp_le_f32_e32 vcc, s4, v0
3949; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
3950; SDAG-DAZ-NEXT:    s_setpc_b64 s[30:31]
3951;
3952; GISEL-DAZ-LABEL: v_elim_redun_check_ult_sqrt_ulp3:
3953; GISEL-DAZ:       ; %bb.0:
3954; GISEL-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3955; GISEL-DAZ-NEXT:    v_sqrt_f32_e32 v1, v0
3956; GISEL-DAZ-NEXT:    v_bfrev_b32_e32 v2, 1
3957; GISEL-DAZ-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
3958; GISEL-DAZ-NEXT:    v_cmp_nge_f32_e32 vcc, v0, v2
3959; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
3960; GISEL-DAZ-NEXT:    s_setpc_b64 s[30:31]
3961  %sqrt = call float @llvm.sqrt.f32(float %in), !fpmath !4
  ; "ult" is an unordered compare: true for NaN inputs as well as inputs
  ; below -0.0.
3962  %cmp = fcmp ult float %in, -0.000000e+00
  ; 0x7FF8000000000000 is a quiet NaN, matching the 0x7fc00000 constant
  ; materialized in the expected code above.
3963  %res = select i1 %cmp, float 0x7FF8000000000000, float %sqrt
3964  ret float %res
3965}
3966
; Kernel that stores to %out a quiet NaN when %in is ordered-less-than -0.0
; and llvm.sqrt.f32(%in) otherwise.
; The SelectionDAG expectations contain no compare against -0.0 and no select
; of 0x7fc00000, i.e. the explicit guard is treated as redundant and dropped.
; The GlobalISel expectations still end with a v_cmp_lt_f32 against the value
; produced by v_bfrev_b32 of 1 and a select of 0x7fc00000, so the guard is
; retained on that path.
3967define amdgpu_kernel void @elim_redun_check_neg0(ptr addrspace(1) %out, float %in) {
3968; SDAG-IEEE-LABEL: elim_redun_check_neg0:
3969; SDAG-IEEE:       ; %bb.0: ; %entry
3970; SDAG-IEEE-NEXT:    s_load_dword s0, s[4:5], 0xb
3971; SDAG-IEEE-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x9
3972; SDAG-IEEE-NEXT:    v_mov_b32_e32 v0, 0xf800000
3973; SDAG-IEEE-NEXT:    v_mov_b32_e32 v1, 0x4f800000
3974; SDAG-IEEE-NEXT:    s_mov_b32 s7, 0xf000
3975; SDAG-IEEE-NEXT:    s_waitcnt lgkmcnt(0)
3976; SDAG-IEEE-NEXT:    v_mul_f32_e32 v1, s0, v1
3977; SDAG-IEEE-NEXT:    v_mov_b32_e32 v2, s0
3978; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 s[0:1], s0, v0
3979; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v0, v2, v1, s[0:1]
3980; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
3981; SDAG-IEEE-NEXT:    s_mov_b32 s6, -1
3982; SDAG-IEEE-NEXT:    v_add_i32_e32 v2, vcc, -1, v1
3983; SDAG-IEEE-NEXT:    v_fma_f32 v3, -v2, v1, v0
3984; SDAG-IEEE-NEXT:    v_cmp_ge_f32_e32 vcc, 0, v3
3985; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v2, v1, v2, vcc
3986; SDAG-IEEE-NEXT:    v_add_i32_e32 v3, vcc, 1, v1
3987; SDAG-IEEE-NEXT:    v_fma_f32 v1, -v3, v1, v0
3988; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v1
3989; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v1, v2, v3, vcc
3990; SDAG-IEEE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
3991; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[0:1]
3992; SDAG-IEEE-NEXT:    v_mov_b32_e32 v2, 0x260
3993; SDAG-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
3994; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
3995; SDAG-IEEE-NEXT:    buffer_store_dword v0, off, s[4:7], 0
3996; SDAG-IEEE-NEXT:    s_endpgm
3997;
3998; GISEL-IEEE-LABEL: elim_redun_check_neg0:
3999; GISEL-IEEE:       ; %bb.0: ; %entry
4000; GISEL-IEEE-NEXT:    s_load_dword s2, s[4:5], 0xb
4001; GISEL-IEEE-NEXT:    v_mov_b32_e32 v0, 0xf800000
4002; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0x4f800000
4003; GISEL-IEEE-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x9
4004; GISEL-IEEE-NEXT:    s_mov_b32 s6, -1
4005; GISEL-IEEE-NEXT:    s_waitcnt lgkmcnt(0)
4006; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, s2
4007; GISEL-IEEE-NEXT:    v_mul_f32_e32 v1, s2, v1
4008; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v0
4009; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
4010; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
4011; GISEL-IEEE-NEXT:    s_mov_b32 s7, 0xf000
4012; GISEL-IEEE-NEXT:    v_add_i32_e64 v2, s[0:1], -1, v1
4013; GISEL-IEEE-NEXT:    v_fma_f32 v3, -v2, v1, v0
4014; GISEL-IEEE-NEXT:    v_add_i32_e64 v4, s[0:1], 1, v1
4015; GISEL-IEEE-NEXT:    v_fma_f32 v5, -v4, v1, v0
4016; GISEL-IEEE-NEXT:    v_cmp_ge_f32_e64 s[0:1], 0, v3
4017; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[0:1]
4018; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 s[0:1], 0, v5
4019; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v4, s[0:1]
4020; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
4021; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
4022; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, 0x260
4023; GISEL-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
4024; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
4025; GISEL-IEEE-NEXT:    v_bfrev_b32_e32 v1, 1
4026; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
4027; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v1
4028; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
4029; GISEL-IEEE-NEXT:    buffer_store_dword v0, off, s[4:7], 0
4030; GISEL-IEEE-NEXT:    s_endpgm
4031;
4032; SDAG-DAZ-LABEL: elim_redun_check_neg0:
4033; SDAG-DAZ:       ; %bb.0: ; %entry
4034; SDAG-DAZ-NEXT:    s_load_dword s0, s[4:5], 0xb
4035; SDAG-DAZ-NEXT:    v_mov_b32_e32 v0, 0xf800000
4036; SDAG-DAZ-NEXT:    v_mov_b32_e32 v1, 0x4f800000
4037; SDAG-DAZ-NEXT:    s_mov_b32 s3, 0xf000
4038; SDAG-DAZ-NEXT:    s_mov_b32 s2, -1
4039; SDAG-DAZ-NEXT:    s_waitcnt lgkmcnt(0)
4040; SDAG-DAZ-NEXT:    v_mul_f32_e32 v1, s0, v1
4041; SDAG-DAZ-NEXT:    v_mov_b32_e32 v2, s0
4042; SDAG-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v0
4043; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
4044; SDAG-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
4045; SDAG-DAZ-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
4046; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
4047; SDAG-DAZ-NEXT:    v_mul_f32_e32 v1, 0.5, v1
4048; SDAG-DAZ-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
4049; SDAG-DAZ-NEXT:    v_fma_f32 v2, v2, v3, v2
4050; SDAG-DAZ-NEXT:    v_fma_f32 v4, -v2, v2, v0
4051; SDAG-DAZ-NEXT:    v_fma_f32 v1, v1, v3, v1
4052; SDAG-DAZ-NEXT:    v_fma_f32 v1, v4, v1, v2
4053; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
4054; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
4055; SDAG-DAZ-NEXT:    v_mov_b32_e32 v2, 0x260
4056; SDAG-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
4057; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
4058; SDAG-DAZ-NEXT:    s_waitcnt lgkmcnt(0)
4059; SDAG-DAZ-NEXT:    buffer_store_dword v0, off, s[0:3], 0
4060; SDAG-DAZ-NEXT:    s_endpgm
4061;
4062; GISEL-DAZ-LABEL: elim_redun_check_neg0:
4063; GISEL-DAZ:       ; %bb.0: ; %entry
4064; GISEL-DAZ-NEXT:    s_load_dword s2, s[4:5], 0xb
4065; GISEL-DAZ-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
4066; GISEL-DAZ-NEXT:    v_mov_b32_e32 v0, 0xf800000
4067; GISEL-DAZ-NEXT:    v_mov_b32_e32 v1, 0x4f800000
4068; GISEL-DAZ-NEXT:    s_mov_b32 s3, 0xf000
4069; GISEL-DAZ-NEXT:    s_waitcnt lgkmcnt(0)
4070; GISEL-DAZ-NEXT:    v_mov_b32_e32 v2, s2
4071; GISEL-DAZ-NEXT:    v_mul_f32_e32 v1, s2, v1
4072; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v0
4073; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
4074; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
4075; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
4076; GISEL-DAZ-NEXT:    v_mul_f32_e32 v1, 0.5, v1
4077; GISEL-DAZ-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
4078; GISEL-DAZ-NEXT:    v_fma_f32 v2, v2, v3, v2
4079; GISEL-DAZ-NEXT:    v_fma_f32 v1, v1, v3, v1
4080; GISEL-DAZ-NEXT:    v_fma_f32 v3, -v2, v2, v0
4081; GISEL-DAZ-NEXT:    v_fma_f32 v1, v3, v1, v2
4082; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
4083; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
4084; GISEL-DAZ-NEXT:    v_mov_b32_e32 v2, 0x260
4085; GISEL-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
4086; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
4087; GISEL-DAZ-NEXT:    v_bfrev_b32_e32 v1, 1
4088; GISEL-DAZ-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
4089; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v1
4090; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
4091; GISEL-DAZ-NEXT:    s_mov_b32 s2, -1
4092; GISEL-DAZ-NEXT:    buffer_store_dword v0, off, s[0:3], 0
4093; GISEL-DAZ-NEXT:    s_endpgm
4094entry:
4095  %sqrt = call float @llvm.sqrt.f32(float %in)
  ; "olt" is an ordered compare: false for NaN inputs, true only for inputs
  ; strictly below -0.0.
4096  %cmp = fcmp olt float %in, -0.000000e+00
  ; 0x7FF8000000000000 is a quiet NaN.
4097  %res = select i1 %cmp, float 0x7FF8000000000000, float %sqrt
4098  store float %res, ptr addrspace(1) %out
4099  ret void
4100}
4101
; Kernel that stores to %out a quiet NaN when %in is ordered-less-than +0.0
; and llvm.sqrt.f32(%in) otherwise; same shape as the -0.0 variant but with a
; positive-zero threshold.
; The SelectionDAG expectations contain no compare against zero and no select
; of 0x7fc00000, i.e. the explicit guard is dropped. The GlobalISel
; expectations still end with v_cmp_lt_f32_e64 against the inline constant 0
; and a select of 0x7fc00000, so the guard is retained on that path.
4102define amdgpu_kernel void @elim_redun_check_pos0(ptr addrspace(1) %out, float %in) {
4103; SDAG-IEEE-LABEL: elim_redun_check_pos0:
4104; SDAG-IEEE:       ; %bb.0: ; %entry
4105; SDAG-IEEE-NEXT:    s_load_dword s0, s[4:5], 0xb
4106; SDAG-IEEE-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x9
4107; SDAG-IEEE-NEXT:    v_mov_b32_e32 v0, 0xf800000
4108; SDAG-IEEE-NEXT:    v_mov_b32_e32 v1, 0x4f800000
4109; SDAG-IEEE-NEXT:    s_mov_b32 s7, 0xf000
4110; SDAG-IEEE-NEXT:    s_waitcnt lgkmcnt(0)
4111; SDAG-IEEE-NEXT:    v_mul_f32_e32 v1, s0, v1
4112; SDAG-IEEE-NEXT:    v_mov_b32_e32 v2, s0
4113; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 s[0:1], s0, v0
4114; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v0, v2, v1, s[0:1]
4115; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
4116; SDAG-IEEE-NEXT:    s_mov_b32 s6, -1
4117; SDAG-IEEE-NEXT:    v_add_i32_e32 v2, vcc, -1, v1
4118; SDAG-IEEE-NEXT:    v_fma_f32 v3, -v2, v1, v0
4119; SDAG-IEEE-NEXT:    v_cmp_ge_f32_e32 vcc, 0, v3
4120; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v2, v1, v2, vcc
4121; SDAG-IEEE-NEXT:    v_add_i32_e32 v3, vcc, 1, v1
4122; SDAG-IEEE-NEXT:    v_fma_f32 v1, -v3, v1, v0
4123; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v1
4124; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v1, v2, v3, vcc
4125; SDAG-IEEE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
4126; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[0:1]
4127; SDAG-IEEE-NEXT:    v_mov_b32_e32 v2, 0x260
4128; SDAG-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
4129; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
4130; SDAG-IEEE-NEXT:    buffer_store_dword v0, off, s[4:7], 0
4131; SDAG-IEEE-NEXT:    s_endpgm
4132;
4133; GISEL-IEEE-LABEL: elim_redun_check_pos0:
4134; GISEL-IEEE:       ; %bb.0: ; %entry
4135; GISEL-IEEE-NEXT:    s_load_dword s2, s[4:5], 0xb
4136; GISEL-IEEE-NEXT:    v_mov_b32_e32 v0, 0xf800000
4137; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0x4f800000
4138; GISEL-IEEE-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x9
4139; GISEL-IEEE-NEXT:    s_mov_b32 s6, -1
4140; GISEL-IEEE-NEXT:    s_waitcnt lgkmcnt(0)
4141; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, s2
4142; GISEL-IEEE-NEXT:    v_mul_f32_e32 v1, s2, v1
4143; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v0
4144; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
4145; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
4146; GISEL-IEEE-NEXT:    s_mov_b32 s7, 0xf000
4147; GISEL-IEEE-NEXT:    v_add_i32_e64 v2, s[0:1], -1, v1
4148; GISEL-IEEE-NEXT:    v_fma_f32 v3, -v2, v1, v0
4149; GISEL-IEEE-NEXT:    v_add_i32_e64 v4, s[0:1], 1, v1
4150; GISEL-IEEE-NEXT:    v_fma_f32 v5, -v4, v1, v0
4151; GISEL-IEEE-NEXT:    v_cmp_ge_f32_e64 s[0:1], 0, v3
4152; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[0:1]
4153; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 s[0:1], 0, v5
4154; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v4, s[0:1]
4155; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
4156; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
4157; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, 0x260
4158; GISEL-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
4159; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
4160; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
4161; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 vcc, s2, 0
4162; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
4163; GISEL-IEEE-NEXT:    buffer_store_dword v0, off, s[4:7], 0
4164; GISEL-IEEE-NEXT:    s_endpgm
4165;
4166; SDAG-DAZ-LABEL: elim_redun_check_pos0:
4167; SDAG-DAZ:       ; %bb.0: ; %entry
4168; SDAG-DAZ-NEXT:    s_load_dword s0, s[4:5], 0xb
4169; SDAG-DAZ-NEXT:    v_mov_b32_e32 v0, 0xf800000
4170; SDAG-DAZ-NEXT:    v_mov_b32_e32 v1, 0x4f800000
4171; SDAG-DAZ-NEXT:    s_mov_b32 s3, 0xf000
4172; SDAG-DAZ-NEXT:    s_mov_b32 s2, -1
4173; SDAG-DAZ-NEXT:    s_waitcnt lgkmcnt(0)
4174; SDAG-DAZ-NEXT:    v_mul_f32_e32 v1, s0, v1
4175; SDAG-DAZ-NEXT:    v_mov_b32_e32 v2, s0
4176; SDAG-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v0
4177; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
4178; SDAG-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
4179; SDAG-DAZ-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
4180; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
4181; SDAG-DAZ-NEXT:    v_mul_f32_e32 v1, 0.5, v1
4182; SDAG-DAZ-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
4183; SDAG-DAZ-NEXT:    v_fma_f32 v2, v2, v3, v2
4184; SDAG-DAZ-NEXT:    v_fma_f32 v4, -v2, v2, v0
4185; SDAG-DAZ-NEXT:    v_fma_f32 v1, v1, v3, v1
4186; SDAG-DAZ-NEXT:    v_fma_f32 v1, v4, v1, v2
4187; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
4188; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
4189; SDAG-DAZ-NEXT:    v_mov_b32_e32 v2, 0x260
4190; SDAG-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
4191; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
4192; SDAG-DAZ-NEXT:    s_waitcnt lgkmcnt(0)
4193; SDAG-DAZ-NEXT:    buffer_store_dword v0, off, s[0:3], 0
4194; SDAG-DAZ-NEXT:    s_endpgm
4195;
4196; GISEL-DAZ-LABEL: elim_redun_check_pos0:
4197; GISEL-DAZ:       ; %bb.0: ; %entry
4198; GISEL-DAZ-NEXT:    s_load_dword s2, s[4:5], 0xb
4199; GISEL-DAZ-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
4200; GISEL-DAZ-NEXT:    v_mov_b32_e32 v0, 0xf800000
4201; GISEL-DAZ-NEXT:    v_mov_b32_e32 v1, 0x4f800000
4202; GISEL-DAZ-NEXT:    s_mov_b32 s3, 0xf000
4203; GISEL-DAZ-NEXT:    s_waitcnt lgkmcnt(0)
4204; GISEL-DAZ-NEXT:    v_mov_b32_e32 v2, s2
4205; GISEL-DAZ-NEXT:    v_mul_f32_e32 v1, s2, v1
4206; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v0
4207; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
4208; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
4209; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
4210; GISEL-DAZ-NEXT:    v_mul_f32_e32 v1, 0.5, v1
4211; GISEL-DAZ-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
4212; GISEL-DAZ-NEXT:    v_fma_f32 v2, v2, v3, v2
4213; GISEL-DAZ-NEXT:    v_fma_f32 v1, v1, v3, v1
4214; GISEL-DAZ-NEXT:    v_fma_f32 v3, -v2, v2, v0
4215; GISEL-DAZ-NEXT:    v_fma_f32 v1, v3, v1, v2
4216; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
4217; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
4218; GISEL-DAZ-NEXT:    v_mov_b32_e32 v2, 0x260
4219; GISEL-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
4220; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
4221; GISEL-DAZ-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
4222; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e64 vcc, s2, 0
4223; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
4224; GISEL-DAZ-NEXT:    s_mov_b32 s2, -1
4225; GISEL-DAZ-NEXT:    buffer_store_dword v0, off, s[0:3], 0
4226; GISEL-DAZ-NEXT:    s_endpgm
4227entry:
4228  %sqrt = call float @llvm.sqrt.f32(float %in)
  ; Ordered compare against positive zero: false for NaN inputs.
4229  %cmp = fcmp olt float %in, 0.000000e+00
  ; 0x7FF8000000000000 is a quiet NaN.
4230  %res = select i1 %cmp, float 0x7FF8000000000000, float %sqrt
4231  store float %res, ptr addrspace(1) %out
4232  ret void
4233}
4234
; Kernel that stores to %out a quiet NaN when %in is unordered-or-less-than
; -0.0 and llvm.sqrt.f32(%in) otherwise; the unordered counterpart of the
; "olt" -0.0 test.
; The SelectionDAG expectations contain no compare against -0.0 and no select
; of 0x7fc00000, i.e. the explicit guard is dropped. The GlobalISel
; expectations still end with v_cmp_nge_f32 against the value produced by
; v_bfrev_b32 of 1 and a select of 0x7fc00000, so the guard is retained on
; that path.
4235define amdgpu_kernel void @elim_redun_check_ult(ptr addrspace(1) %out, float %in) {
4236; SDAG-IEEE-LABEL: elim_redun_check_ult:
4237; SDAG-IEEE:       ; %bb.0: ; %entry
4238; SDAG-IEEE-NEXT:    s_load_dword s0, s[4:5], 0xb
4239; SDAG-IEEE-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x9
4240; SDAG-IEEE-NEXT:    v_mov_b32_e32 v0, 0xf800000
4241; SDAG-IEEE-NEXT:    v_mov_b32_e32 v1, 0x4f800000
4242; SDAG-IEEE-NEXT:    s_mov_b32 s7, 0xf000
4243; SDAG-IEEE-NEXT:    s_waitcnt lgkmcnt(0)
4244; SDAG-IEEE-NEXT:    v_mul_f32_e32 v1, s0, v1
4245; SDAG-IEEE-NEXT:    v_mov_b32_e32 v2, s0
4246; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 s[0:1], s0, v0
4247; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v0, v2, v1, s[0:1]
4248; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
4249; SDAG-IEEE-NEXT:    s_mov_b32 s6, -1
4250; SDAG-IEEE-NEXT:    v_add_i32_e32 v2, vcc, -1, v1
4251; SDAG-IEEE-NEXT:    v_fma_f32 v3, -v2, v1, v0
4252; SDAG-IEEE-NEXT:    v_cmp_ge_f32_e32 vcc, 0, v3
4253; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v2, v1, v2, vcc
4254; SDAG-IEEE-NEXT:    v_add_i32_e32 v3, vcc, 1, v1
4255; SDAG-IEEE-NEXT:    v_fma_f32 v1, -v3, v1, v0
4256; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v1
4257; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v1, v2, v3, vcc
4258; SDAG-IEEE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
4259; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[0:1]
4260; SDAG-IEEE-NEXT:    v_mov_b32_e32 v2, 0x260
4261; SDAG-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
4262; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
4263; SDAG-IEEE-NEXT:    buffer_store_dword v0, off, s[4:7], 0
4264; SDAG-IEEE-NEXT:    s_endpgm
4265;
4266; GISEL-IEEE-LABEL: elim_redun_check_ult:
4267; GISEL-IEEE:       ; %bb.0: ; %entry
4268; GISEL-IEEE-NEXT:    s_load_dword s2, s[4:5], 0xb
4269; GISEL-IEEE-NEXT:    v_mov_b32_e32 v0, 0xf800000
4270; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0x4f800000
4271; GISEL-IEEE-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x9
4272; GISEL-IEEE-NEXT:    s_mov_b32 s6, -1
4273; GISEL-IEEE-NEXT:    s_waitcnt lgkmcnt(0)
4274; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, s2
4275; GISEL-IEEE-NEXT:    v_mul_f32_e32 v1, s2, v1
4276; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v0
4277; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
4278; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
4279; GISEL-IEEE-NEXT:    s_mov_b32 s7, 0xf000
4280; GISEL-IEEE-NEXT:    v_add_i32_e64 v2, s[0:1], -1, v1
4281; GISEL-IEEE-NEXT:    v_fma_f32 v3, -v2, v1, v0
4282; GISEL-IEEE-NEXT:    v_add_i32_e64 v4, s[0:1], 1, v1
4283; GISEL-IEEE-NEXT:    v_fma_f32 v5, -v4, v1, v0
4284; GISEL-IEEE-NEXT:    v_cmp_ge_f32_e64 s[0:1], 0, v3
4285; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[0:1]
4286; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 s[0:1], 0, v5
4287; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v4, s[0:1]
4288; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
4289; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
4290; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, 0x260
4291; GISEL-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
4292; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
4293; GISEL-IEEE-NEXT:    v_bfrev_b32_e32 v1, 1
4294; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
4295; GISEL-IEEE-NEXT:    v_cmp_nge_f32_e32 vcc, s2, v1
4296; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
4297; GISEL-IEEE-NEXT:    buffer_store_dword v0, off, s[4:7], 0
4298; GISEL-IEEE-NEXT:    s_endpgm
4299;
4300; SDAG-DAZ-LABEL: elim_redun_check_ult:
4301; SDAG-DAZ:       ; %bb.0: ; %entry
4302; SDAG-DAZ-NEXT:    s_load_dword s0, s[4:5], 0xb
4303; SDAG-DAZ-NEXT:    v_mov_b32_e32 v0, 0xf800000
4304; SDAG-DAZ-NEXT:    v_mov_b32_e32 v1, 0x4f800000
4305; SDAG-DAZ-NEXT:    s_mov_b32 s3, 0xf000
4306; SDAG-DAZ-NEXT:    s_mov_b32 s2, -1
4307; SDAG-DAZ-NEXT:    s_waitcnt lgkmcnt(0)
4308; SDAG-DAZ-NEXT:    v_mul_f32_e32 v1, s0, v1
4309; SDAG-DAZ-NEXT:    v_mov_b32_e32 v2, s0
4310; SDAG-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v0
4311; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
4312; SDAG-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
4313; SDAG-DAZ-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
4314; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
4315; SDAG-DAZ-NEXT:    v_mul_f32_e32 v1, 0.5, v1
4316; SDAG-DAZ-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
4317; SDAG-DAZ-NEXT:    v_fma_f32 v2, v2, v3, v2
4318; SDAG-DAZ-NEXT:    v_fma_f32 v4, -v2, v2, v0
4319; SDAG-DAZ-NEXT:    v_fma_f32 v1, v1, v3, v1
4320; SDAG-DAZ-NEXT:    v_fma_f32 v1, v4, v1, v2
4321; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
4322; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
4323; SDAG-DAZ-NEXT:    v_mov_b32_e32 v2, 0x260
4324; SDAG-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
4325; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
4326; SDAG-DAZ-NEXT:    s_waitcnt lgkmcnt(0)
4327; SDAG-DAZ-NEXT:    buffer_store_dword v0, off, s[0:3], 0
4328; SDAG-DAZ-NEXT:    s_endpgm
4329;
4330; GISEL-DAZ-LABEL: elim_redun_check_ult:
4331; GISEL-DAZ:       ; %bb.0: ; %entry
4332; GISEL-DAZ-NEXT:    s_load_dword s2, s[4:5], 0xb
4333; GISEL-DAZ-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
4334; GISEL-DAZ-NEXT:    v_mov_b32_e32 v0, 0xf800000
4335; GISEL-DAZ-NEXT:    v_mov_b32_e32 v1, 0x4f800000
4336; GISEL-DAZ-NEXT:    s_mov_b32 s3, 0xf000
4337; GISEL-DAZ-NEXT:    s_waitcnt lgkmcnt(0)
4338; GISEL-DAZ-NEXT:    v_mov_b32_e32 v2, s2
4339; GISEL-DAZ-NEXT:    v_mul_f32_e32 v1, s2, v1
4340; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v0
4341; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
4342; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
4343; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
4344; GISEL-DAZ-NEXT:    v_mul_f32_e32 v1, 0.5, v1
4345; GISEL-DAZ-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
4346; GISEL-DAZ-NEXT:    v_fma_f32 v2, v2, v3, v2
4347; GISEL-DAZ-NEXT:    v_fma_f32 v1, v1, v3, v1
4348; GISEL-DAZ-NEXT:    v_fma_f32 v3, -v2, v2, v0
4349; GISEL-DAZ-NEXT:    v_fma_f32 v1, v3, v1, v2
4350; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
4351; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
4352; GISEL-DAZ-NEXT:    v_mov_b32_e32 v2, 0x260
4353; GISEL-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
4354; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
4355; GISEL-DAZ-NEXT:    v_bfrev_b32_e32 v1, 1
4356; GISEL-DAZ-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
4357; GISEL-DAZ-NEXT:    v_cmp_nge_f32_e32 vcc, s2, v1
4358; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
4359; GISEL-DAZ-NEXT:    s_mov_b32 s2, -1
4360; GISEL-DAZ-NEXT:    buffer_store_dword v0, off, s[0:3], 0
4361; GISEL-DAZ-NEXT:    s_endpgm
4362entry:
4363  %sqrt = call float @llvm.sqrt.f32(float %in)
  ; "ult" is an unordered compare: true for NaN inputs as well as inputs
  ; below -0.0.
4364  %cmp = fcmp ult float %in, -0.000000e+00
  ; 0x7FF8000000000000 is a quiet NaN.
4365  %res = select i1 %cmp, float 0x7FF8000000000000, float %sqrt
4366  store float %res, ptr addrspace(1) %out
4367  ret void
4368}
4369
4370define amdgpu_kernel void @elim_redun_check_v2(ptr addrspace(1) %out, <2 x float> %in) {
4371; SDAG-IEEE-LABEL: elim_redun_check_v2:
4372; SDAG-IEEE:       ; %bb.0: ; %entry
4373; SDAG-IEEE-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x9
4374; SDAG-IEEE-NEXT:    v_mov_b32_e32 v0, 0xf800000
4375; SDAG-IEEE-NEXT:    v_mov_b32_e32 v1, 0x4f800000
4376; SDAG-IEEE-NEXT:    s_mov_b32 s7, 0xf000
4377; SDAG-IEEE-NEXT:    s_mov_b32 s6, -1
4378; SDAG-IEEE-NEXT:    s_waitcnt lgkmcnt(0)
4379; SDAG-IEEE-NEXT:    v_mul_f32_e32 v2, s11, v1
4380; SDAG-IEEE-NEXT:    v_mov_b32_e32 v3, s11
4381; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 s[0:1], s11, v0
4382; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, v3, v2, s[0:1]
4383; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v3, v2
4384; SDAG-IEEE-NEXT:    v_mul_f32_e32 v1, s10, v1
4385; SDAG-IEEE-NEXT:    s_mov_b32 s4, s8
4386; SDAG-IEEE-NEXT:    s_mov_b32 s5, s9
4387; SDAG-IEEE-NEXT:    v_add_i32_e32 v4, vcc, -1, v3
4388; SDAG-IEEE-NEXT:    v_fma_f32 v5, -v4, v3, v2
4389; SDAG-IEEE-NEXT:    v_cmp_ge_f32_e32 vcc, 0, v5
4390; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v4, v3, v4, vcc
4391; SDAG-IEEE-NEXT:    v_add_i32_e32 v5, vcc, 1, v3
4392; SDAG-IEEE-NEXT:    v_fma_f32 v3, -v5, v3, v2
4393; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v3
4394; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v3, v4, v5, vcc
4395; SDAG-IEEE-NEXT:    v_mul_f32_e32 v4, 0x37800000, v3
4396; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v3, v3, v4, s[0:1]
4397; SDAG-IEEE-NEXT:    v_mov_b32_e32 v5, s10
4398; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 s[0:1], s10, v0
4399; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v0, v5, v1, s[0:1]
4400; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v5, v0
4401; SDAG-IEEE-NEXT:    v_mov_b32_e32 v4, 0x260
4402; SDAG-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v2, v4
4403; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v1, v3, v2, vcc
4404; SDAG-IEEE-NEXT:    v_add_i32_e32 v2, vcc, -1, v5
4405; SDAG-IEEE-NEXT:    v_fma_f32 v3, -v2, v5, v0
4406; SDAG-IEEE-NEXT:    v_cmp_ge_f32_e32 vcc, 0, v3
4407; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v2, v5, v2, vcc
4408; SDAG-IEEE-NEXT:    v_add_i32_e32 v3, vcc, 1, v5
4409; SDAG-IEEE-NEXT:    v_fma_f32 v5, -v3, v5, v0
4410; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v5
4411; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
4412; SDAG-IEEE-NEXT:    v_mul_f32_e32 v3, 0x37800000, v2
4413; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[0:1]
4414; SDAG-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v4
4415; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
4416; SDAG-IEEE-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
4417; SDAG-IEEE-NEXT:    s_endpgm
4418;
4419; GISEL-IEEE-LABEL: elim_redun_check_v2:
4420; GISEL-IEEE:       ; %bb.0: ; %entry
4421; GISEL-IEEE-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x9
4422; GISEL-IEEE-NEXT:    v_mov_b32_e32 v0, 0xf800000
4423; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0x4f800000
4424; GISEL-IEEE-NEXT:    s_waitcnt lgkmcnt(0)
4425; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, s6
4426; GISEL-IEEE-NEXT:    v_mul_f32_e32 v3, s6, v1
4427; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, s6, v0
4428; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
4429; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v3, v2
4430; GISEL-IEEE-NEXT:    v_mul_f32_e32 v1, s7, v1
4431; GISEL-IEEE-NEXT:    v_add_i32_e64 v4, s[0:1], -1, v3
4432; GISEL-IEEE-NEXT:    v_add_i32_e64 v5, s[0:1], 1, v3
4433; GISEL-IEEE-NEXT:    v_fma_f32 v6, -v4, v3, v2
4434; GISEL-IEEE-NEXT:    v_fma_f32 v7, -v5, v3, v2
4435; GISEL-IEEE-NEXT:    v_cmp_ge_f32_e64 s[0:1], 0, v6
4436; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v3, v3, v4, s[0:1]
4437; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 s[0:1], 0, v7
4438; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v3, v3, v5, s[0:1]
4439; GISEL-IEEE-NEXT:    v_mul_f32_e32 v4, 0x37800000, v3
4440; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
4441; GISEL-IEEE-NEXT:    v_mov_b32_e32 v5, s7
4442; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, s7, v0
4443; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v5, v1, vcc
4444; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
4445; GISEL-IEEE-NEXT:    v_mov_b32_e32 v4, 0x260
4446; GISEL-IEEE-NEXT:    v_cmp_class_f32_e64 s[0:1], v2, v4
4447; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v2, v3, v2, s[0:1]
4448; GISEL-IEEE-NEXT:    v_add_i32_e64 v3, s[0:1], -1, v1
4449; GISEL-IEEE-NEXT:    v_fma_f32 v5, -v3, v1, v0
4450; GISEL-IEEE-NEXT:    v_add_i32_e64 v6, s[0:1], 1, v1
4451; GISEL-IEEE-NEXT:    v_fma_f32 v7, -v6, v1, v0
4452; GISEL-IEEE-NEXT:    v_cmp_ge_f32_e64 s[0:1], 0, v5
4453; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v3, s[0:1]
4454; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 s[0:1], 0, v7
4455; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v6, s[0:1]
4456; GISEL-IEEE-NEXT:    v_mul_f32_e32 v3, 0x37800000, v1
4457; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
4458; GISEL-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v4
4459; GISEL-IEEE-NEXT:    v_bfrev_b32_e32 v3, 1
4460; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
4461; GISEL-IEEE-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
4462; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, s6, v3
4463; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v2, v4, vcc
4464; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, s7, v3
4465; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
4466; GISEL-IEEE-NEXT:    s_mov_b32 s6, -1
4467; GISEL-IEEE-NEXT:    s_mov_b32 s7, 0xf000
4468; GISEL-IEEE-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
4469; GISEL-IEEE-NEXT:    s_endpgm
4470;
4471; SDAG-DAZ-LABEL: elim_redun_check_v2:
4472; SDAG-DAZ:       ; %bb.0: ; %entry
4473; SDAG-DAZ-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
4474; SDAG-DAZ-NEXT:    v_mov_b32_e32 v0, 0xf800000
4475; SDAG-DAZ-NEXT:    v_mov_b32_e32 v1, 0x4f800000
4476; SDAG-DAZ-NEXT:    s_mov_b32 s7, 0xf000
4477; SDAG-DAZ-NEXT:    s_mov_b32 s6, -1
4478; SDAG-DAZ-NEXT:    s_waitcnt lgkmcnt(0)
4479; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, s3, v1
4480; SDAG-DAZ-NEXT:    v_mov_b32_e32 v3, s3
4481; SDAG-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, s3, v0
4482; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v2, v3, v2, vcc
4483; SDAG-DAZ-NEXT:    v_rsq_f32_e32 v3, v2
4484; SDAG-DAZ-NEXT:    v_mul_f32_e32 v1, s2, v1
4485; SDAG-DAZ-NEXT:    s_mov_b32 s4, s0
4486; SDAG-DAZ-NEXT:    s_mov_b32 s5, s1
4487; SDAG-DAZ-NEXT:    v_mul_f32_e32 v4, v2, v3
4488; SDAG-DAZ-NEXT:    v_mul_f32_e32 v3, 0.5, v3
4489; SDAG-DAZ-NEXT:    v_fma_f32 v5, -v3, v4, 0.5
4490; SDAG-DAZ-NEXT:    v_fma_f32 v4, v4, v5, v4
4491; SDAG-DAZ-NEXT:    v_fma_f32 v6, -v4, v4, v2
4492; SDAG-DAZ-NEXT:    v_fma_f32 v3, v3, v5, v3
4493; SDAG-DAZ-NEXT:    v_fma_f32 v3, v6, v3, v4
4494; SDAG-DAZ-NEXT:    v_mul_f32_e32 v4, 0x37800000, v3
4495; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
4496; SDAG-DAZ-NEXT:    v_mov_b32_e32 v4, s2
4497; SDAG-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v0
4498; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
4499; SDAG-DAZ-NEXT:    v_rsq_f32_e32 v4, v0
4500; SDAG-DAZ-NEXT:    v_mov_b32_e32 v5, 0x260
4501; SDAG-DAZ-NEXT:    v_cmp_class_f32_e64 s[0:1], v2, v5
4502; SDAG-DAZ-NEXT:    v_cndmask_b32_e64 v1, v3, v2, s[0:1]
4503; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v4
4504; SDAG-DAZ-NEXT:    v_mul_f32_e32 v3, 0.5, v4
4505; SDAG-DAZ-NEXT:    v_fma_f32 v4, -v3, v2, 0.5
4506; SDAG-DAZ-NEXT:    v_fma_f32 v2, v2, v4, v2
4507; SDAG-DAZ-NEXT:    v_fma_f32 v6, -v2, v2, v0
4508; SDAG-DAZ-NEXT:    v_fma_f32 v3, v3, v4, v3
4509; SDAG-DAZ-NEXT:    v_fma_f32 v2, v6, v3, v2
4510; SDAG-DAZ-NEXT:    v_mul_f32_e32 v3, 0x37800000, v2
4511; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
4512; SDAG-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v0, v5
4513; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
4514; SDAG-DAZ-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
4515; SDAG-DAZ-NEXT:    s_endpgm
4516;
4517; GISEL-DAZ-LABEL: elim_redun_check_v2:
4518; GISEL-DAZ:       ; %bb.0: ; %entry
4519; GISEL-DAZ-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x9
4520; GISEL-DAZ-NEXT:    v_mov_b32_e32 v0, 0xf800000
4521; GISEL-DAZ-NEXT:    v_mov_b32_e32 v1, 0x4f800000
4522; GISEL-DAZ-NEXT:    s_waitcnt lgkmcnt(0)
4523; GISEL-DAZ-NEXT:    v_mov_b32_e32 v2, s6
4524; GISEL-DAZ-NEXT:    v_mul_f32_e32 v3, s6, v1
4525; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, s6, v0
4526; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
4527; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v3, v2
4528; GISEL-DAZ-NEXT:    v_mov_b32_e32 v4, s7
4529; GISEL-DAZ-NEXT:    v_mul_f32_e32 v1, s7, v1
4530; GISEL-DAZ-NEXT:    v_mul_f32_e32 v5, v2, v3
4531; GISEL-DAZ-NEXT:    v_mul_f32_e32 v3, 0.5, v3
4532; GISEL-DAZ-NEXT:    v_fma_f32 v6, -v3, v5, 0.5
4533; GISEL-DAZ-NEXT:    v_fma_f32 v5, v5, v6, v5
4534; GISEL-DAZ-NEXT:    v_fma_f32 v3, v3, v6, v3
4535; GISEL-DAZ-NEXT:    v_fma_f32 v6, -v5, v5, v2
4536; GISEL-DAZ-NEXT:    v_fma_f32 v3, v6, v3, v5
4537; GISEL-DAZ-NEXT:    v_mul_f32_e32 v5, 0x37800000, v3
4538; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
4539; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, s7, v0
4540; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
4541; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
4542; GISEL-DAZ-NEXT:    v_mov_b32_e32 v4, 0x260
4543; GISEL-DAZ-NEXT:    v_cmp_class_f32_e64 s[0:1], v2, v4
4544; GISEL-DAZ-NEXT:    v_cndmask_b32_e64 v2, v3, v2, s[0:1]
4545; GISEL-DAZ-NEXT:    v_mul_f32_e32 v3, v0, v1
4546; GISEL-DAZ-NEXT:    v_mul_f32_e32 v1, 0.5, v1
4547; GISEL-DAZ-NEXT:    v_fma_f32 v5, -v1, v3, 0.5
4548; GISEL-DAZ-NEXT:    v_fma_f32 v3, v3, v5, v3
4549; GISEL-DAZ-NEXT:    v_fma_f32 v1, v1, v5, v1
4550; GISEL-DAZ-NEXT:    v_fma_f32 v5, -v3, v3, v0
4551; GISEL-DAZ-NEXT:    v_fma_f32 v1, v5, v1, v3
4552; GISEL-DAZ-NEXT:    v_mul_f32_e32 v3, 0x37800000, v1
4553; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
4554; GISEL-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v0, v4
4555; GISEL-DAZ-NEXT:    v_bfrev_b32_e32 v3, 1
4556; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
4557; GISEL-DAZ-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
4558; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, s6, v3
4559; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v2, v4, vcc
4560; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, s7, v3
4561; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
4562; GISEL-DAZ-NEXT:    s_mov_b32 s6, -1
4563; GISEL-DAZ-NEXT:    s_mov_b32 s7, 0xf000
4564; GISEL-DAZ-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
4565; GISEL-DAZ-NEXT:    s_endpgm
4566entry:
4567  %sqrt = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %in)
4568  %cmp = fcmp olt <2 x float> %in, <float -0.000000e+00, float -0.000000e+00>
4569  %res = select <2 x i1> %cmp, <2 x float> <float 0x7FF8000000000000, float 0x7FF8000000000000>, <2 x float> %sqrt
4570  store <2 x float> %res, ptr addrspace(1) %out
4571  ret void
4572}
4573
; Two-element vector test of a sqrt followed by a NaN select that is keyed on an
; unordered compare. The kernel computes llvm.sqrt.v2f32 of %in, replaces every
; lane for which "fcmp ult %in, -0.0" holds with a quiet NaN, and stores the
; result to %out.
;
; In the SelectionDAG expectations below (both denormal modes) the stored value
; comes straight from the sqrt expansion: there is no compare against -0.0 and
; no 0x7fc00000 select. The GlobalISel expectations still contain the
; v_bfrev_b32 / v_cmp_nge_f32 / v_cndmask_b32 sequence that performs the select.
4574define amdgpu_kernel void @elim_redun_check_v2_ult(ptr addrspace(1) %out, <2 x float> %in) {
4575; SDAG-IEEE-LABEL: elim_redun_check_v2_ult:
4576; SDAG-IEEE:       ; %bb.0: ; %entry
4577; SDAG-IEEE-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x9
4578; SDAG-IEEE-NEXT:    v_mov_b32_e32 v0, 0xf800000
4579; SDAG-IEEE-NEXT:    v_mov_b32_e32 v1, 0x4f800000
4580; SDAG-IEEE-NEXT:    s_mov_b32 s7, 0xf000
4581; SDAG-IEEE-NEXT:    s_mov_b32 s6, -1
4582; SDAG-IEEE-NEXT:    s_waitcnt lgkmcnt(0)
4583; SDAG-IEEE-NEXT:    v_mul_f32_e32 v2, s11, v1
4584; SDAG-IEEE-NEXT:    v_mov_b32_e32 v3, s11
4585; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 s[0:1], s11, v0
4586; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, v3, v2, s[0:1]
4587; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v3, v2
4588; SDAG-IEEE-NEXT:    v_mul_f32_e32 v1, s10, v1
4589; SDAG-IEEE-NEXT:    s_mov_b32 s4, s8
4590; SDAG-IEEE-NEXT:    s_mov_b32 s5, s9
4591; SDAG-IEEE-NEXT:    v_add_i32_e32 v4, vcc, -1, v3
4592; SDAG-IEEE-NEXT:    v_fma_f32 v5, -v4, v3, v2
4593; SDAG-IEEE-NEXT:    v_cmp_ge_f32_e32 vcc, 0, v5
4594; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v4, v3, v4, vcc
4595; SDAG-IEEE-NEXT:    v_add_i32_e32 v5, vcc, 1, v3
4596; SDAG-IEEE-NEXT:    v_fma_f32 v3, -v5, v3, v2
4597; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v3
4598; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v3, v4, v5, vcc
4599; SDAG-IEEE-NEXT:    v_mul_f32_e32 v4, 0x37800000, v3
4600; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v3, v3, v4, s[0:1]
4601; SDAG-IEEE-NEXT:    v_mov_b32_e32 v5, s10
4602; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 s[0:1], s10, v0
4603; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v0, v5, v1, s[0:1]
4604; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v5, v0
4605; SDAG-IEEE-NEXT:    v_mov_b32_e32 v4, 0x260
4606; SDAG-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v2, v4
4607; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v1, v3, v2, vcc
4608; SDAG-IEEE-NEXT:    v_add_i32_e32 v2, vcc, -1, v5
4609; SDAG-IEEE-NEXT:    v_fma_f32 v3, -v2, v5, v0
4610; SDAG-IEEE-NEXT:    v_cmp_ge_f32_e32 vcc, 0, v3
4611; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v2, v5, v2, vcc
4612; SDAG-IEEE-NEXT:    v_add_i32_e32 v3, vcc, 1, v5
4613; SDAG-IEEE-NEXT:    v_fma_f32 v5, -v3, v5, v0
4614; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v5
4615; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
4616; SDAG-IEEE-NEXT:    v_mul_f32_e32 v3, 0x37800000, v2
4617; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[0:1]
4618; SDAG-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v4
4619; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
4620; SDAG-IEEE-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
4621; SDAG-IEEE-NEXT:    s_endpgm
4622;
4623; GISEL-IEEE-LABEL: elim_redun_check_v2_ult:
4624; GISEL-IEEE:       ; %bb.0: ; %entry
4625; GISEL-IEEE-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x9
4626; GISEL-IEEE-NEXT:    v_mov_b32_e32 v0, 0xf800000
4627; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0x4f800000
4628; GISEL-IEEE-NEXT:    s_waitcnt lgkmcnt(0)
4629; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, s6
4630; GISEL-IEEE-NEXT:    v_mul_f32_e32 v3, s6, v1
4631; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, s6, v0
4632; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
4633; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v3, v2
4634; GISEL-IEEE-NEXT:    v_mul_f32_e32 v1, s7, v1
4635; GISEL-IEEE-NEXT:    v_add_i32_e64 v4, s[0:1], -1, v3
4636; GISEL-IEEE-NEXT:    v_add_i32_e64 v5, s[0:1], 1, v3
4637; GISEL-IEEE-NEXT:    v_fma_f32 v6, -v4, v3, v2
4638; GISEL-IEEE-NEXT:    v_fma_f32 v7, -v5, v3, v2
4639; GISEL-IEEE-NEXT:    v_cmp_ge_f32_e64 s[0:1], 0, v6
4640; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v3, v3, v4, s[0:1]
4641; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 s[0:1], 0, v7
4642; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v3, v3, v5, s[0:1]
4643; GISEL-IEEE-NEXT:    v_mul_f32_e32 v4, 0x37800000, v3
4644; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
4645; GISEL-IEEE-NEXT:    v_mov_b32_e32 v5, s7
4646; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, s7, v0
4647; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v5, v1, vcc
4648; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
4649; GISEL-IEEE-NEXT:    v_mov_b32_e32 v4, 0x260
4650; GISEL-IEEE-NEXT:    v_cmp_class_f32_e64 s[0:1], v2, v4
4651; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v2, v3, v2, s[0:1]
4652; GISEL-IEEE-NEXT:    v_add_i32_e64 v3, s[0:1], -1, v1
4653; GISEL-IEEE-NEXT:    v_fma_f32 v5, -v3, v1, v0
4654; GISEL-IEEE-NEXT:    v_add_i32_e64 v6, s[0:1], 1, v1
4655; GISEL-IEEE-NEXT:    v_fma_f32 v7, -v6, v1, v0
4656; GISEL-IEEE-NEXT:    v_cmp_ge_f32_e64 s[0:1], 0, v5
4657; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v3, s[0:1]
4658; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 s[0:1], 0, v7
4659; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v6, s[0:1]
4660; GISEL-IEEE-NEXT:    v_mul_f32_e32 v3, 0x37800000, v1
4661; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
4662; GISEL-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v4
4663; GISEL-IEEE-NEXT:    v_bfrev_b32_e32 v3, 1
4664; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
4665; GISEL-IEEE-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
4666; GISEL-IEEE-NEXT:    v_cmp_nge_f32_e32 vcc, s6, v3
4667; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v2, v4, vcc
4668; GISEL-IEEE-NEXT:    v_cmp_nge_f32_e32 vcc, s7, v3
4669; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
4670; GISEL-IEEE-NEXT:    s_mov_b32 s6, -1
4671; GISEL-IEEE-NEXT:    s_mov_b32 s7, 0xf000
4672; GISEL-IEEE-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
4673; GISEL-IEEE-NEXT:    s_endpgm
4674;
4675; SDAG-DAZ-LABEL: elim_redun_check_v2_ult:
4676; SDAG-DAZ:       ; %bb.0: ; %entry
4677; SDAG-DAZ-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
4678; SDAG-DAZ-NEXT:    v_mov_b32_e32 v0, 0xf800000
4679; SDAG-DAZ-NEXT:    v_mov_b32_e32 v1, 0x4f800000
4680; SDAG-DAZ-NEXT:    s_mov_b32 s7, 0xf000
4681; SDAG-DAZ-NEXT:    s_mov_b32 s6, -1
4682; SDAG-DAZ-NEXT:    s_waitcnt lgkmcnt(0)
4683; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, s3, v1
4684; SDAG-DAZ-NEXT:    v_mov_b32_e32 v3, s3
4685; SDAG-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, s3, v0
4686; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v2, v3, v2, vcc
4687; SDAG-DAZ-NEXT:    v_rsq_f32_e32 v3, v2
4688; SDAG-DAZ-NEXT:    v_mul_f32_e32 v1, s2, v1
4689; SDAG-DAZ-NEXT:    s_mov_b32 s4, s0
4690; SDAG-DAZ-NEXT:    s_mov_b32 s5, s1
4691; SDAG-DAZ-NEXT:    v_mul_f32_e32 v4, v2, v3
4692; SDAG-DAZ-NEXT:    v_mul_f32_e32 v3, 0.5, v3
4693; SDAG-DAZ-NEXT:    v_fma_f32 v5, -v3, v4, 0.5
4694; SDAG-DAZ-NEXT:    v_fma_f32 v4, v4, v5, v4
4695; SDAG-DAZ-NEXT:    v_fma_f32 v6, -v4, v4, v2
4696; SDAG-DAZ-NEXT:    v_fma_f32 v3, v3, v5, v3
4697; SDAG-DAZ-NEXT:    v_fma_f32 v3, v6, v3, v4
4698; SDAG-DAZ-NEXT:    v_mul_f32_e32 v4, 0x37800000, v3
4699; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
4700; SDAG-DAZ-NEXT:    v_mov_b32_e32 v4, s2
4701; SDAG-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v0
4702; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
4703; SDAG-DAZ-NEXT:    v_rsq_f32_e32 v4, v0
4704; SDAG-DAZ-NEXT:    v_mov_b32_e32 v5, 0x260
4705; SDAG-DAZ-NEXT:    v_cmp_class_f32_e64 s[0:1], v2, v5
4706; SDAG-DAZ-NEXT:    v_cndmask_b32_e64 v1, v3, v2, s[0:1]
4707; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v4
4708; SDAG-DAZ-NEXT:    v_mul_f32_e32 v3, 0.5, v4
4709; SDAG-DAZ-NEXT:    v_fma_f32 v4, -v3, v2, 0.5
4710; SDAG-DAZ-NEXT:    v_fma_f32 v2, v2, v4, v2
4711; SDAG-DAZ-NEXT:    v_fma_f32 v6, -v2, v2, v0
4712; SDAG-DAZ-NEXT:    v_fma_f32 v3, v3, v4, v3
4713; SDAG-DAZ-NEXT:    v_fma_f32 v2, v6, v3, v2
4714; SDAG-DAZ-NEXT:    v_mul_f32_e32 v3, 0x37800000, v2
4715; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
4716; SDAG-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v0, v5
4717; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
4718; SDAG-DAZ-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
4719; SDAG-DAZ-NEXT:    s_endpgm
4720;
4721; GISEL-DAZ-LABEL: elim_redun_check_v2_ult:
4722; GISEL-DAZ:       ; %bb.0: ; %entry
4723; GISEL-DAZ-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x9
4724; GISEL-DAZ-NEXT:    v_mov_b32_e32 v0, 0xf800000
4725; GISEL-DAZ-NEXT:    v_mov_b32_e32 v1, 0x4f800000
4726; GISEL-DAZ-NEXT:    s_waitcnt lgkmcnt(0)
4727; GISEL-DAZ-NEXT:    v_mov_b32_e32 v2, s6
4728; GISEL-DAZ-NEXT:    v_mul_f32_e32 v3, s6, v1
4729; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, s6, v0
4730; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
4731; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v3, v2
4732; GISEL-DAZ-NEXT:    v_mov_b32_e32 v4, s7
4733; GISEL-DAZ-NEXT:    v_mul_f32_e32 v1, s7, v1
4734; GISEL-DAZ-NEXT:    v_mul_f32_e32 v5, v2, v3
4735; GISEL-DAZ-NEXT:    v_mul_f32_e32 v3, 0.5, v3
4736; GISEL-DAZ-NEXT:    v_fma_f32 v6, -v3, v5, 0.5
4737; GISEL-DAZ-NEXT:    v_fma_f32 v5, v5, v6, v5
4738; GISEL-DAZ-NEXT:    v_fma_f32 v3, v3, v6, v3
4739; GISEL-DAZ-NEXT:    v_fma_f32 v6, -v5, v5, v2
4740; GISEL-DAZ-NEXT:    v_fma_f32 v3, v6, v3, v5
4741; GISEL-DAZ-NEXT:    v_mul_f32_e32 v5, 0x37800000, v3
4742; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
4743; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, s7, v0
4744; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
4745; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
4746; GISEL-DAZ-NEXT:    v_mov_b32_e32 v4, 0x260
4747; GISEL-DAZ-NEXT:    v_cmp_class_f32_e64 s[0:1], v2, v4
4748; GISEL-DAZ-NEXT:    v_cndmask_b32_e64 v2, v3, v2, s[0:1]
4749; GISEL-DAZ-NEXT:    v_mul_f32_e32 v3, v0, v1
4750; GISEL-DAZ-NEXT:    v_mul_f32_e32 v1, 0.5, v1
4751; GISEL-DAZ-NEXT:    v_fma_f32 v5, -v1, v3, 0.5
4752; GISEL-DAZ-NEXT:    v_fma_f32 v3, v3, v5, v3
4753; GISEL-DAZ-NEXT:    v_fma_f32 v1, v1, v5, v1
4754; GISEL-DAZ-NEXT:    v_fma_f32 v5, -v3, v3, v0
4755; GISEL-DAZ-NEXT:    v_fma_f32 v1, v5, v1, v3
4756; GISEL-DAZ-NEXT:    v_mul_f32_e32 v3, 0x37800000, v1
4757; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
4758; GISEL-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v0, v4
4759; GISEL-DAZ-NEXT:    v_bfrev_b32_e32 v3, 1
4760; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
4761; GISEL-DAZ-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
4762; GISEL-DAZ-NEXT:    v_cmp_nge_f32_e32 vcc, s6, v3
4763; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v2, v4, vcc
4764; GISEL-DAZ-NEXT:    v_cmp_nge_f32_e32 vcc, s7, v3
4765; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
4766; GISEL-DAZ-NEXT:    s_mov_b32 s6, -1
4767; GISEL-DAZ-NEXT:    s_mov_b32 s7, 0xf000
4768; GISEL-DAZ-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
4769; GISEL-DAZ-NEXT:    s_endpgm
4770entry:
  ; Per-lane square root of the <2 x float> kernel argument.
4771  %sqrt = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %in)
  ; "ult" is the unordered-or-less-than predicate, so a lane is true when it is
  ; below -0.0 or is a NaN. The otherwise identical @elim_redun_check_v2 test
  ; uses the ordered "olt" predicate instead.
4772  %cmp = fcmp ult <2 x float> %in, <float -0.000000e+00, float -0.000000e+00>
  ; Lanes flagged by %cmp receive the quiet-NaN constant (materialized as
  ; 0x7fc00000 in the GlobalISel output above); the rest keep the sqrt result.
4773  %res = select <2 x i1> %cmp, <2 x float> <float 0x7FF8000000000000, float 0x7FF8000000000000>, <2 x float> %sqrt
  ; Write both lanes to the global (addrspace 1) output pointer.
4774  store <2 x float> %res, ptr addrspace(1) %out
4775  ret void
4776}
4777
; Intrinsic declarations. llvm.sqrt.v2f32 is the one called by the tests visible
; directly above; NOTE(review): the remaining intrinsics are presumably used by
; tests earlier in the file - confirm before pruning any of them.
4778declare float @llvm.fabs.f32(float) #0
4779declare float @llvm.sqrt.f32(float) #0
4780declare <2 x float> @llvm.fabs.v2f32(<2 x float>) #0
4781declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) #0
4782declare <3 x float> @llvm.sqrt.v3f32(<3 x float>) #0
4783declare i32 @llvm.amdgcn.readfirstlane(i32) #1
4784
4785declare { float, i32 } @llvm.frexp.f32.i32(float) #0
4786
; Attribute groups. #0 and #1 are the ones attached to the declarations above
; (#1 is the convergent variant, applied to readfirstlane). #2 through #5 carry
; function-level floating-point math string attributes; NOTE(review): they are
; presumably attached to test functions earlier in the file - confirm.
4787attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
4788attributes #1 = { convergent nounwind willreturn memory(none) }
4789attributes #2 = { "approx-func-fp-math"="true" }
4790attributes #3 = { "approx-func-fp-math"="true" "no-nans-fp-math"="true" "no-infs-fp-math"="true" }
4791attributes #4 = { "unsafe-fp-math"="true" }
4792attributes #5 = { "no-infs-fp-math"="true" }
4793
; Metadata nodes, each wrapping a single float constant. NOTE(review): these
; look like !fpmath accuracy operands referenced by earlier tests - confirm.
4794!0 = !{float 0.5}
4795!1 = !{float 1.0}
4796!2 = !{float 2.0}
4797!3 = !{float 2.5}
4798!4 = !{float 3.0}
4799;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
4800; GCN-IEEE: {{.*}}
4801