; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt -S -mtriple=amdgcn-- -mcpu=hawaii -passes=amdgpu-codegenprepare -denormal-fp-math-f32=ieee %s | FileCheck -check-prefixes=CHECK,IEEE %s
; RUN: opt -S -mtriple=amdgcn-- -mcpu=hawaii -passes=amdgpu-codegenprepare -denormal-fp-math-f32=dynamic %s | FileCheck -check-prefixes=CHECK,IEEE %s
; RUN: opt -S -mtriple=amdgcn-- -mcpu=hawaii -passes=amdgpu-codegenprepare -denormal-fp-math-f32=preserve-sign %s | FileCheck -check-prefixes=CHECK,DAZ %s

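; The pass should skip optnone functions: the sqrt below is expected to stay as a plain
; call to llvm.sqrt.f32 with its !fpmath intact (only the metadata index is renumbered).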
define amdgpu_kernel void @noop_sqrt_fpmath(ptr addrspace(1) %out, float %x) #0 {
; CHECK-LABEL: define amdgpu_kernel void @noop_sqrt_fpmath
; CHECK-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:    [[MD_25ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !0
; CHECK-NEXT:    store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    ret void
;
  %md.25ulp = call float @llvm.sqrt.f32(float %x), !fpmath !3
  store volatile float %md.25ulp, ptr addrspace(1) %out, align 4
  ret void
}

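; Scalar f32: no metadata and 0.5 ulp stay as llvm.sqrt.f32 in both denormal modes.
; With preserve-sign (DAZ) f32 denormals, 1 ulp and looser lower directly to
; llvm.amdgcn.sqrt.f32. With ieee/dynamic denormals, 1 ulp is kept as llvm.sqrt.f32 and
; 2 ulp and looser expand to a subnormal-scaling sequence: scale the input up by 2^32
; via ldexp, take llvm.amdgcn.sqrt.f32, then scale the result back down by 2^-16.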
define amdgpu_kernel void @sqrt_fpmath_f32(ptr addrspace(1) %out, float %x) {
; IEEE-LABEL: define amdgpu_kernel void @sqrt_fpmath_f32
; IEEE-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1:[0-9]+]] {
; IEEE-NEXT:    [[NO_MD:%.*]] = call float @llvm.sqrt.f32(float [[X]])
; IEEE-NEXT:    store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
; IEEE-NEXT:    [[MD_HALF_ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !1
; IEEE-NEXT:    store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
; IEEE-NEXT:    [[MD_1ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !2
; IEEE-NEXT:    store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
; IEEE-NEXT:    [[TMP1:%.*]] = fcmp olt float [[X]], 0x3810000000000000
; IEEE-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 32, i32 0
; IEEE-NEXT:    [[TMP3:%.*]] = call float @llvm.ldexp.f32.i32(float [[X]], i32 [[TMP2]])
; IEEE-NEXT:    [[TMP4:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP3]])
; IEEE-NEXT:    [[TMP5:%.*]] = select i1 [[TMP1]], i32 -16, i32 0
; IEEE-NEXT:    [[MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP4]], i32 [[TMP5]])
; IEEE-NEXT:    store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
; IEEE-NEXT:    [[TMP6:%.*]] = fcmp olt float [[X]], 0x3810000000000000
; IEEE-NEXT:    [[TMP7:%.*]] = select i1 [[TMP6]], i32 32, i32 0
; IEEE-NEXT:    [[TMP8:%.*]] = call float @llvm.ldexp.f32.i32(float [[X]], i32 [[TMP7]])
; IEEE-NEXT:    [[TMP9:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP8]])
; IEEE-NEXT:    [[TMP10:%.*]] = select i1 [[TMP6]], i32 -16, i32 0
; IEEE-NEXT:    [[MD_3ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP9]], i32 [[TMP10]])
; IEEE-NEXT:    store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4
; IEEE-NEXT:    [[TMP11:%.*]] = fcmp olt float [[X]], 0x3810000000000000
; IEEE-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], i32 32, i32 0
; IEEE-NEXT:    [[TMP13:%.*]] = call float @llvm.ldexp.f32.i32(float [[X]], i32 [[TMP12]])
; IEEE-NEXT:    [[TMP14:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP13]])
; IEEE-NEXT:    [[TMP15:%.*]] = select i1 [[TMP11]], i32 -16, i32 0
; IEEE-NEXT:    [[MD_2ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP14]], i32 [[TMP15]])
; IEEE-NEXT:    store volatile float [[MD_2ULP]], ptr addrspace(1) [[OUT]], align 4
; IEEE-NEXT:    ret void
;
; DAZ-LABEL: define amdgpu_kernel void @sqrt_fpmath_f32
; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1:[0-9]+]] {
; DAZ-NEXT:    [[NO_MD:%.*]] = call float @llvm.sqrt.f32(float [[X]])
; DAZ-NEXT:    store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
; DAZ-NEXT:    [[MD_HALF_ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !1
; DAZ-NEXT:    store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
; DAZ-NEXT:    [[MD_1ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]])
; DAZ-NEXT:    store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
; DAZ-NEXT:    [[MD_25ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]])
; DAZ-NEXT:    store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
; DAZ-NEXT:    [[MD_3ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]])
; DAZ-NEXT:    store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4
; DAZ-NEXT:    [[MD_2ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]])
; DAZ-NEXT:    store volatile float [[MD_2ULP]], ptr addrspace(1) [[OUT]], align 4
; DAZ-NEXT:    ret void
;
  %no.md = call float @llvm.sqrt.f32(float %x)
  store volatile float %no.md, ptr addrspace(1) %out, align 4

  %md.half.ulp = call float @llvm.sqrt.f32(float %x), !fpmath !1
  store volatile float %md.half.ulp, ptr addrspace(1) %out, align 4

  %md.1ulp = call float @llvm.sqrt.f32(float %x), !fpmath !2
  store volatile float %md.1ulp, ptr addrspace(1) %out, align 4

  %md.25ulp = call float @llvm.sqrt.f32(float %x), !fpmath !0
  store volatile float %md.25ulp, ptr addrspace(1) %out, align 4

  %md.3ulp = call float @llvm.sqrt.f32(float %x), !fpmath !3
  store volatile float %md.3ulp, ptr addrspace(1) %out, align 4

  %md.2ulp = call float @llvm.sqrt.f32(float %x), !fpmath !4
  store volatile float %md.2ulp, ptr addrspace(1) %out, align 4
  ret void
}

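; Same thresholds as the scalar case; the <2 x float> versions are scalarized per lane
; and the results are reassembled with insertelement.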
define amdgpu_kernel void @sqrt_fpmath_v2f32(ptr addrspace(1) %out, <2 x float> %x) {
; IEEE-LABEL: define amdgpu_kernel void @sqrt_fpmath_v2f32
; IEEE-SAME: (ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] {
; IEEE-NEXT:    [[NO_MD:%.*]] = call <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]])
; IEEE-NEXT:    store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
; IEEE-NEXT:    [[MD_HALF_ULP:%.*]] = call <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath !1
; IEEE-NEXT:    store volatile <2 x float> [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
; IEEE-NEXT:    [[MD_1ULP:%.*]] = call <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath !2
; IEEE-NEXT:    store volatile <2 x float> [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
; IEEE-NEXT:    [[TMP1:%.*]] = extractelement <2 x float> [[X]], i64 0
; IEEE-NEXT:    [[TMP2:%.*]] = extractelement <2 x float> [[X]], i64 1
; IEEE-NEXT:    [[TMP3:%.*]] = fcmp olt float [[TMP1]], 0x3810000000000000
; IEEE-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i32 32, i32 0
; IEEE-NEXT:    [[TMP5:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP1]], i32 [[TMP4]])
; IEEE-NEXT:    [[TMP6:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP5]])
; IEEE-NEXT:    [[TMP7:%.*]] = select i1 [[TMP3]], i32 -16, i32 0
; IEEE-NEXT:    [[TMP8:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP6]], i32 [[TMP7]])
; IEEE-NEXT:    [[TMP9:%.*]] = fcmp olt float [[TMP2]], 0x3810000000000000
; IEEE-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], i32 32, i32 0
; IEEE-NEXT:    [[TMP11:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP2]], i32 [[TMP10]])
; IEEE-NEXT:    [[TMP12:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP11]])
; IEEE-NEXT:    [[TMP13:%.*]] = select i1 [[TMP9]], i32 -16, i32 0
; IEEE-NEXT:    [[TMP14:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP12]], i32 [[TMP13]])
; IEEE-NEXT:    [[TMP15:%.*]] = insertelement <2 x float> poison, float [[TMP8]], i64 0
; IEEE-NEXT:    [[MD_25ULP:%.*]] = insertelement <2 x float> [[TMP15]], float [[TMP14]], i64 1
; IEEE-NEXT:    store volatile <2 x float> [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
; IEEE-NEXT:    [[TMP16:%.*]] = extractelement <2 x float> [[X]], i64 0
; IEEE-NEXT:    [[TMP17:%.*]] = extractelement <2 x float> [[X]], i64 1
; IEEE-NEXT:    [[TMP18:%.*]] = fcmp olt float [[TMP16]], 0x3810000000000000
; IEEE-NEXT:    [[TMP19:%.*]] = select i1 [[TMP18]], i32 32, i32 0
; IEEE-NEXT:    [[TMP20:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP19]])
; IEEE-NEXT:    [[TMP21:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP20]])
; IEEE-NEXT:    [[TMP22:%.*]] = select i1 [[TMP18]], i32 -16, i32 0
; IEEE-NEXT:    [[TMP23:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP21]], i32 [[TMP22]])
; IEEE-NEXT:    [[TMP24:%.*]] = fcmp olt float [[TMP17]], 0x3810000000000000
; IEEE-NEXT:    [[TMP25:%.*]] = select i1 [[TMP24]], i32 32, i32 0
; IEEE-NEXT:    [[TMP26:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP17]], i32 [[TMP25]])
; IEEE-NEXT:    [[TMP27:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP26]])
; IEEE-NEXT:    [[TMP28:%.*]] = select i1 [[TMP24]], i32 -16, i32 0
; IEEE-NEXT:    [[TMP29:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP27]], i32 [[TMP28]])
; IEEE-NEXT:    [[TMP30:%.*]] = insertelement <2 x float> poison, float [[TMP23]], i64 0
; IEEE-NEXT:    [[MD_3ULP:%.*]] = insertelement <2 x float> [[TMP30]], float [[TMP29]], i64 1
; IEEE-NEXT:    store volatile <2 x float> [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4
; IEEE-NEXT:    [[TMP31:%.*]] = extractelement <2 x float> [[X]], i64 0
; IEEE-NEXT:    [[TMP32:%.*]] = extractelement <2 x float> [[X]], i64 1
; IEEE-NEXT:    [[TMP33:%.*]] = fcmp olt float [[TMP31]], 0x3810000000000000
; IEEE-NEXT:    [[TMP34:%.*]] = select i1 [[TMP33]], i32 32, i32 0
; IEEE-NEXT:    [[TMP35:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP31]], i32 [[TMP34]])
; IEEE-NEXT:    [[TMP36:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP35]])
; IEEE-NEXT:    [[TMP37:%.*]] = select i1 [[TMP33]], i32 -16, i32 0
; IEEE-NEXT:    [[TMP38:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP36]], i32 [[TMP37]])
; IEEE-NEXT:    [[TMP39:%.*]] = fcmp olt float [[TMP32]], 0x3810000000000000
; IEEE-NEXT:    [[TMP40:%.*]] = select i1 [[TMP39]], i32 32, i32 0
; IEEE-NEXT:    [[TMP41:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP32]], i32 [[TMP40]])
; IEEE-NEXT:    [[TMP42:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP41]])
; IEEE-NEXT:    [[TMP43:%.*]] = select i1 [[TMP39]], i32 -16, i32 0
; IEEE-NEXT:    [[TMP44:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP42]], i32 [[TMP43]])
; IEEE-NEXT:    [[TMP45:%.*]] = insertelement <2 x float> poison, float [[TMP38]], i64 0
; IEEE-NEXT:    [[MD_2ULP:%.*]] = insertelement <2 x float> [[TMP45]], float [[TMP44]], i64 1
; IEEE-NEXT:    store volatile <2 x float> [[MD_2ULP]], ptr addrspace(1) [[OUT]], align 4
; IEEE-NEXT:    ret void
;
; DAZ-LABEL: define amdgpu_kernel void @sqrt_fpmath_v2f32
; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] {
; DAZ-NEXT:    [[NO_MD:%.*]] = call <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]])
; DAZ-NEXT:    store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
; DAZ-NEXT:    [[MD_HALF_ULP:%.*]] = call <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath !1
; DAZ-NEXT:    store volatile <2 x float> [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
; DAZ-NEXT:    [[TMP1:%.*]] = extractelement <2 x float> [[X]], i64 0
; DAZ-NEXT:    [[TMP2:%.*]] = extractelement <2 x float> [[X]], i64 1
; DAZ-NEXT:    [[TMP3:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP1]])
; DAZ-NEXT:    [[TMP4:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP2]])
; DAZ-NEXT:    [[TMP5:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i64 0
; DAZ-NEXT:    [[MD_1ULP:%.*]] = insertelement <2 x float> [[TMP5]], float [[TMP4]], i64 1
; DAZ-NEXT:    store volatile <2 x float> [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
; DAZ-NEXT:    [[TMP6:%.*]] = extractelement <2 x float> [[X]], i64 0
; DAZ-NEXT:    [[TMP7:%.*]] = extractelement <2 x float> [[X]], i64 1
; DAZ-NEXT:    [[TMP8:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP6]])
; DAZ-NEXT:    [[TMP9:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP7]])
; DAZ-NEXT:    [[TMP10:%.*]] = insertelement <2 x float> poison, float [[TMP8]], i64 0
; DAZ-NEXT:    [[MD_25ULP:%.*]] = insertelement <2 x float> [[TMP10]], float [[TMP9]], i64 1
; DAZ-NEXT:    store volatile <2 x float> [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
; DAZ-NEXT:    [[TMP11:%.*]] = extractelement <2 x float> [[X]], i64 0
; DAZ-NEXT:    [[TMP12:%.*]] = extractelement <2 x float> [[X]], i64 1
; DAZ-NEXT:    [[TMP13:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP11]])
; DAZ-NEXT:    [[TMP14:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP12]])
; DAZ-NEXT:    [[TMP15:%.*]] = insertelement <2 x float> poison, float [[TMP13]], i64 0
; DAZ-NEXT:    [[MD_3ULP:%.*]] = insertelement <2 x float> [[TMP15]], float [[TMP14]], i64 1
; DAZ-NEXT:    store volatile <2 x float> [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4
; DAZ-NEXT:    [[TMP16:%.*]] = extractelement <2 x float> [[X]], i64 0
; DAZ-NEXT:    [[TMP17:%.*]] = extractelement <2 x float> [[X]], i64 1
; DAZ-NEXT:    [[TMP18:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP16]])
; DAZ-NEXT:    [[TMP19:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP17]])
; DAZ-NEXT:    [[TMP20:%.*]] = insertelement <2 x float> poison, float [[TMP18]], i64 0
; DAZ-NEXT:    [[MD_2ULP:%.*]] = insertelement <2 x float> [[TMP20]], float [[TMP19]], i64 1
; DAZ-NEXT:    store volatile <2 x float> [[MD_2ULP]], ptr addrspace(1) [[OUT]], align 4
; DAZ-NEXT:    ret void
;
  %no.md = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %x)
  store volatile <2 x float> %no.md, ptr addrspace(1) %out, align 4

  %md.half.ulp = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !1
  store volatile <2 x float> %md.half.ulp, ptr addrspace(1) %out, align 4

  %md.1ulp = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !2
  store volatile <2 x float> %md.1ulp, ptr addrspace(1) %out, align 4

  %md.25ulp = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !0
  store volatile <2 x float> %md.25ulp, ptr addrspace(1) %out, align 4

  %md.3ulp = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !3
  store volatile <2 x float> %md.3ulp, ptr addrspace(1) %out, align 4

  %md.2ulp = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !4
  store volatile <2 x float> %md.2ulp, ptr addrspace(1) %out, align 4
  ret void
}

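; nofpclass(sub) rules out subnormal inputs, so both denormal modes lower 1 ulp and
; looser directly to llvm.amdgcn.sqrt.f32 with no scaling (hence the shared check lines).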
define amdgpu_kernel void @sqrt_fpmath_f32_known_nosub(ptr addrspace(1) %out, float nofpclass(sub) %x) {
; CHECK-LABEL: define amdgpu_kernel void @sqrt_fpmath_f32_known_nosub
; CHECK-SAME: (ptr addrspace(1) [[OUT:%.*]], float nofpclass(sub) [[X:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT:    [[NO_MD:%.*]] = call float @llvm.sqrt.f32(float [[X]])
; CHECK-NEXT:    store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_HALF_ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !1
; CHECK-NEXT:    store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_1ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]])
; CHECK-NEXT:    store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_25ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]])
; CHECK-NEXT:    store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_3ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]])
; CHECK-NEXT:    store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_2ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]])
; CHECK-NEXT:    store volatile float [[MD_2ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    ret void
;
  %no.md = call float @llvm.sqrt.f32(float %x)
  store volatile float %no.md, ptr addrspace(1) %out, align 4

  %md.half.ulp = call float @llvm.sqrt.f32(float %x), !fpmath !1
  store volatile float %md.half.ulp, ptr addrspace(1) %out, align 4

  %md.1ulp = call float @llvm.sqrt.f32(float %x), !fpmath !2
  store volatile float %md.1ulp, ptr addrspace(1) %out, align 4

  %md.25ulp = call float @llvm.sqrt.f32(float %x), !fpmath !0
  store volatile float %md.25ulp, ptr addrspace(1) %out, align 4

  %md.3ulp = call float @llvm.sqrt.f32(float %x), !fpmath !3
  store volatile float %md.3ulp, ptr addrspace(1) %out, align 4

  %md.2ulp = call float @llvm.sqrt.f32(float %x), !fpmath !4
  store volatile float %md.2ulp, ptr addrspace(1) %out, align 4
  ret void
}

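; nofpclass(nzero) alone does not exclude subnormal inputs, so the ieee/dynamic path
; still emits the full scaling expansion.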
define amdgpu_kernel void @sqrt_fpmath_f32_known_nonzero(ptr addrspace(1) %out, float nofpclass(nzero) %x) {
; IEEE-LABEL: define amdgpu_kernel void @sqrt_fpmath_f32_known_nonzero
; IEEE-SAME: (ptr addrspace(1) [[OUT:%.*]], float nofpclass(nzero) [[X:%.*]]) #[[ATTR1]] {
; IEEE-NEXT:    [[NO_MD:%.*]] = call float @llvm.sqrt.f32(float [[X]])
; IEEE-NEXT:    store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
; IEEE-NEXT:    [[MD_HALF_ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !1
; IEEE-NEXT:    store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
; IEEE-NEXT:    [[MD_1ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !2
; IEEE-NEXT:    store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
; IEEE-NEXT:    [[TMP1:%.*]] = fcmp olt float [[X]], 0x3810000000000000
; IEEE-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 32, i32 0
; IEEE-NEXT:    [[TMP3:%.*]] = call float @llvm.ldexp.f32.i32(float [[X]], i32 [[TMP2]])
; IEEE-NEXT:    [[TMP4:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP3]])
; IEEE-NEXT:    [[TMP5:%.*]] = select i1 [[TMP1]], i32 -16, i32 0
; IEEE-NEXT:    [[MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP4]], i32 [[TMP5]])
; IEEE-NEXT:    store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
; IEEE-NEXT:    [[TMP6:%.*]] = fcmp olt float [[X]], 0x3810000000000000
; IEEE-NEXT:    [[TMP7:%.*]] = select i1 [[TMP6]], i32 32, i32 0
; IEEE-NEXT:    [[TMP8:%.*]] = call float @llvm.ldexp.f32.i32(float [[X]], i32 [[TMP7]])
; IEEE-NEXT:    [[TMP9:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP8]])
; IEEE-NEXT:    [[TMP10:%.*]] = select i1 [[TMP6]], i32 -16, i32 0
; IEEE-NEXT:    [[MD_3ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP9]], i32 [[TMP10]])
; IEEE-NEXT:    store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4
; IEEE-NEXT:    [[TMP11:%.*]] = fcmp olt float [[X]], 0x3810000000000000
; IEEE-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], i32 32, i32 0
; IEEE-NEXT:    [[TMP13:%.*]] = call float @llvm.ldexp.f32.i32(float [[X]], i32 [[TMP12]])
; IEEE-NEXT:    [[TMP14:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP13]])
; IEEE-NEXT:    [[TMP15:%.*]] = select i1 [[TMP11]], i32 -16, i32 0
; IEEE-NEXT:    [[MD_2ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP14]], i32 [[TMP15]])
; IEEE-NEXT:    store volatile float [[MD_2ULP]], ptr addrspace(1) [[OUT]], align 4
; IEEE-NEXT:    ret void
;
; DAZ-LABEL: define amdgpu_kernel void @sqrt_fpmath_f32_known_nonzero
; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], float nofpclass(nzero) [[X:%.*]]) #[[ATTR1]] {
; DAZ-NEXT:    [[NO_MD:%.*]] = call float @llvm.sqrt.f32(float [[X]])
; DAZ-NEXT:    store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
; DAZ-NEXT:    [[MD_HALF_ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !1
; DAZ-NEXT:    store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
; DAZ-NEXT:    [[MD_1ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]])
; DAZ-NEXT:    store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
; DAZ-NEXT:    [[MD_25ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]])
; DAZ-NEXT:    store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
; DAZ-NEXT:    [[MD_3ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]])
; DAZ-NEXT:    store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4
; DAZ-NEXT:    [[MD_2ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]])
; DAZ-NEXT:    store volatile float [[MD_2ULP]], ptr addrspace(1) [[OUT]], align 4
; DAZ-NEXT:    ret void
;
  %no.md = call float @llvm.sqrt.f32(float %x)
  store volatile float %no.md, ptr addrspace(1) %out, align 4

  %md.half.ulp = call float @llvm.sqrt.f32(float %x), !fpmath !1
  store volatile float %md.half.ulp, ptr addrspace(1) %out, align 4

  %md.1ulp = call float @llvm.sqrt.f32(float %x), !fpmath !2
  store volatile float %md.1ulp, ptr addrspace(1) %out, align 4

  %md.25ulp = call float @llvm.sqrt.f32(float %x), !fpmath !0
  store volatile float %md.25ulp, ptr addrspace(1) %out, align 4

  %md.3ulp = call float @llvm.sqrt.f32(float %x), !fpmath !3
  store volatile float %md.3ulp, ptr addrspace(1) %out, align 4

  %md.2ulp = call float @llvm.sqrt.f32(float %x), !fpmath !4
  store volatile float %md.2ulp, ptr addrspace(1) %out, align 4
  ret void
}

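; Excluding negative zero and negative subnormals still leaves positive subnormals
; possible, so the ieee/dynamic expansion is unchanged.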
define amdgpu_kernel void @sqrt_fpmath_f32_known_nonzero_nonsub(ptr addrspace(1) %out, float nofpclass(nzero nsub) %x) {
; IEEE-LABEL: define amdgpu_kernel void @sqrt_fpmath_f32_known_nonzero_nonsub
; IEEE-SAME: (ptr addrspace(1) [[OUT:%.*]], float nofpclass(nzero nsub) [[X:%.*]]) #[[ATTR1]] {
; IEEE-NEXT:    [[NO_MD:%.*]] = call float @llvm.sqrt.f32(float [[X]])
; IEEE-NEXT:    store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
; IEEE-NEXT:    [[MD_HALF_ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !1
; IEEE-NEXT:    store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
; IEEE-NEXT:    [[MD_1ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !2
; IEEE-NEXT:    store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
; IEEE-NEXT:    [[TMP1:%.*]] = fcmp olt float [[X]], 0x3810000000000000
; IEEE-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 32, i32 0
; IEEE-NEXT:    [[TMP3:%.*]] = call float @llvm.ldexp.f32.i32(float [[X]], i32 [[TMP2]])
; IEEE-NEXT:    [[TMP4:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP3]])
; IEEE-NEXT:    [[TMP5:%.*]] = select i1 [[TMP1]], i32 -16, i32 0
; IEEE-NEXT:    [[MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP4]], i32 [[TMP5]])
; IEEE-NEXT:    store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
; IEEE-NEXT:    [[TMP6:%.*]] = fcmp olt float [[X]], 0x3810000000000000
; IEEE-NEXT:    [[TMP7:%.*]] = select i1 [[TMP6]], i32 32, i32 0
; IEEE-NEXT:    [[TMP8:%.*]] = call float @llvm.ldexp.f32.i32(float [[X]], i32 [[TMP7]])
; IEEE-NEXT:    [[TMP9:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP8]])
; IEEE-NEXT:    [[TMP10:%.*]] = select i1 [[TMP6]], i32 -16, i32 0
; IEEE-NEXT:    [[MD_3ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP9]], i32 [[TMP10]])
; IEEE-NEXT:    store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4
; IEEE-NEXT:    [[TMP11:%.*]] = fcmp olt float [[X]], 0x3810000000000000
; IEEE-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], i32 32, i32 0
; IEEE-NEXT:    [[TMP13:%.*]] = call float @llvm.ldexp.f32.i32(float [[X]], i32 [[TMP12]])
; IEEE-NEXT:    [[TMP14:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP13]])
; IEEE-NEXT:    [[TMP15:%.*]] = select i1 [[TMP11]], i32 -16, i32 0
; IEEE-NEXT:    [[MD_2ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP14]], i32 [[TMP15]])
; IEEE-NEXT:    store volatile float [[MD_2ULP]], ptr addrspace(1) [[OUT]], align 4
; IEEE-NEXT:    ret void
;
; DAZ-LABEL: define amdgpu_kernel void @sqrt_fpmath_f32_known_nonzero_nonsub
; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], float nofpclass(nzero nsub) [[X:%.*]]) #[[ATTR1]] {
; DAZ-NEXT:    [[NO_MD:%.*]] = call float @llvm.sqrt.f32(float [[X]])
; DAZ-NEXT:    store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
; DAZ-NEXT:    [[MD_HALF_ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !1
; DAZ-NEXT:    store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
; DAZ-NEXT:    [[MD_1ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]])
; DAZ-NEXT:    store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
; DAZ-NEXT:    [[MD_25ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]])
; DAZ-NEXT:    store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
; DAZ-NEXT:    [[MD_3ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]])
; DAZ-NEXT:    store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4
; DAZ-NEXT:    [[MD_2ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]])
; DAZ-NEXT:    store volatile float [[MD_2ULP]], ptr addrspace(1) [[OUT]], align 4
; DAZ-NEXT:    ret void
;
  %no.md = call float @llvm.sqrt.f32(float %x)
  store volatile float %no.md, ptr addrspace(1) %out, align 4

  %md.half.ulp = call float @llvm.sqrt.f32(float %x), !fpmath !1
  store volatile float %md.half.ulp, ptr addrspace(1) %out, align 4

  %md.1ulp = call float @llvm.sqrt.f32(float %x), !fpmath !2
  store volatile float %md.1ulp, ptr addrspace(1) %out, align 4

  %md.25ulp = call float @llvm.sqrt.f32(float %x), !fpmath !0
  store volatile float %md.25ulp, ptr addrspace(1) %out, align 4

  %md.3ulp = call float @llvm.sqrt.f32(float %x), !fpmath !3
  store volatile float %md.3ulp, ptr addrspace(1) %out, align 4

  %md.2ulp = call float @llvm.sqrt.f32(float %x), !fpmath !4
  store volatile float %md.2ulp, ptr addrspace(1) %out, align 4
  ret void
}

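; Also excluding infinities does not help; positive subnormals remain possible and the
; ieee/dynamic path keeps the scaling sequence.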
define amdgpu_kernel void @sqrt_fpmath_f32_known_nonzero_nonsub_noinf(ptr addrspace(1) %out, float nofpclass(nzero nsub inf) %x) {
; IEEE-LABEL: define amdgpu_kernel void @sqrt_fpmath_f32_known_nonzero_nonsub_noinf
; IEEE-SAME: (ptr addrspace(1) [[OUT:%.*]], float nofpclass(inf nzero nsub) [[X:%.*]]) #[[ATTR1]] {
; IEEE-NEXT:    [[NO_MD:%.*]] = call float @llvm.sqrt.f32(float [[X]])
; IEEE-NEXT:    store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
; IEEE-NEXT:    [[MD_HALF_ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !1
; IEEE-NEXT:    store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
; IEEE-NEXT:    [[MD_1ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !2
; IEEE-NEXT:    store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
; IEEE-NEXT:    [[TMP1:%.*]] = fcmp olt float [[X]], 0x3810000000000000
; IEEE-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 32, i32 0
; IEEE-NEXT:    [[TMP3:%.*]] = call float @llvm.ldexp.f32.i32(float [[X]], i32 [[TMP2]])
; IEEE-NEXT:    [[TMP4:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP3]])
; IEEE-NEXT:    [[TMP5:%.*]] = select i1 [[TMP1]], i32 -16, i32 0
; IEEE-NEXT:    [[MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP4]], i32 [[TMP5]])
; IEEE-NEXT:    store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
; IEEE-NEXT:    [[TMP6:%.*]] = fcmp olt float [[X]], 0x3810000000000000
; IEEE-NEXT:    [[TMP7:%.*]] = select i1 [[TMP6]], i32 32, i32 0
; IEEE-NEXT:    [[TMP8:%.*]] = call float @llvm.ldexp.f32.i32(float [[X]], i32 [[TMP7]])
; IEEE-NEXT:    [[TMP9:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP8]])
; IEEE-NEXT:    [[TMP10:%.*]] = select i1 [[TMP6]], i32 -16, i32 0
; IEEE-NEXT:    [[MD_3ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP9]], i32 [[TMP10]])
; IEEE-NEXT:    store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4
; IEEE-NEXT:    [[TMP11:%.*]] = fcmp olt float [[X]], 0x3810000000000000
; IEEE-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], i32 32, i32 0
; IEEE-NEXT:    [[TMP13:%.*]] = call float @llvm.ldexp.f32.i32(float [[X]], i32 [[TMP12]])
; IEEE-NEXT:    [[TMP14:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP13]])
; IEEE-NEXT:    [[TMP15:%.*]] = select i1 [[TMP11]], i32 -16, i32 0
; IEEE-NEXT:    [[MD_2ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP14]], i32 [[TMP15]])
; IEEE-NEXT:    store volatile float [[MD_2ULP]], ptr addrspace(1) [[OUT]], align 4
; IEEE-NEXT:    ret void
;
; DAZ-LABEL: define amdgpu_kernel void @sqrt_fpmath_f32_known_nonzero_nonsub_noinf
; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], float nofpclass(inf nzero nsub) [[X:%.*]]) #[[ATTR1]] {
; DAZ-NEXT:    [[NO_MD:%.*]] = call float @llvm.sqrt.f32(float [[X]])
; DAZ-NEXT:    store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
; DAZ-NEXT:    [[MD_HALF_ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !1
; DAZ-NEXT:    store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
; DAZ-NEXT:    [[MD_1ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]])
; DAZ-NEXT:    store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
; DAZ-NEXT:    [[MD_25ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]])
; DAZ-NEXT:    store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
; DAZ-NEXT:    [[MD_3ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]])
; DAZ-NEXT:    store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4
; DAZ-NEXT:    [[MD_2ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]])
; DAZ-NEXT:    store volatile float [[MD_2ULP]], ptr addrspace(1) [[OUT]], align 4
; DAZ-NEXT:    ret void
;
  %no.md = call float @llvm.sqrt.f32(float %x)
  store volatile float %no.md, ptr addrspace(1) %out, align 4

  %md.half.ulp = call float @llvm.sqrt.f32(float %x), !fpmath !1
  store volatile float %md.half.ulp, ptr addrspace(1) %out, align 4

  %md.1ulp = call float @llvm.sqrt.f32(float %x), !fpmath !2
  store volatile float %md.1ulp, ptr addrspace(1) %out, align 4

  %md.25ulp = call float @llvm.sqrt.f32(float %x), !fpmath !0
  store volatile float %md.25ulp, ptr addrspace(1) %out, align 4

  %md.3ulp = call float @llvm.sqrt.f32(float %x), !fpmath !3
  store volatile float %md.3ulp, ptr addrspace(1) %out, align 4

  %md.2ulp = call float @llvm.sqrt.f32(float %x), !fpmath !4
  store volatile float %md.2ulp, ptr addrspace(1) %out, align 4
  ret void
}

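; Excluding only positive subnormals is not sufficient either; the ieee/dynamic path
; still emits the scaling expansion.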
define amdgpu_kernel void @sqrt_fpmath_f32_known_nopsub(ptr addrspace(1) %out, float nofpclass(psub) %x) {
; IEEE-LABEL: define amdgpu_kernel void @sqrt_fpmath_f32_known_nopsub
; IEEE-SAME: (ptr addrspace(1) [[OUT:%.*]], float nofpclass(psub) [[X:%.*]]) #[[ATTR1]] {
; IEEE-NEXT:    [[NO_MD:%.*]] = call float @llvm.sqrt.f32(float [[X]])
; IEEE-NEXT:    store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
; IEEE-NEXT:    [[MD_HALF_ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !1
; IEEE-NEXT:    store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
; IEEE-NEXT:    [[MD_1ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !2
; IEEE-NEXT:    store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
; IEEE-NEXT:    [[TMP1:%.*]] = fcmp olt float [[X]], 0x3810000000000000
; IEEE-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 32, i32 0
; IEEE-NEXT:    [[TMP3:%.*]] = call float @llvm.ldexp.f32.i32(float [[X]], i32 [[TMP2]])
; IEEE-NEXT:    [[TMP4:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP3]])
; IEEE-NEXT:    [[TMP5:%.*]] = select i1 [[TMP1]], i32 -16, i32 0
; IEEE-NEXT:    [[MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP4]], i32 [[TMP5]])
; IEEE-NEXT:    store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
; IEEE-NEXT:    [[TMP6:%.*]] = fcmp olt float [[X]], 0x3810000000000000
; IEEE-NEXT:    [[TMP7:%.*]] = select i1 [[TMP6]], i32 32, i32 0
; IEEE-NEXT:    [[TMP8:%.*]] = call float @llvm.ldexp.f32.i32(float [[X]], i32 [[TMP7]])
; IEEE-NEXT:    [[TMP9:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP8]])
; IEEE-NEXT:    [[TMP10:%.*]] = select i1 [[TMP6]], i32 -16, i32 0
; IEEE-NEXT:    [[MD_3ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP9]], i32 [[TMP10]])
; IEEE-NEXT:    store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4
; IEEE-NEXT:    [[TMP11:%.*]] = fcmp olt float [[X]], 0x3810000000000000
; IEEE-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], i32 32, i32 0
; IEEE-NEXT:    [[TMP13:%.*]] = call float @llvm.ldexp.f32.i32(float [[X]], i32 [[TMP12]])
; IEEE-NEXT:    [[TMP14:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP13]])
; IEEE-NEXT:    [[TMP15:%.*]] = select i1 [[TMP11]], i32 -16, i32 0
; IEEE-NEXT:    [[MD_2ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP14]], i32 [[TMP15]])
; IEEE-NEXT:    store volatile float [[MD_2ULP]], ptr addrspace(1) [[OUT]], align 4
; IEEE-NEXT:    ret void
;
; DAZ-LABEL: define amdgpu_kernel void @sqrt_fpmath_f32_known_nopsub
; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], float nofpclass(psub) [[X:%.*]]) #[[ATTR1]] {
; DAZ-NEXT:    [[NO_MD:%.*]] = call float @llvm.sqrt.f32(float [[X]])
; DAZ-NEXT:    store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
; DAZ-NEXT:    [[MD_HALF_ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !1
; DAZ-NEXT:    store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
; DAZ-NEXT:    [[MD_1ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]])
; DAZ-NEXT:    store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
; DAZ-NEXT:    [[MD_25ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]])
; DAZ-NEXT:    store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
; DAZ-NEXT:    [[MD_3ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]])
; DAZ-NEXT:    store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4
; DAZ-NEXT:    [[MD_2ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]])
; DAZ-NEXT:    store volatile float [[MD_2ULP]], ptr addrspace(1) [[OUT]], align 4
; DAZ-NEXT:    ret void
;
  %no.md = call float @llvm.sqrt.f32(float %x)
  store volatile float %no.md, ptr addrspace(1) %out, align 4

  %md.half.ulp = call float @llvm.sqrt.f32(float %x), !fpmath !1
  store volatile float %md.half.ulp, ptr addrspace(1) %out, align 4

  %md.1ulp = call float @llvm.sqrt.f32(float %x), !fpmath !2
  store volatile float %md.1ulp, ptr addrspace(1) %out, align 4

  %md.25ulp = call float @llvm.sqrt.f32(float %x), !fpmath !0
  store volatile float %md.25ulp, ptr addrspace(1) %out, align 4

  %md.3ulp = call float @llvm.sqrt.f32(float %x), !fpmath !3
  store volatile float %md.3ulp, ptr addrspace(1) %out, align 4

  %md.2ulp = call float @llvm.sqrt.f32(float %x), !fpmath !4
  store volatile float %md.2ulp, ptr addrspace(1) %out, align 4
  ret void
}

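; afn sqrt calls are not rewritten here: every call below stays as llvm.sqrt.f32 with
; its !fpmath, in both denormal modes.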
define amdgpu_kernel void @sqrt_fpmath_f32_afn(ptr addrspace(1) %out, float %x) {
; CHECK-LABEL: define amdgpu_kernel void @sqrt_fpmath_f32_afn
; CHECK-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:    [[NO_MD:%.*]] = call afn float @llvm.sqrt.f32(float [[X]])
; CHECK-NEXT:    store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_HALF_ULP:%.*]] = call afn float @llvm.sqrt.f32(float [[X]]), !fpmath !1
; CHECK-NEXT:    store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_1ULP:%.*]] = call afn float @llvm.sqrt.f32(float [[X]]), !fpmath !2
; CHECK-NEXT:    store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_25ULP:%.*]] = call afn float @llvm.sqrt.f32(float [[X]]), !fpmath !3
; CHECK-NEXT:    store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_3ULP:%.*]] = call afn float @llvm.sqrt.f32(float [[X]]), !fpmath !0
; CHECK-NEXT:    store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_2ULP:%.*]] = call afn float @llvm.sqrt.f32(float [[X]]), !fpmath !4
; CHECK-NEXT:    store volatile float [[MD_2ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    ret void
;
  %no.md = call afn float @llvm.sqrt.f32(float %x)
  store volatile float %no.md, ptr addrspace(1) %out, align 4

  %md.half.ulp = call afn float @llvm.sqrt.f32(float %x), !fpmath !1
  store volatile float %md.half.ulp, ptr addrspace(1) %out, align 4

  %md.1ulp = call afn float @llvm.sqrt.f32(float %x), !fpmath !2
  store volatile float %md.1ulp, ptr addrspace(1) %out, align 4

  %md.25ulp = call afn float @llvm.sqrt.f32(float %x), !fpmath !0
  store volatile float %md.25ulp, ptr addrspace(1) %out, align 4

  %md.3ulp = call afn float @llvm.sqrt.f32(float %x), !fpmath !3
  store volatile float %md.3ulp, ptr addrspace(1) %out, align 4

  %md.2ulp = call afn float @llvm.sqrt.f32(float %x), !fpmath !4
  store volatile float %md.2ulp, ptr addrspace(1) %out, align 4
  ret void
}

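; An llvm.assume that |x| >= 0x1p-126 (the smallest normal) lets both denormal modes
; lower 1 ulp and looser directly to llvm.amdgcn.sqrt.f32 without scaling; the afn
; calls are still left as llvm.sqrt.f32.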
define amdgpu_kernel void @sqrt_fpmath_f32_assume_nosub(ptr addrspace(1) %out, float %x) {
; CHECK-LABEL: define amdgpu_kernel void @sqrt_fpmath_f32_assume_nosub
; CHECK-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:    [[FABS_X:%.*]] = call float @llvm.fabs.f32(float [[X]])
; CHECK-NEXT:    [[IS_NOT_SUBNORMAL:%.*]] = fcmp oge float [[FABS_X]], 0x3810000000000000
; CHECK-NEXT:    call void @llvm.assume(i1 [[IS_NOT_SUBNORMAL]])
; CHECK-NEXT:    [[NO_MD:%.*]] = call float @llvm.sqrt.f32(float [[X]])
; CHECK-NEXT:    store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_HALF_ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !1
; CHECK-NEXT:    store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_1ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]])
; CHECK-NEXT:    store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_25ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]])
; CHECK-NEXT:    store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_3ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]])
; CHECK-NEXT:    store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_2ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]])
; CHECK-NEXT:    store volatile float [[MD_2ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_3ULP_AFN:%.*]] = call afn float @llvm.sqrt.f32(float [[X]]), !fpmath !0
; CHECK-NEXT:    store volatile float [[MD_3ULP_AFN]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[NO_MD_AFN:%.*]] = call afn float @llvm.sqrt.f32(float [[X]])
; CHECK-NEXT:    store volatile float [[NO_MD_AFN]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    ret void
;
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %is.not.subnormal = fcmp oge float %fabs.x, 0x3810000000000000
  call void @llvm.assume(i1 %is.not.subnormal)

  %no.md = call float @llvm.sqrt.f32(float %x)
  store volatile float %no.md, ptr addrspace(1) %out, align 4

  %md.half.ulp = call float @llvm.sqrt.f32(float %x), !fpmath !1
  store volatile float %md.half.ulp, ptr addrspace(1) %out, align 4

  %md.1ulp = call float @llvm.sqrt.f32(float %x), !fpmath !2
  store volatile float %md.1ulp, ptr addrspace(1) %out, align 4

  %md.25ulp = call float @llvm.sqrt.f32(float %x), !fpmath !0
  store volatile float %md.25ulp, ptr addrspace(1) %out, align 4

  %md.3ulp = call float @llvm.sqrt.f32(float %x), !fpmath !3
  store volatile float %md.3ulp, ptr addrspace(1) %out, align 4

  %md.2ulp = call float @llvm.sqrt.f32(float %x), !fpmath !4
  store volatile float %md.2ulp, ptr addrspace(1) %out, align 4

  %md.3ulp.afn = call afn float @llvm.sqrt.f32(float %x), !fpmath !3
  store volatile float %md.3ulp.afn, ptr addrspace(1) %out, align 4

  %no.md.afn = call afn float @llvm.sqrt.f32(float %x)
  store volatile float %no.md.afn, ptr addrspace(1) %out, align 4

  ret void
}

declare float @llvm.sqrt.f32(float)
declare <2 x float> @llvm.sqrt.v2f32(<2 x float>)
declare float @llvm.fabs.f32(float)
declare void @llvm.assume(i1 noundef)

attributes #0 = { optnone noinline }

!0 = !{float 2.500000e+00}
!1 = !{float 5.000000e-01}
!2 = !{float 1.000000e+00}
!3 = !{float 3.000000e+00}
!4 = !{float 2.000000e+00}
