; xref: /llvm-project/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll (revision 7dbd6cd2946ec3a9b4ad2dfd7ead177baac15bd7)
; RUN: opt -S -O1 -mtriple=amdgcn-- -amdgpu-simplify-libcall < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-POSTLINK %s
; RUN: opt -S -O1 -mtriple=amdgcn-- -amdgpu-simplify-libcall -amdgpu-prelink -amdgpu-enable-ocl-mangling-mismatch-workaround=0 <%s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-PRELINK %s
; RUN: opt -S -O1 -mtriple=amdgcn-- -amdgpu-use-native -amdgpu-prelink < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NATIVE %s
; RUN: opt -S -passes='default<O1>' -mtriple=amdgcn-- -amdgpu-simplify-libcall < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-POSTLINK %s
; RUN: opt -S -passes='default<O1>' -mtriple=amdgcn-- -amdgpu-simplify-libcall -amdgpu-prelink -amdgpu-enable-ocl-mangling-mismatch-workaround=0 <%s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-PRELINK %s
; RUN: opt -S -passes='default<O1>' -mtriple=amdgcn-- -amdgpu-use-native -amdgpu-prelink < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NATIVE %s

; A sin/cos pair on the same operand folds to a sincos library call when
; prelinking, or to native_sin/native_cos under -amdgpu-use-native.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos
; GCN-POSTLINK: call fast float @_Z3sinf(
; GCN-POSTLINK: call fast float @_Z3cosf(
; GCN-PRELINK: call fast float @_Z6sincosfPU3AS5f(
; GCN-NATIVE: call fast float @_Z10native_sinf(
; GCN-NATIVE: call fast float @_Z10native_cosf(
define amdgpu_kernel void @test_sincos(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z3sinf(float %tmp)
  store float %call, ptr addrspace(1) %a, align 4
  %call2 = call fast float @_Z3cosf(float %tmp)
  %arrayidx3 = getelementptr inbounds float, ptr addrspace(1) %a, i64 1
  store float %call2, ptr addrspace(1) %arrayidx3, align 4
  ret void
}

declare float @_Z3sinf(float)

declare float @_Z3cosf(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v2
; GCN-POSTLINK: call fast <2 x float> @_Z3sinDv2_f(
; GCN-POSTLINK: call fast <2 x float> @_Z3cosDv2_f(
; GCN-PRELINK: call fast <2 x float> @_Z6sincosDv2_fPU3AS5S_(
; GCN-NATIVE: call fast <2 x float> @_Z10native_sinDv2_f(
; GCN-NATIVE: call fast <2 x float> @_Z10native_cosDv2_f(
define amdgpu_kernel void @test_sincos_v2(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load <2 x float>, ptr addrspace(1) %a, align 8
  %call = call fast <2 x float> @_Z3sinDv2_f(<2 x float> %tmp)
  store <2 x float> %call, ptr addrspace(1) %a, align 8
  %call2 = call fast <2 x float> @_Z3cosDv2_f(<2 x float> %tmp)
  %arrayidx3 = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i64 1
  store <2 x float> %call2, ptr addrspace(1) %arrayidx3, align 8
  ret void
}

declare <2 x float> @_Z3sinDv2_f(<2 x float>)

declare <2 x float> @_Z3cosDv2_f(<2 x float>)

; Same sincos folding on <3 x float> (stored via <4 x float> shuffles, the
; usual OpenCL float3 lowering) and on <4 x float>.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v3
; GCN-POSTLINK: call fast <3 x float> @_Z3sinDv3_f(
; GCN-POSTLINK: call fast <3 x float> @_Z3cosDv3_f(
; GCN-PRELINK: call fast <3 x float> @_Z6sincosDv3_fPU3AS5S_(
; GCN-NATIVE: call fast <3 x float> @_Z10native_sinDv3_f(
; GCN-NATIVE: call fast <3 x float> @_Z10native_cosDv3_f(
define amdgpu_kernel void @test_sincos_v3(ptr addrspace(1) nocapture %a) {
entry:
  %loadVec4 = load <4 x float>, ptr addrspace(1) %a, align 16
  %extractVec4 = shufflevector <4 x float> %loadVec4, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  %call = call fast <3 x float> @_Z3sinDv3_f(<3 x float> %extractVec4)
  %extractVec6 = shufflevector <3 x float> %call, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  store <4 x float> %extractVec6, ptr addrspace(1) %a, align 16
  %call11 = call fast <3 x float> @_Z3cosDv3_f(<3 x float> %extractVec4)
  %arrayidx12 = getelementptr inbounds <3 x float>, ptr addrspace(1) %a, i64 1
  %extractVec13 = shufflevector <3 x float> %call11, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  store <4 x float> %extractVec13, ptr addrspace(1) %arrayidx12, align 16
  ret void
}

declare <3 x float> @_Z3sinDv3_f(<3 x float>)

declare <3 x float> @_Z3cosDv3_f(<3 x float>)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v4
; GCN-POSTLINK: call fast <4 x float> @_Z3sinDv4_f(
; GCN-POSTLINK: call fast <4 x float> @_Z3cosDv4_f(
; GCN-PRELINK: call fast <4 x float> @_Z6sincosDv4_fPU3AS5S_(
; GCN-NATIVE: call fast <4 x float> @_Z10native_sinDv4_f(
; GCN-NATIVE: call fast <4 x float> @_Z10native_cosDv4_f(
define amdgpu_kernel void @test_sincos_v4(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load <4 x float>, ptr addrspace(1) %a, align 16
  %call = call fast <4 x float> @_Z3sinDv4_f(<4 x float> %tmp)
  store <4 x float> %call, ptr addrspace(1) %a, align 16
  %call2 = call fast <4 x float> @_Z3cosDv4_f(<4 x float> %tmp)
  %arrayidx3 = getelementptr inbounds <4 x float>, ptr addrspace(1) %a, i64 1
  store <4 x float> %call2, ptr addrspace(1) %arrayidx3, align 16
  ret void
}

declare <4 x float> @_Z3sinDv4_f(<4 x float>)

declare <4 x float> @_Z3cosDv4_f(<4 x float>)

; Same sincos folding on the widest OpenCL vector widths, <8 x float> and
; <16 x float>.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v8
; GCN-POSTLINK: call fast <8 x float> @_Z3sinDv8_f(
; GCN-POSTLINK: call fast <8 x float> @_Z3cosDv8_f(
; GCN-PRELINK: call fast <8 x float> @_Z6sincosDv8_fPU3AS5S_(
; GCN-NATIVE: call fast <8 x float> @_Z10native_sinDv8_f(
; GCN-NATIVE: call fast <8 x float> @_Z10native_cosDv8_f(
define amdgpu_kernel void @test_sincos_v8(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load <8 x float>, ptr addrspace(1) %a, align 32
  %call = call fast <8 x float> @_Z3sinDv8_f(<8 x float> %tmp)
  store <8 x float> %call, ptr addrspace(1) %a, align 32
  %call2 = call fast <8 x float> @_Z3cosDv8_f(<8 x float> %tmp)
  %arrayidx3 = getelementptr inbounds <8 x float>, ptr addrspace(1) %a, i64 1
  store <8 x float> %call2, ptr addrspace(1) %arrayidx3, align 32
  ret void
}

declare <8 x float> @_Z3sinDv8_f(<8 x float>)

declare <8 x float> @_Z3cosDv8_f(<8 x float>)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v16
; GCN-POSTLINK: call fast <16 x float> @_Z3sinDv16_f(
; GCN-POSTLINK: call fast <16 x float> @_Z3cosDv16_f(
; GCN-PRELINK: call fast <16 x float> @_Z6sincosDv16_fPU3AS5S_(
; GCN-NATIVE: call fast <16 x float> @_Z10native_sinDv16_f(
; GCN-NATIVE: call fast <16 x float> @_Z10native_cosDv16_f(
define amdgpu_kernel void @test_sincos_v16(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load <16 x float>, ptr addrspace(1) %a, align 64
  %call = call fast <16 x float> @_Z3sinDv16_f(<16 x float> %tmp)
  store <16 x float> %call, ptr addrspace(1) %a, align 64
  %call2 = call fast <16 x float> @_Z3cosDv16_f(<16 x float> %tmp)
  %arrayidx3 = getelementptr inbounds <16 x float>, ptr addrspace(1) %a, i64 1
  store <16 x float> %call2, ptr addrspace(1) %arrayidx3, align 64
  ret void
}

declare <16 x float> @_Z3sinDv16_f(<16 x float>)

declare <16 x float> @_Z3cosDv16_f(<16 x float>)

; native/half recip and divide are left untouched by the simplifier; the
; underlying implementations optimize after inlining.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_native_recip
; GCN: %call = tail call fast float @_Z12native_recipf(float 3.000000e+00)
define amdgpu_kernel void @test_native_recip(ptr addrspace(1) nocapture %a) {
entry:
  %call = call fast float @_Z12native_recipf(float 3.000000e+00)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

declare float @_Z12native_recipf(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_half_recip
;  GCN: %call = tail call fast float @_Z10half_recipf(float 3.000000e+00)
define amdgpu_kernel void @test_half_recip(ptr addrspace(1) nocapture %a) {
entry:
  %call = call fast float @_Z10half_recipf(float 3.000000e+00)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

declare float @_Z10half_recipf(float)

; Do nothing, the underlying implementation will optimize correctly
; after inlining.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_native_divide
; GCN: %call = tail call fast float @_Z13native_divideff(float %tmp, float 3.000000e+00)
define amdgpu_kernel void @test_native_divide(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z13native_divideff(float %tmp, float 3.000000e+00)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

declare float @_Z13native_divideff(float, float)

; Do nothing, the optimization will naturally happen after inlining.

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_half_divide
; GCN: %call = tail call fast float @_Z11half_divideff(float %tmp, float 3.000000e+00)
define amdgpu_kernel void @test_half_divide(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z11half_divideff(float %tmp, float 3.000000e+00)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

declare float @_Z11half_divideff(float, float)

; pow(x, c) with small constant exponents folds: c==0 -> 1.0, c==1 -> x,
; c==2 -> x*x.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_0f
; GCN: store float 1.000000e+00, ptr addrspace(1) %a
define amdgpu_kernel void @test_pow_0f(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z3powff(float %tmp, float 0.000000e+00)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

declare float @_Z3powff(float, float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_0i
; GCN: store float 1.000000e+00, ptr addrspace(1) %a
define amdgpu_kernel void @test_pow_0i(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z3powff(float %tmp, float 0.000000e+00)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_1f
; GCN: %tmp = load float, ptr addrspace(1) %arrayidx, align 4
; GCN: store float %tmp, ptr addrspace(1) %a, align 4
define amdgpu_kernel void @test_pow_1f(ptr addrspace(1) nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, ptr addrspace(1) %a, i64 1
  %tmp = load float, ptr addrspace(1) %arrayidx, align 4
  %call = call fast float @_Z3powff(float %tmp, float 1.000000e+00)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_1i
; GCN: %tmp = load float, ptr addrspace(1) %arrayidx, align 4
; GCN: store float %tmp, ptr addrspace(1) %a, align 4
define amdgpu_kernel void @test_pow_1i(ptr addrspace(1) nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, ptr addrspace(1) %a, i64 1
  %tmp = load float, ptr addrspace(1) %arrayidx, align 4
  %call = call fast float @_Z3powff(float %tmp, float 1.000000e+00)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_2f
; GCN: %tmp = load float, ptr addrspace(1) %a, align 4
; GCN: %__pow2 = fmul fast float %tmp, %tmp
define amdgpu_kernel void @test_pow_2f(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z3powff(float %tmp, float 2.000000e+00)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_2i
; GCN: %tmp = load float, ptr addrspace(1) %a, align 4
; GCN: %__pow2 = fmul fast float %tmp, %tmp
define amdgpu_kernel void @test_pow_2i(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z3powff(float %tmp, float 2.000000e+00)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

; pow(x, c) folds for c==-1 (reciprocal), c==0.5 (sqrt, prelink only) and
; c==-0.5 (rsqrt, prelink only).
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_m1f
; GCN: %tmp = load float, ptr addrspace(1) %arrayidx, align 4
; GCN: %__powrecip = fdiv fast float 1.000000e+00, %tmp
define amdgpu_kernel void @test_pow_m1f(ptr addrspace(1) nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, ptr addrspace(1) %a, i64 1
  %tmp = load float, ptr addrspace(1) %arrayidx, align 4
  %call = call fast float @_Z3powff(float %tmp, float -1.000000e+00)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_m1i
; GCN: %tmp = load float, ptr addrspace(1) %arrayidx, align 4
; GCN: %__powrecip = fdiv fast float 1.000000e+00, %tmp
define amdgpu_kernel void @test_pow_m1i(ptr addrspace(1) nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, ptr addrspace(1) %a, i64 1
  %tmp = load float, ptr addrspace(1) %arrayidx, align 4
  %call = call fast float @_Z3powff(float %tmp, float -1.000000e+00)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_half
; GCN-POSTLINK: call fast float @_Z3powff(float %tmp, float 5.000000e-01)
; GCN-PRELINK: %__pow2sqrt = tail call fast float @llvm.sqrt.f32(float %tmp)
define amdgpu_kernel void @test_pow_half(ptr addrspace(1) nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, ptr addrspace(1) %a, i64 1
  %tmp = load float, ptr addrspace(1) %arrayidx, align 4
  %call = call fast float @_Z3powff(float %tmp, float 5.000000e-01)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_mhalf
; GCN-POSTLINK: call fast float @_Z3powff(float %tmp, float -5.000000e-01)
; GCN-PRELINK: %__pow2rsqrt = tail call fast float @_Z5rsqrtf(float %tmp)
define amdgpu_kernel void @test_pow_mhalf(ptr addrspace(1) nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, ptr addrspace(1) %a, i64 1
  %tmp = load float, ptr addrspace(1) %arrayidx, align 4
  %call = call fast float @_Z3powff(float %tmp, float -5.000000e-01)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

; pow/powr/pown with a small integral constant exponent (11) expands into a
; multiply chain via repeated squaring.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_c
; GCN: %__powx2 = fmul fast float %tmp, %tmp
; GCN: %__powx21 = fmul fast float %__powx2, %__powx2
; GCN: %__powx22 = fmul fast float %__powx2, %tmp
; GCN: %[[r0:.*]] = fmul fast float %__powx21, %__powx21
; GCN: %__powprod3 = fmul fast float %[[r0]], %__powx22
define amdgpu_kernel void @test_pow_c(ptr addrspace(1) nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, ptr addrspace(1) %a, i64 1
  %tmp = load float, ptr addrspace(1) %arrayidx, align 4
  %call = call fast float @_Z3powff(float %tmp, float 1.100000e+01)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_powr_c
; GCN: %__powx2 = fmul fast float %tmp, %tmp
; GCN: %__powx21 = fmul fast float %__powx2, %__powx2
; GCN: %__powx22 = fmul fast float %__powx2, %tmp
; GCN: %[[r0:.*]] = fmul fast float %__powx21, %__powx21
; GCN: %__powprod3 = fmul fast float %[[r0]], %__powx22
define amdgpu_kernel void @test_powr_c(ptr addrspace(1) nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, ptr addrspace(1) %a, i64 1
  %tmp = load float, ptr addrspace(1) %arrayidx, align 4
  %call = call fast float @_Z4powrff(float %tmp, float 1.100000e+01)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

declare float @_Z4powrff(float, float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pown_c
; GCN: %__powx2 = fmul fast float %tmp, %tmp
; GCN: %__powx21 = fmul fast float %__powx2, %__powx2
; GCN: %__powx22 = fmul fast float %__powx2, %tmp
; GCN: %[[r0:.*]] = fmul fast float %__powx21, %__powx21
; GCN: %__powprod3 = fmul fast float %[[r0]], %__powx22
define amdgpu_kernel void @test_pown_c(ptr addrspace(1) nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, ptr addrspace(1) %a, i64 1
  %tmp = load float, ptr addrspace(1) %arrayidx, align 4
  %call = call fast float @_Z4pownfi(float %tmp, i32 11)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

declare half @_Z4pownDhi(half, i32)

; pown/pow/powr with a non-constant or large exponent expand into the
; exp2(y*log2(|x|)) form; pow/pown also reattach the sign of x.
; GCN-LABEL: {{^}}define half @test_pown_f16(
; GCN-NATIVE: %__fabs = tail call fast half @llvm.fabs.f16(half %x)
; GCN-NATIVE: %__log2 = tail call fast half @llvm.log2.f16(half %__fabs)
; GCN-NATIVE: %pownI2F = sitofp i32 %y to half
; GCN-NATIVE: %__ylogx = fmul fast half %__log2, %pownI2F
; GCN-NATIVE: %__exp2 = tail call fast half @llvm.exp2.f16(half %__ylogx)
; GCN-NATIVE: %__ytou = trunc i32 %y to i16
; GCN-NATIVE: %__yeven = shl i16 %__ytou, 15
; GCN-NATIVE: %0 = bitcast half %x to i16
; GCN-NATIVE: %__pow_sign = and i16 %__yeven, %0
; GCN-NATIVE: %1 = bitcast half %__exp2 to i16
; GCN-NATIVE: %2 = or disjoint i16 %__pow_sign, %1
; GCN-NATIVE: %3 = bitcast i16 %2 to half
define half @test_pown_f16(half %x, i32 %y) {
entry:
  %call = call fast half @_Z4pownDhi(half %x, i32 %y)
  ret half %call
}

declare float @_Z4pownfi(float, i32)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow
; GCN: %__fabs = tail call fast float @llvm.fabs.f32(float %tmp)
; GCN: %__log2 = tail call fast float @llvm.log2.f32(float %__fabs)
; GCN: %__ylogx = fmul fast float %__log2, 1.013000e+03
; GCN: %__exp2 = tail call fast float @llvm.exp2.f32(float %__ylogx)
; GCN: %[[r0:.*]] = tail call float @llvm.copysign.f32(float %__exp2, float %tmp)
; GCN: store float %[[r0]], ptr addrspace(1) %a, align 4
define amdgpu_kernel void @test_pow(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z3powff(float %tmp, float 1.013000e+03)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_powr
; GCN: %__log2 = tail call fast float @llvm.log2.f32(float %tmp)
; GCN: %__ylogx = fmul fast float %tmp1, %__log2
; GCN: %__exp2 = tail call fast float @llvm.exp2.f32(float %__ylogx)
; GCN: store float %__exp2, ptr addrspace(1) %a, align 4
define amdgpu_kernel void @test_powr(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %arrayidx1 = getelementptr inbounds float, ptr addrspace(1) %a, i64 1
  %tmp1 = load float, ptr addrspace(1) %arrayidx1, align 4
  %call = call fast float @_Z4powrff(float %tmp, float %tmp1)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

; pown with a variable integer exponent expands to exp2(y*log2(|x|)) with the
; sign bit recombined; f16 scalar and vector pow with constant 13 expand too.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pown
; GCN: %conv = fptosi float %tmp1 to i32
; GCN: %__fabs = tail call fast float @llvm.fabs.f32(float %tmp)
; GCN: %__log2 = tail call fast float @llvm.log2.f32(float %__fabs)
; GCN: %pownI2F = sitofp i32 %conv to float
; GCN: %__ylogx = fmul fast float %__log2, %pownI2F
; GCN: %__exp2 = tail call fast float @llvm.exp2.f32(float %__ylogx)
; GCN: %__yeven = shl i32 %conv, 31
; GCN: %[[r0:.*]] = bitcast float %tmp to i32
; GCN: %__pow_sign = and i32 %__yeven, %[[r0]]
; GCN: %[[r1:.*]] = bitcast float %__exp2 to i32
; GCN: %[[r2:.*]] = or disjoint i32 %__pow_sign, %[[r1]]
; GCN: store i32 %[[r2]], ptr addrspace(1) %a, align 4
define amdgpu_kernel void @test_pown(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %arrayidx1 = getelementptr inbounds float, ptr addrspace(1) %a, i64 1
  %tmp1 = load float, ptr addrspace(1) %arrayidx1, align 4
  %conv = fptosi float %tmp1 to i32
  %call = call fast float @_Z4pownfi(float %tmp, i32 %conv)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

declare half @_Z3powDhDh(half, half)
declare <2 x half> @_Z3powDv2_DhS_(<2 x half>, <2 x half>)

; GCN-LABEL: define half @test_pow_fast_f16__y_13(half %x)
; GCN: %__fabs = tail call fast half @llvm.fabs.f16(half %x)
; GCN: %__log2 = tail call fast half @llvm.log2.f16(half %__fabs)
; GCN: %__ylogx = fmul fast half %__log2, 0xH4A80
; GCN: %__exp2 = tail call fast half @llvm.exp2.f16(half %__ylogx)
; GCN: %1 = tail call half @llvm.copysign.f16(half %__exp2, half %x)
define half @test_pow_fast_f16__y_13(half %x) {
  %powr = tail call fast half @_Z3powDhDh(half %x, half 13.0)
  ret half %powr
}

; GCN-LABEL: define <2 x half> @test_pow_fast_v2f16__y_13(<2 x half> %x)
; GCN: %__fabs = tail call fast <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
; GCN: %__log2 = tail call fast <2 x half> @llvm.log2.v2f16(<2 x half> %__fabs)
; GCN: %__ylogx = fmul fast <2 x half> %__log2, splat (half 0xH4A80)
; GCN: %__exp2 = tail call fast <2 x half> @llvm.exp2.v2f16(<2 x half> %__ylogx)
; GCN: %1 = tail call <2 x half> @llvm.copysign.v2f16(<2 x half> %__exp2, <2 x half> %x)
define <2 x half> @test_pow_fast_v2f16__y_13(<2 x half> %x) {
  %powr = tail call fast <2 x half> @_Z3powDv2_DhS_(<2 x half> %x, <2 x half> <half 13.0, half 13.0>)
  ret <2 x half> %powr
}

; rootn(x, n) folds for n in {1, 2, 3, -1, -2}: identity, sqrt, cbrt
; (prelink only), reciprocal, and 1/sqrt respectively.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_1
; GCN: %tmp = load float, ptr addrspace(1) %arrayidx, align 4
; GCN: store float %tmp, ptr addrspace(1) %a, align 4
define amdgpu_kernel void @test_rootn_1(ptr addrspace(1) nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, ptr addrspace(1) %a, i64 1
  %tmp = load float, ptr addrspace(1) %arrayidx, align 4
  %call = call fast float @_Z5rootnfi(float %tmp, i32 1)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

declare float @_Z5rootnfi(float, i32)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_2
; GCN: call fast float @llvm.sqrt.f32(float %tmp)
define amdgpu_kernel void @test_rootn_2(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z5rootnfi(float %tmp, i32 2)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_3
; GCN-POSTLINK: call fast float @_Z5rootnfi(float %tmp, i32 3)
; GCN-PRELINK: %__rootn2cbrt = tail call fast float @_Z4cbrtf(float %tmp)
define amdgpu_kernel void @test_rootn_3(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z5rootnfi(float %tmp, i32 3)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_m1
; GCN: fdiv fast float 1.000000e+00, %tmp
define amdgpu_kernel void @test_rootn_m1(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z5rootnfi(float %tmp, i32 -1)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_m2
; GCN: [[SQRT:%.+]] = tail call fast float @llvm.sqrt.f32(float %tmp)
; GCN-NEXT: fdiv fast float 1.000000e+00, [[SQRT]]
define amdgpu_kernel void @test_rootn_m2(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z5rootnfi(float %tmp, i32 -2)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

; fma/mad with a zero multiplicand folds to the addend.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_0x
; GCN: store float %y
define amdgpu_kernel void @test_fma_0x(ptr addrspace(1) nocapture %a, float %y) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z3fmafff(float 0.000000e+00, float %tmp, float %y)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

declare float @_Z3fmafff(float, float, float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_x0
; GCN: store float %y,
define amdgpu_kernel void @test_fma_x0(ptr addrspace(1) nocapture %a, float %y) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z3fmafff(float %tmp, float 0.000000e+00, float %y)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_mad_0x
; GCN: store float %y,
define amdgpu_kernel void @test_mad_0x(ptr addrspace(1) nocapture %a, float %y) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z3madfff(float 0.000000e+00, float %tmp, float %y)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

declare float @_Z3madfff(float, float, float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_mad_x0
; GCN: store float %y,
define amdgpu_kernel void @test_mad_x0(ptr addrspace(1) nocapture %a, float %y) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z3madfff(float %tmp, float 0.000000e+00, float %y)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

; fma with a unit multiplicand folds to fadd; with a zero addend it folds to
; fmul.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_x1y
; GCN: %call = fadd fast float %tmp, %y
define amdgpu_kernel void @test_fma_x1y(ptr addrspace(1) nocapture %a, float %y) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z3fmafff(float %tmp, float 1.000000e+00, float %y)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_1xy
; GCN: %call = fadd fast float %tmp, %y
define amdgpu_kernel void @test_fma_1xy(ptr addrspace(1) nocapture %a, float %y) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z3fmafff(float 1.000000e+00, float %tmp, float %y)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_xy0
; GCN: %call = fmul fast float %tmp1, %tmp
define amdgpu_kernel void @test_fma_xy0(ptr addrspace(1) nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, ptr addrspace(1) %a, i64 1
  %tmp = load float, ptr addrspace(1) %arrayidx, align 4
  %tmp1 = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z3fmafff(float %tmp, float %tmp1, float 0.000000e+00)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

; With -amdgpu-use-native, exp/exp2/log/log2/log10 map to LLVM intrinsics
; and exp10 maps to the native_exp10 library call.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_exp
; GCN-NATIVE: call fast float @llvm.exp.f32(float %tmp)
define amdgpu_kernel void @test_use_native_exp(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z3expf(float %tmp)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

declare float @_Z3expf(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_exp2
; GCN-NATIVE: call fast float @llvm.exp2.f32(float %tmp)
define amdgpu_kernel void @test_use_native_exp2(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z4exp2f(float %tmp)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

declare float @_Z4exp2f(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_exp10
; GCN-NATIVE: call fast float @_Z12native_exp10f(float %tmp)
define amdgpu_kernel void @test_use_native_exp10(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z5exp10f(float %tmp)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

declare float @_Z5exp10f(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_log
; GCN-NATIVE: call fast float @llvm.log.f32(float %tmp)
define amdgpu_kernel void @test_use_native_log(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z3logf(float %tmp)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

declare float @_Z3logf(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_log2
; GCN-NATIVE: call fast float @llvm.log2.f32(float %tmp)
define amdgpu_kernel void @test_use_native_log2(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z4log2f(float %tmp)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

declare float @_Z4log2f(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_log10
; GCN-NATIVE: call fast float @llvm.log10.f32(float %tmp)
define amdgpu_kernel void @test_use_native_log10(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z5log10f(float %tmp)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

declare float @_Z5log10f(float)

; powr expands to exp2(y*log2(x)); a nobuiltin call site must be left alone.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_powr
; GCN: %tmp1 = load float, ptr addrspace(1) %arrayidx1, align 4
; GCN: %__log2 = tail call fast float @llvm.log2.f32(float %tmp)
; GCN: %__ylogx = fmul fast float %tmp1, %__log2
; GCN: %__exp2 = tail call fast float @llvm.exp2.f32(float %__ylogx)
; GCN: store float %__exp2, ptr addrspace(1) %a, align 4
define amdgpu_kernel void @test_use_native_powr(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %arrayidx1 = getelementptr inbounds float, ptr addrspace(1) %a, i64 1
  %tmp1 = load float, ptr addrspace(1) %arrayidx1, align 4
  %call = call fast float @_Z4powrff(float %tmp, float %tmp1)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_powr_nobuiltin
; GCN: %call = tail call fast float @_Z4powrff(float %tmp, float %tmp1)
define amdgpu_kernel void @test_use_native_powr_nobuiltin(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %arrayidx1 = getelementptr inbounds float, ptr addrspace(1) %a, i64 1
  %tmp1 = load float, ptr addrspace(1) %arrayidx1, align 4
  %call = call fast float @_Z4powrff(float %tmp, float %tmp1) nobuiltin
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

684; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_sqrt
685; GCN-NATIVE: call fast float @llvm.sqrt.f32(float %tmp)
686define amdgpu_kernel void @test_use_native_sqrt(ptr addrspace(1) nocapture %a) {
687entry:
688  %tmp = load float, ptr addrspace(1) %a, align 4
689  %call = call fast float @_Z4sqrtf(float %tmp)
690  store float %call, ptr addrspace(1) %a, align 4
691  ret void
692}
693
694; GCN-LABEL: {{^}}define amdgpu_kernel void @test_dont_use_native_sqrt_fast_f64
695; GCN: call fast double @llvm.sqrt.f64(double %tmp)
696define amdgpu_kernel void @test_dont_use_native_sqrt_fast_f64(ptr addrspace(1) nocapture %a) {
697entry:
698  %tmp = load double, ptr addrspace(1) %a, align 8
699  %call = call fast double @_Z4sqrtd(double %tmp)
700  store double %call, ptr addrspace(1) %a, align 8
701  ret void
702}
703
704declare float @_Z4sqrtf(float)
705declare double @_Z4sqrtd(double)
706
; With -amdgpu-use-native, the fast f32 rsqrt libcall is replaced by the
; native_rsqrt libcall (_Z12native_rsqrtf).
707; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_rsqrt
708; GCN-NATIVE: call fast float @_Z12native_rsqrtf(float %tmp)
709define amdgpu_kernel void @test_use_native_rsqrt(ptr addrspace(1) nocapture %a) {
710entry:
711  %tmp = load float, ptr addrspace(1) %a, align 4
712  %call = call fast float @_Z5rsqrtf(float %tmp)
713  store float %call, ptr addrspace(1) %a, align 4
714  ret void
715}
716
717declare float @_Z5rsqrtf(float)
718
; With -amdgpu-use-native, the fast f32 tan libcall is replaced by the
; native_tan libcall (_Z10native_tanf).
719; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_tan
720; GCN-NATIVE: call fast float @_Z10native_tanf(float %tmp)
721define amdgpu_kernel void @test_use_native_tan(ptr addrspace(1) nocapture %a) {
722entry:
723  %tmp = load float, ptr addrspace(1) %a, align 4
724  %call = call fast float @_Z3tanf(float %tmp)
725  store float %call, ptr addrspace(1) %a, align 4
726  ret void
727}
728
729declare float @_Z3tanf(float)
730
; With -amdgpu-use-native, the combined sincos libcall is split into
; separate native_sin and native_cos calls on the same input value.
731; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_sincos
732; GCN-NATIVE: call float @_Z10native_sinf(float %tmp)
733; GCN-NATIVE: call float @_Z10native_cosf(float %tmp)
734define amdgpu_kernel void @test_use_native_sincos(ptr addrspace(1) %a) {
735entry:
736  %tmp = load float, ptr addrspace(1) %a, align 4
737  %arrayidx1 = getelementptr inbounds float, ptr addrspace(1) %a, i64 1
738  %tmp1 = addrspacecast ptr addrspace(1) %arrayidx1 to ptr
739  %call = call fast float @_Z6sincosfPf(float %tmp, ptr %tmp1)
740  store float %call, ptr addrspace(1) %a, align 4
741  ret void
742}
743
744declare float @_Z6sincosfPf(float, ptr)
745
746%opencl.pipe_t = type opaque
747%opencl.reserve_id_t = type opaque
748
; At prelink, pipe reads with size/alignment 4 are specialized: the generic
; __read_pipe_2/__read_pipe_4 calls become the __read_pipe_2_4/__read_pipe_4_4
; variants and the trailing i32 size/align arguments are dropped.
749; GCN-LABEL: {{^}}define amdgpu_kernel void @test_read_pipe(ptr addrspace(1) %p, ptr addrspace(1) %ptr)
750; GCN-PRELINK: call i32 @__read_pipe_2_4(ptr addrspace(1) %{{.*}}, ptr %{{.*}}) #[[$NOUNWIND:[0-9]+]]
751; GCN-PRELINK: call i32 @__read_pipe_4_4(ptr addrspace(1) %{{.*}}, ptr addrspace(5) %{{.*}}, i32 2, ptr %{{.*}}) #[[$NOUNWIND]]
752define amdgpu_kernel void @test_read_pipe(ptr addrspace(1) %p, ptr addrspace(1) %ptr) local_unnamed_addr {
753entry:
754  %tmp1 = addrspacecast ptr addrspace(1) %ptr to ptr
755  %tmp2 = call i32 @__read_pipe_2(ptr addrspace(1) %p, ptr %tmp1, i32 4, i32 4) #0
756  %tmp3 = call ptr addrspace(5) @__reserve_read_pipe(ptr addrspace(1) %p, i32 2, i32 4, i32 4)
757  %tmp4 = call i32 @__read_pipe_4(ptr addrspace(1) %p, ptr addrspace(5) %tmp3, i32 2, ptr %tmp1, i32 4, i32 4) #0
758  call void @__commit_read_pipe(ptr addrspace(1) %p, ptr addrspace(5) %tmp3, i32 4, i32 4)
759  ret void
760}
761
762declare i32 @__read_pipe_2(ptr addrspace(1), ptr, i32, i32)
763
764declare ptr addrspace(5) @__reserve_read_pipe(ptr addrspace(1), i32, i32, i32)
765
766declare i32 @__read_pipe_4(ptr addrspace(1), ptr addrspace(5), i32, ptr, i32, i32)
767
768declare void @__commit_read_pipe(ptr addrspace(1), ptr addrspace(5), i32, i32)
769
; Mirror of test_read_pipe for the write direction: at prelink,
; __write_pipe_2/__write_pipe_4 with size/alignment 4 are specialized to the
; __write_pipe_2_4/__write_pipe_4_4 variants with the size/align args dropped.
770; GCN-LABEL: {{^}}define amdgpu_kernel void @test_write_pipe(ptr addrspace(1) %p, ptr addrspace(1) %ptr)
771; GCN-PRELINK: call i32 @__write_pipe_2_4(ptr addrspace(1) %{{.*}}, ptr %{{.*}}) #[[$NOUNWIND]]
772; GCN-PRELINK: call i32 @__write_pipe_4_4(ptr addrspace(1) %{{.*}}, ptr addrspace(5) %{{.*}}, i32 2, ptr %{{.*}}) #[[$NOUNWIND]]
773define amdgpu_kernel void @test_write_pipe(ptr addrspace(1) %p, ptr addrspace(1) %ptr) local_unnamed_addr {
774entry:
775  %tmp1 = addrspacecast ptr addrspace(1) %ptr to ptr
776  %tmp2 = call i32 @__write_pipe_2(ptr addrspace(1) %p, ptr %tmp1, i32 4, i32 4) #0
777  %tmp3 = call ptr addrspace(5) @__reserve_write_pipe(ptr addrspace(1) %p, i32 2, i32 4, i32 4) #0
778  %tmp4 = call i32 @__write_pipe_4(ptr addrspace(1) %p, ptr addrspace(5) %tmp3, i32 2, ptr %tmp1, i32 4, i32 4) #0
779  call void @__commit_write_pipe(ptr addrspace(1) %p, ptr addrspace(5) %tmp3, i32 4, i32 4) #0
780  ret void
781}
782
783declare i32 @__write_pipe_2(ptr addrspace(1), ptr, i32, i32) local_unnamed_addr
784
785declare ptr addrspace(5) @__reserve_write_pipe(ptr addrspace(1), i32, i32, i32) local_unnamed_addr
786
787declare i32 @__write_pipe_4(ptr addrspace(1), ptr addrspace(5), i32, ptr, i32, i32) local_unnamed_addr
788
789declare void @__commit_write_pipe(ptr addrspace(1), ptr addrspace(5), i32, i32) local_unnamed_addr
790
791%struct.S = type { [100 x i32] }
792
; Size-specialization coverage: each packet size that is a power of two from
; 1 to 128 (with matching alignment) is rewritten to the corresponding
; __read_pipe_2_<size> variant. The final call uses size 400 / align 4, which
; has no specialized form, so the generic __read_pipe_2 call (with its i32
; size and align arguments intact) must remain.
793; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pipe_size
794; GCN-PRELINK: call i32 @__read_pipe_2_1(ptr addrspace(1) %{{.*}} ptr %{{.*}}) #[[$NOUNWIND]]
795; GCN-PRELINK: call i32 @__read_pipe_2_2(ptr addrspace(1) %{{.*}} ptr %{{.*}}) #[[$NOUNWIND]]
796; GCN-PRELINK: call i32 @__read_pipe_2_4(ptr addrspace(1) %{{.*}} ptr %{{.*}}) #[[$NOUNWIND]]
797; GCN-PRELINK: call i32 @__read_pipe_2_8(ptr addrspace(1) %{{.*}} ptr %{{.*}}) #[[$NOUNWIND]]
798; GCN-PRELINK: call i32 @__read_pipe_2_16(ptr addrspace(1) %{{.*}}, ptr %{{.*}}) #[[$NOUNWIND]]
799; GCN-PRELINK: call i32 @__read_pipe_2_32(ptr addrspace(1) %{{.*}}, ptr %{{.*}} #[[$NOUNWIND]]
800; GCN-PRELINK: call i32 @__read_pipe_2_64(ptr addrspace(1) %{{.*}}, ptr %{{.*}} #[[$NOUNWIND]]
801; GCN-PRELINK: call i32 @__read_pipe_2_128(ptr addrspace(1) %{{.*}}, ptr %{{.*}} #[[$NOUNWIND]]
802; GCN-PRELINK: call i32 @__read_pipe_2(ptr addrspace(1) %{{.*}}, ptr %{{.*}} i32 400, i32 4) #[[$NOUNWIND]]
803define amdgpu_kernel void @test_pipe_size(ptr addrspace(1) %p1, ptr addrspace(1) %ptr1, ptr addrspace(1) %p2, ptr addrspace(1) %ptr2, ptr addrspace(1) %p4, ptr addrspace(1) %ptr4, ptr addrspace(1) %p8, ptr addrspace(1) %ptr8, ptr addrspace(1) %p16, ptr addrspace(1) %ptr16, ptr addrspace(1) %p32, ptr addrspace(1) %ptr32, ptr addrspace(1) %p64, ptr addrspace(1) %ptr64, ptr addrspace(1) %p128, ptr addrspace(1) %ptr128, ptr addrspace(1) %pu, ptr addrspace(1) %ptru) local_unnamed_addr #0 {
804entry:
805  %tmp = addrspacecast ptr addrspace(1) %ptr1 to ptr
806  %tmp1 = call i32 @__read_pipe_2(ptr addrspace(1) %p1, ptr %tmp, i32 1, i32 1) #0
807  %tmp3 = addrspacecast ptr addrspace(1) %ptr2 to ptr
808  %tmp4 = call i32 @__read_pipe_2(ptr addrspace(1) %p2, ptr %tmp3, i32 2, i32 2) #0
809  %tmp6 = addrspacecast ptr addrspace(1) %ptr4 to ptr
810  %tmp7 = call i32 @__read_pipe_2(ptr addrspace(1) %p4, ptr %tmp6, i32 4, i32 4) #0
811  %tmp9 = addrspacecast ptr addrspace(1) %ptr8 to ptr
812  %tmp10 = call i32 @__read_pipe_2(ptr addrspace(1) %p8, ptr %tmp9, i32 8, i32 8) #0
813  %tmp12 = addrspacecast ptr addrspace(1) %ptr16 to ptr
814  %tmp13 = call i32 @__read_pipe_2(ptr addrspace(1) %p16, ptr %tmp12, i32 16, i32 16) #0
815  %tmp15 = addrspacecast ptr addrspace(1) %ptr32 to ptr
816  %tmp16 = call i32 @__read_pipe_2(ptr addrspace(1) %p32, ptr %tmp15, i32 32, i32 32) #0
817  %tmp18 = addrspacecast ptr addrspace(1) %ptr64 to ptr
818  %tmp19 = call i32 @__read_pipe_2(ptr addrspace(1) %p64, ptr %tmp18, i32 64, i32 64) #0
819  %tmp21 = addrspacecast ptr addrspace(1) %ptr128 to ptr
820  %tmp22 = call i32 @__read_pipe_2(ptr addrspace(1) %p128, ptr %tmp21, i32 128, i32 128) #0
821  %tmp24 = addrspacecast ptr addrspace(1) %ptru to ptr
822  %tmp25 = call i32 @__read_pipe_2(ptr addrspace(1) %pu, ptr %tmp24, i32 400, i32 4) #0
823  ret void
824}
825
826; GCN-PRELINK: declare float @_Z4cbrtf(float) local_unnamed_addr #[[$NOUNWIND_READONLY:[0-9]+]]
827
828; GCN-PRELINK-DAG: attributes #[[$NOUNWIND]] = { nounwind }
829; GCN-PRELINK-DAG: attributes #[[$NOUNWIND_READONLY]] = { nounwind memory(read) "uniform-work-group-size"="false" }
830attributes #0 = { nounwind }
831