; RUN: llc -mtriple=amdgcn -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GCN,SI %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GCN,FP16 %s
; RUN: llc -mtriple=amdgcn -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GCN,SI %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GCN,FP16 %s

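; Tests that instruction selection honors divergence for fneg/fabs bit
; patterns. A divergent source should select VALU bitwise ops (V_XOR_B32
; for fneg, V_AND_B32 for fabs, V_OR_B32 for fneg(fabs)), while a uniform
; one should stay on the SALU (S_XOR_B32, S_AND_B32, S_OR_B32). For f32
; the masks are the sign bit 0x80000000 (-2147483648) and its complement
; 0x7fffffff (2147483647).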

define amdgpu_kernel void @divergent_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GCN-LABEL: name:            divergent_fneg_f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: V_XOR_B32_e64 killed %[[REG]]

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds float, ptr addrspace(1) %in, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %val = load volatile float, ptr addrspace(1) %in.gep
  %fneg = fneg float %val
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}

define amdgpu_kernel void @uniform_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %in, i64 %idx) {
; GCN-LABEL: name:            uniform_fneg_f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: S_XOR_B32 killed %{{[0-9]+}}, killed %[[REG]]

  %in.gep = getelementptr inbounds float, ptr addrspace(1) %in, i64 %idx
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %idx
  %val = load volatile float, ptr addrspace(1) %in.gep
  %fneg = fneg float %val
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}

define amdgpu_kernel void @divergent_fabs_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GCN-LABEL: name:            divergent_fabs_f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
; GCN: V_AND_B32_e64 killed %[[REG]]

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds float, ptr addrspace(1) %in, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %val = load volatile float, ptr addrspace(1) %in.gep
  %fabs = call float @llvm.fabs.f32(float %val)
  store float %fabs, ptr addrspace(1) %out.gep
  ret void
}

define amdgpu_kernel void @uniform_fabs_f32(ptr addrspace(1) %out, ptr addrspace(1) %in, i64 %idx) {
; GCN-LABEL: name:            uniform_fabs_f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
; GCN: S_AND_B32 killed %{{[0-9]+}}, killed %[[REG]]

  %in.gep = getelementptr inbounds float, ptr addrspace(1) %in, i64 %idx
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %idx
  %val = load volatile float, ptr addrspace(1) %in.gep
  %fabs = call float @llvm.fabs.f32(float %val)
  store float %fabs, ptr addrspace(1) %out.gep
  ret void
}

define amdgpu_kernel void @divergent_fneg_fabs_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GCN-LABEL: name:            divergent_fneg_fabs_f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: V_OR_B32_e64 killed %[[REG]]

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds float, ptr addrspace(1) %in, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %val = load volatile float, ptr addrspace(1) %in.gep
  %fabs = call float @llvm.fabs.f32(float %val)
  %fneg = fneg float %fabs
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}

define amdgpu_kernel void @uniform_fneg_fabs_f32(ptr addrspace(1) %out, ptr addrspace(1) %in, i64 %idx) {
; GCN-LABEL: name:            uniform_fneg_fabs_f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: S_OR_B32 killed %{{[0-9]+}}, killed %[[REG]]

  %in.gep = getelementptr inbounds float, ptr addrspace(1) %in, i64 %idx
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %idx
  %val = load volatile float, ptr addrspace(1) %in.gep
  %fabs = call float @llvm.fabs.f32(float %val)
  %fneg = fneg float %fabs
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}

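; f16 cases. The sign bit is 0x8000 (32768) and the magnitude mask is
; 0x7fff (32767). The divergent VALU checks apply only to the gfx900 run
; line, which has full 16-bit instruction support.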
define amdgpu_kernel void @divergent_fabs_f16(ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GCN-LABEL: name:            divergent_fabs_f16
; GCN-LABEL: bb.0 (%ir-block.0)
; FP16: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 32767
; FP16: V_AND_B32_e64 killed %[[REG]]

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds half, ptr addrspace(1) %in, i64 %tid.ext
  %val = load volatile half, ptr addrspace(1) %in.gep
  %fabs = call half @llvm.fabs.f16(half %val)
  store half %fabs, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @uniform_fabs_f16(ptr addrspace(1) %in, ptr addrspace(1) %out, i64 %idx) {
; GCN-LABEL: name:            uniform_fabs_f16
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 32767
; GCN: S_AND_B32 killed %{{[0-9]+}}, killed %[[REG]]

  %in.gep = getelementptr inbounds half, ptr addrspace(1) %in, i64 %idx
  %val = load volatile half, ptr addrspace(1) %in.gep
  %fabs = call half @llvm.fabs.f16(half %val)
  store half %fabs, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @divergent_fneg_f16(ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GCN-LABEL: name:            divergent_fneg_f16
; GCN-LABEL: bb.0 (%ir-block.0)
; FP16: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 32768
; FP16: V_XOR_B32_e64 killed %[[REG]]

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds half, ptr addrspace(1) %in, i64 %tid.ext
  %val = load volatile half, ptr addrspace(1) %in.gep
  %fneg = fneg half %val
  store half %fneg, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @uniform_fneg_f16(ptr addrspace(1) %in, ptr addrspace(1) %out, i64 %idx) {
; GCN-LABEL: name:            uniform_fneg_f16
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 32768
; GCN: S_XOR_B32 killed %{{[0-9]+}}, killed %[[REG]]

  %in.gep = getelementptr inbounds half, ptr addrspace(1) %in, i64 %idx
  %val = load volatile half, ptr addrspace(1) %in.gep
  %fneg = fneg half %val
  store half %fneg, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @divergent_fneg_fabs_f16(ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GCN-LABEL: name:            divergent_fneg_fabs_f16
; GCN-LABEL: bb.0 (%ir-block.0)
; FP16: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 32768
; FP16: V_OR_B32_e64 killed %[[REG]]

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds half, ptr addrspace(1) %in, i64 %tid.ext
  %val = load volatile half, ptr addrspace(1) %in.gep
  %fabs = call half @llvm.fabs.f16(half %val)
  %fneg = fneg half %fabs
  store half %fneg, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @uniform_fneg_fabs_f16(ptr addrspace(1) %in, ptr addrspace(1) %out, i64 %idx) {
; GCN-LABEL: name:            uniform_fneg_fabs_f16
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 32768
; GCN: S_OR_B32 killed %{{[0-9]+}}, killed %[[REG]]

  %in.gep = getelementptr inbounds half, ptr addrspace(1) %in, i64 %idx
  %val = load volatile half, ptr addrspace(1) %in.gep
  %fabs = call half @llvm.fabs.f16(half %val)
  %fneg = fneg half %fabs
  store half %fneg, ptr addrspace(1) %out
  ret void
}

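; <2 x half> cases. A single 32-bit mask covers both lanes: 0x80008000
; (-2147450880) flips or sets both sign bits, and 0x7fff7fff (2147450879)
; clears them.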
define amdgpu_kernel void @divergent_fneg_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GCN-LABEL: name:            divergent_fneg_v2f16
; GCN-LABEL: bb.0 (%ir-block.0)
; FP16: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147450880
; FP16: V_XOR_B32_e64 killed %[[REG]]

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds <2 x half>, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr inbounds <2 x half>, ptr addrspace(1) %out, i32 %tid
  %val = load <2 x half>, ptr addrspace(1) %gep.in, align 2
  %fneg = fneg <2 x half> %val
  store <2 x half> %fneg, ptr addrspace(1) %gep.out
  ret void
}

define amdgpu_kernel void @uniform_fneg_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %idx) {
; GCN-LABEL: name:            uniform_fneg_v2f16
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147450880
; GCN: S_XOR_B32 killed %{{[0-9]+}}, killed %[[REG]]

  %gep.in = getelementptr inbounds <2 x half>, ptr addrspace(1) %in, i32 %idx
  %gep.out = getelementptr inbounds <2 x half>, ptr addrspace(1) %out, i32 %idx
  %val = load <2 x half>, ptr addrspace(1) %gep.in, align 2
  %fneg = fneg <2 x half> %val
  store <2 x half> %fneg, ptr addrspace(1) %gep.out
  ret void
}

define amdgpu_kernel void @divergent_fabs_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GCN-LABEL: name:            divergent_fabs_v2f16
; GCN-LABEL: bb.0 (%ir-block.0)
; FP16: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 2147450879
; FP16: V_AND_B32_e64 killed %[[REG]]

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds <2 x half>, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr inbounds <2 x half>, ptr addrspace(1) %out, i32 %tid
  %val = load <2 x half>, ptr addrspace(1) %gep.in, align 2
  %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val)
  store <2 x half> %fabs, ptr addrspace(1) %gep.out
  ret void
}

define amdgpu_kernel void @uniform_fabs_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %idx) {
; GCN-LABEL: name:            uniform_fabs_v2f16
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 2147450879
; GCN: S_AND_B32 killed %{{[0-9]+}}, killed %[[REG]]

  %gep.in = getelementptr inbounds <2 x half>, ptr addrspace(1) %in, i32 %idx
  %gep.out = getelementptr inbounds <2 x half>, ptr addrspace(1) %out, i32 %idx
  %val = load <2 x half>, ptr addrspace(1) %gep.in, align 2
  %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val)
  store <2 x half> %fabs, ptr addrspace(1) %gep.out
  ret void
}

define amdgpu_kernel void @divergent_fneg_fabs_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GCN-LABEL: name:            divergent_fneg_fabs_v2f16
; GCN-LABEL: bb.0 (%ir-block.0)
; FP16: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147450880
; FP16: V_OR_B32_e64 killed %[[REG]]

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds <2 x half>, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr inbounds <2 x half>, ptr addrspace(1) %out, i32 %tid
  %val = load <2 x half>, ptr addrspace(1) %gep.in, align 2
  %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val)
  %fneg = fneg <2 x half> %fabs
  store <2 x half> %fneg, ptr addrspace(1) %gep.out
  ret void
}

define amdgpu_kernel void @uniform_fneg_fabs_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %idx) {
; GCN-LABEL: name:            uniform_fneg_fabs_v2f16
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147450880
; GCN: S_OR_B32 killed %{{[0-9]+}}, killed %[[REG]]

  %gep.in = getelementptr inbounds <2 x half>, ptr addrspace(1) %in, i32 %idx
  %gep.out = getelementptr inbounds <2 x half>, ptr addrspace(1) %out, i32 %idx
  %val = load <2 x half>, ptr addrspace(1) %gep.in, align 2
  %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val)
  %fneg = fneg <2 x half> %fabs
  store <2 x half> %fneg, ptr addrspace(1) %gep.out
  ret void
}

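; <2 x float> cases. The operation is split per element, so the mask is
; materialized once and the bitwise op is expected twice, once per lane.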
define amdgpu_kernel void @divergent_fneg_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GCN-LABEL: name:            divergent_fneg_v2f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: V_XOR_B32_e64 %[[REG]]
; GCN: V_XOR_B32_e64 %[[REG]]

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds <2 x float>, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr inbounds <2 x float>, ptr addrspace(1) %out, i32 %tid
  %val = load <2 x float>, ptr addrspace(1) %gep.in, align 4
  %fneg = fneg <2 x float> %val
  store <2 x float> %fneg, ptr addrspace(1) %gep.out
  ret void
}

define amdgpu_kernel void @uniform_fneg_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %idx) {
; GCN-LABEL: name:            uniform_fneg_v2f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: S_XOR_B32 killed %{{[0-9]+}}, %[[REG]]
; GCN: S_XOR_B32 killed %{{[0-9]+}}, %[[REG]]

  %gep.in = getelementptr inbounds <2 x float>, ptr addrspace(1) %in, i32 %idx
  %gep.out = getelementptr inbounds <2 x float>, ptr addrspace(1) %out, i32 %idx
  %val = load <2 x float>, ptr addrspace(1) %gep.in, align 4
  %fneg = fneg <2 x float> %val
  store <2 x float> %fneg, ptr addrspace(1) %gep.out
  ret void
}

define amdgpu_kernel void @divergent_fabs_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GCN-LABEL: name:            divergent_fabs_v2f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
; GCN: V_AND_B32_e64 %[[REG]]
; GCN: V_AND_B32_e64 %[[REG]]

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds <2 x float>, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr inbounds <2 x float>, ptr addrspace(1) %out, i32 %tid
  %val = load <2 x float>, ptr addrspace(1) %gep.in, align 4
  %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %val)
  store <2 x float> %fabs, ptr addrspace(1) %gep.out
  ret void
}

define amdgpu_kernel void @uniform_fabs_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %idx) {
; GCN-LABEL: name:            uniform_fabs_v2f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
; GCN: S_AND_B32 killed %{{[0-9]+}}, %[[REG]]
; GCN: S_AND_B32 killed %{{[0-9]+}}, %[[REG]]

  %gep.in = getelementptr inbounds <2 x float>, ptr addrspace(1) %in, i32 %idx
  %gep.out = getelementptr inbounds <2 x float>, ptr addrspace(1) %out, i32 %idx
  %val = load <2 x float>, ptr addrspace(1) %gep.in, align 4
  %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %val)
  store <2 x float> %fabs, ptr addrspace(1) %gep.out
  ret void
}

define amdgpu_kernel void @divergent_fneg_fabs_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GCN-LABEL: name:            divergent_fneg_fabs_v2f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: V_OR_B32_e64 %[[REG]]
; GCN: V_OR_B32_e64 %[[REG]]

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds <2 x float>, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr inbounds <2 x float>, ptr addrspace(1) %out, i32 %tid
  %val = load <2 x float>, ptr addrspace(1) %gep.in, align 4
  %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %val)
  %fneg = fneg <2 x float> %fabs
  store <2 x float> %fneg, ptr addrspace(1) %gep.out
  ret void
}

define amdgpu_kernel void @uniform_fneg_fabs_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %idx) {
; GCN-LABEL: name:            uniform_fneg_fabs_v2f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: S_OR_B32 killed %{{[0-9]+}}, %[[REG]]
; GCN: S_OR_B32 killed %{{[0-9]+}}, %[[REG]]

  %gep.in = getelementptr inbounds <2 x float>, ptr addrspace(1) %in, i32 %idx
  %gep.out = getelementptr inbounds <2 x float>, ptr addrspace(1) %out, i32 %idx
  %val = load <2 x float>, ptr addrspace(1) %gep.in, align 4
  %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %val)
  %fneg = fneg <2 x float> %fabs
  store <2 x float> %fneg, ptr addrspace(1) %gep.out
  ret void
}

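; f64 cases. Only the high 32 bits (sub1) hold the sign, so the mask is
; applied to the high half and the 64-bit value is rebuilt with a
; REG_SEQUENCE, leaving the low half untouched.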
define amdgpu_kernel void @divergent_fneg_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GCN-LABEL: name:            divergent_fneg_f64
; GCN-LABEL: bb.0 (%ir-block.0)
; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64
; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR
; GCN: %[[HI32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub1
; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: %[[XOR:[0-9]+]]:vgpr_32 = V_XOR_B32_e64 killed %[[SREG_MASK]], killed %[[HI32]]
; GCN: %[[LO32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub0
; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[XOR]], %subreg.sub1

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds double, ptr addrspace(1) %in, i64 %tid.ext
  %out.gep = getelementptr inbounds double, ptr addrspace(1) %out, i64 %tid.ext
  %val = load volatile double, ptr addrspace(1) %in.gep
  %fneg = fneg double %val
  store double %fneg, ptr addrspace(1) %out.gep
  ret void
}

define amdgpu_kernel void @uniform_fneg_f64(ptr addrspace(1) %out, ptr addrspace(1) %in, i64 %idx) {
; GCN-LABEL: name:            uniform_fneg_f64
; GCN-LABEL: bb.0 (%ir-block.0)
; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64
; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR
; GCN: %[[LO32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub0
; GCN: %[[HI32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub1
; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: %[[XOR:[0-9]+]]:sreg_32 = S_XOR_B32 killed %[[HI32]], killed %[[SREG_MASK]]
; GCN: %[[XOR_COPY:[0-9]+]]:sreg_32 = COPY %[[XOR]]
; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[XOR_COPY]], %subreg.sub1

  %in.gep = getelementptr inbounds double, ptr addrspace(1) %in, i64 %idx
  %out.gep = getelementptr inbounds double, ptr addrspace(1) %out, i64 %idx
  %val = load volatile double, ptr addrspace(1) %in.gep
  %fneg = fneg double %val
  store double %fneg, ptr addrspace(1) %out.gep
  ret void
}

define amdgpu_kernel void @divergent_fabs_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GCN-LABEL: name:            divergent_fabs_f64
; GCN-LABEL: bb.0 (%ir-block.0)
; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64
; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR
; GCN: %[[HI32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub1
; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
; GCN: %[[AND:[0-9]+]]:vgpr_32 = V_AND_B32_e64 killed %[[SREG_MASK]], killed %[[HI32]]
; GCN: %[[LO32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub0
; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[AND]], %subreg.sub1

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds double, ptr addrspace(1) %in, i64 %tid.ext
  %out.gep = getelementptr inbounds double, ptr addrspace(1) %out, i64 %tid.ext
  %val = load volatile double, ptr addrspace(1) %in.gep
  %fabs = call double @llvm.fabs.f64(double %val)
  store double %fabs, ptr addrspace(1) %out.gep
  ret void
}

define amdgpu_kernel void @uniform_fabs_f64(ptr addrspace(1) %out, ptr addrspace(1) %in, i64 %idx) {
; GCN-LABEL: name:            uniform_fabs_f64
; GCN-LABEL: bb.0 (%ir-block.0)
; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64
; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR
; GCN: %[[LO32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub0
; GCN: %[[HI32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub1
; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
; GCN: %[[AND:[0-9]+]]:sreg_32 = S_AND_B32 killed %[[HI32]], killed %[[SREG_MASK]]
; GCN: %[[AND_COPY:[0-9]+]]:sreg_32 = COPY %[[AND]]
; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[AND_COPY]], %subreg.sub1

  %in.gep = getelementptr inbounds double, ptr addrspace(1) %in, i64 %idx
  %out.gep = getelementptr inbounds double, ptr addrspace(1) %out, i64 %idx
  %val = load volatile double, ptr addrspace(1) %in.gep
  %fabs = call double @llvm.fabs.f64(double %val)
  store double %fabs, ptr addrspace(1) %out.gep
  ret void
}

define amdgpu_kernel void @divergent_fneg_fabs_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GCN-LABEL: name:            divergent_fneg_fabs_f64
; GCN-LABEL: bb.0 (%ir-block.0)
; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64
; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR
; GCN: %[[HI32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub1
; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: %[[OR:[0-9]+]]:vgpr_32 = V_OR_B32_e64 killed %[[SREG_MASK]], killed %[[HI32]]
; GCN: %[[LO32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub0
; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[OR]], %subreg.sub1

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds double, ptr addrspace(1) %in, i64 %tid.ext
  %out.gep = getelementptr inbounds double, ptr addrspace(1) %out, i64 %tid.ext
  %val = load volatile double, ptr addrspace(1) %in.gep
  %fabs = call double @llvm.fabs.f64(double %val)
  %fneg = fneg double %fabs
  store double %fneg, ptr addrspace(1) %out.gep
  ret void
}

define amdgpu_kernel void @uniform_fneg_fabs_f64(ptr addrspace(1) %out, ptr addrspace(1) %in, i64 %idx) {
; GCN-LABEL: name:            uniform_fneg_fabs_f64
; GCN-LABEL: bb.0 (%ir-block.0)
; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64
; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR
; GCN: %[[LO32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub0
; GCN: %[[HI32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub1
; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: %[[OR:[0-9]+]]:sreg_32 = S_OR_B32 killed %[[HI32]], killed %[[SREG_MASK]]
; GCN: %[[OR_COPY:[0-9]+]]:sreg_32 = COPY %[[OR]]
; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[OR_COPY]], %subreg.sub1

  %in.gep = getelementptr inbounds double, ptr addrspace(1) %in, i64 %idx
  %out.gep = getelementptr inbounds double, ptr addrspace(1) %out, i64 %idx
  %val = load volatile double, ptr addrspace(1) %in.gep
  %fabs = call double @llvm.fabs.f64(double %val)
  %fneg = fneg double %fabs
  store double %fneg, ptr addrspace(1) %out.gep
  ret void
}

declare float @llvm.fabs.f32(float)
declare half @llvm.fabs.f16(half)
declare double @llvm.fabs.f64(double)
declare <2 x half> @llvm.fabs.v2f16(<2 x half>)
declare <2 x float> @llvm.fabs.v2f32(<2 x float>)

declare i32 @llvm.amdgcn.workitem.id.x()