xref: /llvm-project/llvm/test/CodeGen/AMDGPU/bitop3.ll (revision 01a7d4e26b9bac27e282b113209f53c4c1d290b2)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel=0 -mtriple=amdgcn-- -mcpu=gfx950 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX950,GFX950-SDAG %s
3; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=gfx950 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX950,GFX950-GISEL %s
4
5; ========= Single bit functions =========
6
; Computes ~a & ~b & ~c on three i32 arguments and returns the bits as a float.
; The checks are shared by both run lines: SelectionDAG and GlobalISel are each
; expected to select a single v_bitop3_b32 with bitop3:1.
7define amdgpu_ps float @not_and_not_and_not_and(i32 %a, i32 %b, i32 %c) {
8; GCN-LABEL: not_and_not_and_not_and:
9; GCN:       ; %bb.0:
10; GCN-NEXT:    v_bitop3_b32 v0, v0, v1, v2 bitop3:1
11; GCN-NEXT:    ; return to shader part epilog
12  %nota = xor i32 %a, -1
13  %notb = xor i32 %b, -1
14  %notc = xor i32 %c, -1
15  %and1 = and i32 %nota, %notc
16  %and2 = and i32 %and1, %notb
17  %ret_cast = bitcast i32 %and2 to float
18  ret float %ret_cast
19}
20
; Computes ~a & ~b & c (only %a and %b are inverted).
; The shared checks expect both selectors to emit one v_bitop3_b32 with
; bitop3:2.
21define amdgpu_ps float @not_and_not_and_and(i32 %a, i32 %b, i32 %c) {
22; GCN-LABEL: not_and_not_and_and:
23; GCN:       ; %bb.0:
24; GCN-NEXT:    v_bitop3_b32 v0, v0, v1, v2 bitop3:2
25; GCN-NEXT:    ; return to shader part epilog
26  %nota = xor i32 %a, -1
27  %notb = xor i32 %b, -1
28  %and1 = and i32 %nota, %c
29  %and2 = and i32 %and1, %notb
30  %ret_cast = bitcast i32 %and2 to float
31  ret float %ret_cast
32}
33
; Computes ~a & b & ~c (only %a and %c are inverted).
; The shared checks expect both selectors to emit one v_bitop3_b32 with
; bitop3:4.
34define amdgpu_ps float @not_and_and_not_and(i32 %a, i32 %b, i32 %c) {
35; GCN-LABEL: not_and_and_not_and:
36; GCN:       ; %bb.0:
37; GCN-NEXT:    v_bitop3_b32 v0, v0, v1, v2 bitop3:4
38; GCN-NEXT:    ; return to shader part epilog
39  %nota = xor i32 %a, -1
40  %notc = xor i32 %c, -1
41  %and1 = and i32 %nota, %notc
42  %and2 = and i32 %and1, %b
43  %ret_cast = bitcast i32 %and2 to float
44  ret float %ret_cast
45}
46
; Computes ~a & b & c (only %a is inverted).
; SelectionDAG is expected to fold everything into one v_bitop3_b32 with
; bitop3:8. GlobalISel is expected to emit two instructions: a two-source
; v_bitop3_b32 (bitop3:0xc on %a and %c) followed by a v_and_b32 with %b.
47define amdgpu_ps float @not_and_and_and(i32 %a, i32 %b, i32 %c) {
48; GFX950-SDAG-LABEL: not_and_and_and:
49; GFX950-SDAG:       ; %bb.0:
50; GFX950-SDAG-NEXT:    v_bitop3_b32 v0, v0, v1, v2 bitop3:8
51; GFX950-SDAG-NEXT:    ; return to shader part epilog
52;
53; GFX950-GISEL-LABEL: not_and_and_and:
54; GFX950-GISEL:       ; %bb.0:
55; GFX950-GISEL-NEXT:    v_bitop3_b32 v0, v0, v2, v0 bitop3:0xc
56; GFX950-GISEL-NEXT:    v_and_b32_e32 v0, v0, v1
57; GFX950-GISEL-NEXT:    ; return to shader part epilog
58  %nota = xor i32 %a, -1
59  %and1 = and i32 %nota, %c
60  %and2 = and i32 %and1, %b
61  %ret_cast = bitcast i32 %and2 to float
62  ret float %ret_cast
63}
64
; Computes a & ~b & ~c (only %b and %c are inverted).
; The shared checks expect both selectors to emit one v_bitop3_b32 with
; bitop3:0x10.
65define amdgpu_ps float @and_not_and_not_and(i32 %a, i32 %b, i32 %c) {
66; GCN-LABEL: and_not_and_not_and:
67; GCN:       ; %bb.0:
68; GCN-NEXT:    v_bitop3_b32 v0, v0, v1, v2 bitop3:0x10
69; GCN-NEXT:    ; return to shader part epilog
70  %notb = xor i32 %b, -1
71  %notc = xor i32 %c, -1
72  %and1 = and i32 %a, %notc
73  %and2 = and i32 %and1, %notb
74  %ret_cast = bitcast i32 %and2 to float
75  ret float %ret_cast
76}
77
; Computes a & ~b & c (only %b is inverted).
; SelectionDAG is expected to emit one v_bitop3_b32 with bitop3:0x20.
; GlobalISel is expected to emit no bitop3 here: a v_not_b32 followed by two
; v_and_b32 instructions.
78define amdgpu_ps float @and_not_and_and(i32 %a, i32 %b, i32 %c) {
79; GFX950-SDAG-LABEL: and_not_and_and:
80; GFX950-SDAG:       ; %bb.0:
81; GFX950-SDAG-NEXT:    v_bitop3_b32 v0, v0, v1, v2 bitop3:0x20
82; GFX950-SDAG-NEXT:    ; return to shader part epilog
83;
84; GFX950-GISEL-LABEL: and_not_and_and:
85; GFX950-GISEL:       ; %bb.0:
86; GFX950-GISEL-NEXT:    v_not_b32_e32 v1, v1
87; GFX950-GISEL-NEXT:    v_and_b32_e32 v0, v0, v2
88; GFX950-GISEL-NEXT:    v_and_b32_e32 v0, v0, v1
89; GFX950-GISEL-NEXT:    ; return to shader part epilog
90  %notb = xor i32 %b, -1
91  %and1 = and i32 %a, %c
92  %and2 = and i32 %and1, %notb
93  %ret_cast = bitcast i32 %and2 to float
94  ret float %ret_cast
95}
96
; Computes a & b & ~c (only %c is inverted).
; SelectionDAG is expected to emit one v_bitop3_b32 with bitop3:0x40.
; GlobalISel is expected to emit two instructions: a two-source v_bitop3_b32
; (bitop3:0x30 on %a and %c) followed by a v_and_b32 with %b.
97define amdgpu_ps float @and_and_not_and(i32 %a, i32 %b, i32 %c) {
98; GFX950-SDAG-LABEL: and_and_not_and:
99; GFX950-SDAG:       ; %bb.0:
100; GFX950-SDAG-NEXT:    v_bitop3_b32 v0, v0, v1, v2 bitop3:0x40
101; GFX950-SDAG-NEXT:    ; return to shader part epilog
102;
103; GFX950-GISEL-LABEL: and_and_not_and:
104; GFX950-GISEL:       ; %bb.0:
105; GFX950-GISEL-NEXT:    v_bitop3_b32 v0, v0, v2, v0 bitop3:0x30
106; GFX950-GISEL-NEXT:    v_and_b32_e32 v0, v0, v1
107; GFX950-GISEL-NEXT:    ; return to shader part epilog
108  %notc = xor i32 %c, -1
109  %and1 = and i32 %a, %notc
110  %and2 = and i32 %and1, %b
111  %ret_cast = bitcast i32 %and2 to float
112  ret float %ret_cast
113}
114
; Computes a & b & c with no inversions.
; The two selectors are checked under separate prefixes, but the expected
; output is identical: one v_bitop3_b32 with bitop3:0x80.
115define amdgpu_ps float @and_and_and(i32 %a, i32 %b, i32 %c) {
116; GFX950-SDAG-LABEL: and_and_and:
117; GFX950-SDAG:       ; %bb.0:
118; GFX950-SDAG-NEXT:    v_bitop3_b32 v0, v0, v1, v2 bitop3:0x80
119; GFX950-SDAG-NEXT:    ; return to shader part epilog
120;
121; GFX950-GISEL-LABEL: and_and_and:
122; GFX950-GISEL:       ; %bb.0:
123; GFX950-GISEL-NEXT:    v_bitop3_b32 v0, v0, v1, v2 bitop3:0x80
124; GFX950-GISEL-NEXT:    ; return to shader part epilog
125  %and1 = and i32 %a, %c
126  %and2 = and i32 %and1, %b
127  %ret_cast = bitcast i32 %and2 to float
128  ret float %ret_cast
129}
130
131; ========= Multi bit functions =========
132
; Two-operand case: computes ~a & b.
; Both selectors are expected to emit one v_bitop3_b32 with bitop3:0xc
; (decimal 12, hence the test name); the third source operand simply repeats
; v0 because only two inputs are used.
133define amdgpu_ps float @test_12(i32 %a, i32 %b) {
134; GFX950-SDAG-LABEL: test_12:
135; GFX950-SDAG:       ; %bb.0:
136; GFX950-SDAG-NEXT:    v_bitop3_b32 v0, v0, v1, v0 bitop3:0xc
137; GFX950-SDAG-NEXT:    ; return to shader part epilog
138;
139; GFX950-GISEL-LABEL: test_12:
140; GFX950-GISEL:       ; %bb.0:
141; GFX950-GISEL-NEXT:    v_bitop3_b32 v0, v0, v1, v0 bitop3:0xc
142; GFX950-GISEL-NEXT:    ; return to shader part epilog
143  %nota = xor i32 %a, -1
144  %and1 = and i32 %nota, %b
145  %ret_cast = bitcast i32 %and1 to float
146  ret float %ret_cast
147}
148
; Two-operand case: computes ~a | ~b.
; SelectionDAG is expected to emit one v_bitop3_b32 with bitop3:0x3f
; (decimal 63, hence the test name). GlobalISel is expected to emit no bitop3:
; two v_not_b32 instructions followed by a v_or_b32.
149define amdgpu_ps float @test_63(i32 %a, i32 %b) {
150; GFX950-SDAG-LABEL: test_63:
151; GFX950-SDAG:       ; %bb.0:
152; GFX950-SDAG-NEXT:    v_bitop3_b32 v0, v0, v1, v0 bitop3:0x3f
153; GFX950-SDAG-NEXT:    ; return to shader part epilog
154;
155; GFX950-GISEL-LABEL: test_63:
156; GFX950-GISEL:       ; %bb.0:
157; GFX950-GISEL-NEXT:    v_not_b32_e32 v0, v0
158; GFX950-GISEL-NEXT:    v_not_b32_e32 v1, v1
159; GFX950-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
160; GFX950-GISEL-NEXT:    ; return to shader part epilog
161  %nota = xor i32 %a, -1
162  %notb = xor i32 %b, -1
163  %or = or i32 %nota, %notb
164  %ret_cast = bitcast i32 %or to float
165  ret float %ret_cast
166}
167
; Mixed and/or case: computes (~a & c) | ~b.
; The shared checks expect both selectors to emit one v_bitop3_b32 with
; bitop3:0x3b (decimal 59, hence the test name).
168define amdgpu_ps float @test_59(i32 %a, i32 %b, i32 %c) {
169; GCN-LABEL: test_59:
170; GCN:       ; %bb.0:
171; GCN-NEXT:    v_bitop3_b32 v0, v0, v1, v2 bitop3:0x3b
172; GCN-NEXT:    ; return to shader part epilog
173  %nota = xor i32 %a, -1
174  %notb = xor i32 %b, -1
175  %and1 = and i32 %nota, %c
176  %or = or i32 %and1, %notb
177  %ret_cast = bitcast i32 %or to float
178  ret float %ret_cast
179}
180
; Computes (a ^ b) | (a ^ c), where %a feeds both xors.
; SelectionDAG is expected to emit one v_bitop3_b32 with bitop3:0x7e
; (decimal 126, hence the test name); note the source order v0, v2, v1 in the
; expected instruction. GlobalISel is expected to emit two v_xor_b32
; instructions followed by a v_or_b32.
181define amdgpu_ps float @test_126(i32 %a, i32 %b, i32 %c) {
182; GFX950-SDAG-LABEL: test_126:
183; GFX950-SDAG:       ; %bb.0:
184; GFX950-SDAG-NEXT:    v_bitop3_b32 v0, v0, v2, v1 bitop3:0x7e
185; GFX950-SDAG-NEXT:    ; return to shader part epilog
186;
187; GFX950-GISEL-LABEL: test_126:
188; GFX950-GISEL:       ; %bb.0:
189; GFX950-GISEL-NEXT:    v_xor_b32_e32 v1, v0, v1
190; GFX950-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
191; GFX950-GISEL-NEXT:    v_or_b32_e32 v0, v1, v0
192; GFX950-GISEL-NEXT:    ; return to shader part epilog
193  %xor1 = xor i32 %a, %b
194  %xor2 = xor i32 %a, %c
195  %or = or i32 %xor1, %xor2
196  %ret_cast = bitcast i32 %or to float
197  ret float %ret_cast
198}
199
200; The Src vector is exhausted during the search, but the match is recovered using the 'not' lookahead.
201; GlobalISel has slightly different input, so this does not happen there.
202
203; FIXME: Improve global isel code.
204
; Computes (~a & c & b) | (~a & ~c & b). The two terms differ only in the
; polarity of %c, so the expression reduces to ~a & b.
; SelectionDAG is expected to find that reduced form and emit one
; v_bitop3_b32 with bitop3:0xc that reads only v0 and v1. GlobalISel is
; expected to emit five instructions (two v_not_b32, a v_and_b32 and two
; v_bitop3_b32).
205define amdgpu_ps float @test_12_src_overflow(i32 %a, i32 %b, i32 %c) {
206; GFX950-SDAG-LABEL: test_12_src_overflow:
207; GFX950-SDAG:       ; %bb.0:
208; GFX950-SDAG-NEXT:    v_bitop3_b32 v0, v0, v1, v0 bitop3:0xc
209; GFX950-SDAG-NEXT:    ; return to shader part epilog
210;
211; GFX950-GISEL-LABEL: test_12_src_overflow:
212; GFX950-GISEL:       ; %bb.0:
213; GFX950-GISEL-NEXT:    v_not_b32_e32 v3, v0
214; GFX950-GISEL-NEXT:    v_not_b32_e32 v4, v2
215; GFX950-GISEL-NEXT:    v_bitop3_b32 v0, v0, v2, v0 bitop3:0xc
216; GFX950-GISEL-NEXT:    v_and_b32_e32 v2, v3, v4
217; GFX950-GISEL-NEXT:    v_bitop3_b32 v0, v0, v1, v2 bitop3:0xc8
218; GFX950-GISEL-NEXT:    ; return to shader part epilog
219  %nota = xor i32 %a, -1
220  %notc = xor i32 %c, -1
221  %and1 = and i32 %nota, %c
222  %and2 = and i32 %and1, %b
223  %and3 = and i32 %nota, %notc
224  %and4 = and i32 %and3, %b
225  %or = or i32 %and2, %and4
226  %ret_cast = bitcast i32 %or to float
227  ret float %ret_cast
228}
229
230; This could be a single v_bitop3_b32 (LOP3-style) operation with truth table 100 (0x64), but the Src vector is exhausted during the search.
231
; Computes (b & ~(c | a)) | (a & ~b & c) | (b & a & ~c), i.e. the OR of three
; three-input terms over the same operands.
; Neither selector is expected to collapse this into one instruction.
; SelectionDAG is expected to emit one v_bitop3_b32 per term (bitop3:0x10,
; 0x40 and 0x20) combined by a v_or3_b32. GlobalISel is expected to emit
; eight instructions, also ending in a v_or3_b32.
232define amdgpu_ps float @test_100_src_overflow(i32 %a, i32 %b, i32 %c) {
233; GFX950-SDAG-LABEL: test_100_src_overflow:
234; GFX950-SDAG:       ; %bb.0:
235; GFX950-SDAG-NEXT:    v_bitop3_b32 v3, v1, v2, v0 bitop3:0x10
236; GFX950-SDAG-NEXT:    v_bitop3_b32 v4, v0, v2, v1 bitop3:0x40
237; GFX950-SDAG-NEXT:    v_bitop3_b32 v0, v1, v2, v0 bitop3:0x20
238; GFX950-SDAG-NEXT:    v_or3_b32 v0, v3, v4, v0
239; GFX950-SDAG-NEXT:    ; return to shader part epilog
240;
241; GFX950-GISEL-LABEL: test_100_src_overflow:
242; GFX950-GISEL:       ; %bb.0:
243; GFX950-GISEL-NEXT:    v_bitop3_b32 v3, v2, v0, v2 bitop3:3
244; GFX950-GISEL-NEXT:    v_and_b32_e32 v3, v1, v3
245; GFX950-GISEL-NEXT:    v_bitop3_b32 v4, v0, v1, v0 bitop3:0x30
246; GFX950-GISEL-NEXT:    v_and_b32_e32 v0, v1, v0
247; GFX950-GISEL-NEXT:    v_not_b32_e32 v1, v2
248; GFX950-GISEL-NEXT:    v_and_b32_e32 v4, v4, v2
249; GFX950-GISEL-NEXT:    v_and_b32_e32 v0, v0, v1
250; GFX950-GISEL-NEXT:    v_or3_b32 v0, v3, v4, v0
251; GFX950-GISEL-NEXT:    ; return to shader part epilog
252  %or1 = or i32 %c, %a
253  %not1 = xor i32 %or1, -1
254  %and1 = and i32 %b, %not1
255  %not2 = xor i32 %b, -1
256  %and2 = and i32 %a, %not2
257  %and3 = and i32 %and2, %c
258  %and4 = and i32 %b, %a
259  %not3 = xor i32 %c, -1
260  %and5 = and i32 %and4, %not3
261  %or2 = or i32 %and1, %and3
262  %or3 = or i32 %or2, %and5
263  %ret_cast = bitcast i32 %or3 to float
264  ret float %ret_cast
265}
266
267; ========= Ternary logical operations take precedence =========
268
; Computes a ^ b ^ c on i32 arguments.
; The shared checks expect both selectors to emit two chained v_xor_b32
; instructions and no v_bitop3_b32.
269define amdgpu_ps float @test_xor3(i32 %a, i32 %b, i32 %c) {
270; GCN-LABEL: test_xor3:
271; GCN:       ; %bb.0:
272; GCN-NEXT:    v_xor_b32_e32 v0, v0, v1
273; GCN-NEXT:    v_xor_b32_e32 v0, v0, v2
274; GCN-NEXT:    ; return to shader part epilog
275  %xor1 = xor i32 %a, %b
276  %xor2 = xor i32 %xor1, %c
277  %ret_cast = bitcast i32 %xor2 to float
278  ret float %ret_cast
279}
280
; Computes a | b | c on i32 arguments.
; The shared checks expect both selectors to emit the dedicated v_or3_b32
; instruction rather than a v_bitop3_b32.
281define amdgpu_ps float @test_or3(i32 %a, i32 %b, i32 %c) {
282; GCN-LABEL: test_or3:
283; GCN:       ; %bb.0:
284; GCN-NEXT:    v_or3_b32 v0, v0, v1, v2
285; GCN-NEXT:    ; return to shader part epilog
286  %or1 = or i32 %a, %b
287  %or2 = or i32 %or1, %c
288  %ret_cast = bitcast i32 %or2 to float
289  ret float %ret_cast
290}
291
; Computes (a & b) | c on i32 arguments.
; The shared checks expect both selectors to emit the dedicated v_and_or_b32
; instruction rather than a v_bitop3_b32.
292define amdgpu_ps float @test_and_or(i32 %a, i32 %b, i32 %c) {
293; GCN-LABEL: test_and_or:
294; GCN:       ; %bb.0:
295; GCN-NEXT:    v_and_or_b32 v0, v0, v1, v2
296; GCN-NEXT:    ; return to shader part epilog
297  %and1 = and i32 %a, %b
298  %or1 = or i32 %and1, %c
299  %ret_cast = bitcast i32 %or1 to float
300  ret float %ret_cast
301}
302
303; ========= Uniform cases =========
304
; Computes ~a & c & b with all three arguments marked inreg (the checks show
; them arriving in s0, s1 and s2).
; The shared checks expect both selectors to keep the computation on the
; scalar unit (s_andn2_b32 followed by s_and_b32) and only copy the result to
; v0 for the return; no v_bitop3_b32 is expected.
305define amdgpu_ps float @uniform_3_op(i32 inreg %a, i32 inreg %b, i32 inreg %c) {
306; GCN-LABEL: uniform_3_op:
307; GCN:       ; %bb.0:
308; GCN-NEXT:    s_andn2_b32 s0, s2, s0
309; GCN-NEXT:    s_and_b32 s0, s0, s1
310; GCN-NEXT:    v_mov_b32_e32 v0, s0
311; GCN-NEXT:    ; return to shader part epilog
312  %nota = xor i32 %a, -1
313  %and1 = and i32 %nota, %c
314  %and2 = and i32 %and1, %b
315  %ret_cast = bitcast i32 %and2 to float
316  ret float %ret_cast
317}
318
; Computes ~a & c & ~b with all three arguments marked inreg; compared with
; uniform_3_op this adds one more IR operation (the inversion of %b).
; SelectionDAG is expected to copy s1 and s2 into VGPRs and emit one
; v_bitop3_b32 with bitop3:2. GlobalISel is expected to stay on the scalar
; unit with two s_andn2_b32 instructions and a final copy to v0.
319define amdgpu_ps float @uniform_4_op(i32 inreg %a, i32 inreg %b, i32 inreg %c) {
320; GFX950-SDAG-LABEL: uniform_4_op:
321; GFX950-SDAG:       ; %bb.0:
322; GFX950-SDAG-NEXT:    v_mov_b32_e32 v0, s1
323; GFX950-SDAG-NEXT:    v_mov_b32_e32 v1, s2
324; GFX950-SDAG-NEXT:    v_bitop3_b32 v0, s0, v0, v1 bitop3:2
325; GFX950-SDAG-NEXT:    ; return to shader part epilog
326;
327; GFX950-GISEL-LABEL: uniform_4_op:
328; GFX950-GISEL:       ; %bb.0:
329; GFX950-GISEL-NEXT:    s_andn2_b32 s0, s2, s0
330; GFX950-GISEL-NEXT:    s_andn2_b32 s0, s0, s1
331; GFX950-GISEL-NEXT:    v_mov_b32_e32 v0, s0
332; GFX950-GISEL-NEXT:    ; return to shader part epilog
333  %nota = xor i32 %a, -1
334  %notb = xor i32 %b, -1
335  %and1 = and i32 %nota, %c
336  %and2 = and i32 %and1, %notb
337  %ret_cast = bitcast i32 %and2 to float
338  ret float %ret_cast
339}
340
341; ========= 16 bit tests =========
342
; 16-bit variant: computes ~a & ~b & ~c on i16 arguments and returns the bits
; as a half.
; The shared checks expect both selectors to emit one v_bitop3_b16 with
; bitop3:1.
343define amdgpu_ps half @not_and_not_and_not_and_b16(i16 %a, i16 %b, i16 %c) {
344; GCN-LABEL: not_and_not_and_not_and_b16:
345; GCN:       ; %bb.0:
346; GCN-NEXT:    v_bitop3_b16 v0, v0, v1, v2 bitop3:1
347; GCN-NEXT:    ; return to shader part epilog
348  %nota = xor i16 %a, -1
349  %notb = xor i16 %b, -1
350  %notc = xor i16 %c, -1
351  %and1 = and i16 %nota, %notc
352  %and2 = and i16 %and1, %notb
353  %ret_cast = bitcast i16 %and2 to half
354  ret half %ret_cast
355}
356
; 16-bit variant: computes ~a & ~b & c on i16 arguments and returns the bits
; as a half.
; The shared checks expect both selectors to emit one v_bitop3_b16 with
; bitop3:2.
357define amdgpu_ps half @not_and_not_and_and_b16(i16 %a, i16 %b, i16 %c) {
358; GCN-LABEL: not_and_not_and_and_b16:
359; GCN:       ; %bb.0:
360; GCN-NEXT:    v_bitop3_b16 v0, v0, v1, v2 bitop3:2
361; GCN-NEXT:    ; return to shader part epilog
362  %nota = xor i16 %a, -1
363  %notb = xor i16 %b, -1
364  %and1 = and i16 %nota, %c
365  %and2 = and i16 %and1, %notb
366  %ret_cast = bitcast i16 %and2 to half
367  ret half %ret_cast
368}
369
; 16-bit variant: computes ~a & b & ~c on i16 arguments and returns the bits
; as a half.
; The shared checks expect both selectors to emit one v_bitop3_b16 with
; bitop3:4.
370define amdgpu_ps half @not_and_and_not_and_b16(i16 %a, i16 %b, i16 %c) {
371; GCN-LABEL: not_and_and_not_and_b16:
372; GCN:       ; %bb.0:
373; GCN-NEXT:    v_bitop3_b16 v0, v0, v1, v2 bitop3:4
374; GCN-NEXT:    ; return to shader part epilog
375  %nota = xor i16 %a, -1
376  %notc = xor i16 %c, -1
377  %and1 = and i16 %nota, %notc
378  %and2 = and i16 %and1, %b
379  %ret_cast = bitcast i16 %and2 to half
380  ret half %ret_cast
381}
382
; 16-bit variant of test_xor3: computes a ^ b ^ c on i16 arguments.
; Unlike the 32-bit test, SelectionDAG is expected to emit one v_bitop3_b16
; with bitop3:0x96. GlobalISel is expected to emit two 32-bit v_xor_b32
; instructions.
383define amdgpu_ps half @test_xor3_b16(i16 %a, i16 %b, i16 %c) {
384; GFX950-SDAG-LABEL: test_xor3_b16:
385; GFX950-SDAG:       ; %bb.0:
386; GFX950-SDAG-NEXT:    v_bitop3_b16 v0, v0, v2, v1 bitop3:0x96
387; GFX950-SDAG-NEXT:    ; return to shader part epilog
388;
389; GFX950-GISEL-LABEL: test_xor3_b16:
390; GFX950-GISEL:       ; %bb.0:
391; GFX950-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v1
392; GFX950-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
393; GFX950-GISEL-NEXT:    ; return to shader part epilog
394  %xor1 = xor i16 %a, %b
395  %xor2 = xor i16 %xor1, %c
396  %ret_cast = bitcast i16 %xor2 to half
397  ret half %ret_cast
398}
399
; 16-bit variant of test_or3: computes a | b | c on i16 arguments.
; Unlike the 32-bit test, SelectionDAG is expected to emit one v_bitop3_b16
; with bitop3:0xfe. GlobalISel is expected to emit the 32-bit v_or3_b32.
400define amdgpu_ps half @test_or3_b16(i16 %a, i16 %b, i16 %c) {
401; GFX950-SDAG-LABEL: test_or3_b16:
402; GFX950-SDAG:       ; %bb.0:
403; GFX950-SDAG-NEXT:    v_bitop3_b16 v0, v0, v2, v1 bitop3:0xfe
404; GFX950-SDAG-NEXT:    ; return to shader part epilog
405;
406; GFX950-GISEL-LABEL: test_or3_b16:
407; GFX950-GISEL:       ; %bb.0:
408; GFX950-GISEL-NEXT:    v_or3_b32 v0, v0, v1, v2
409; GFX950-GISEL-NEXT:    ; return to shader part epilog
410  %or1 = or i16 %a, %b
411  %or2 = or i16 %or1, %c
412  %ret_cast = bitcast i16 %or2 to half
413  ret half %ret_cast
414}
415
; 16-bit variant of test_and_or: computes (a & b) | c on i16 arguments.
; Unlike the 32-bit test, SelectionDAG is expected to emit one v_bitop3_b16
; with bitop3:0xec (sources in the order v0, v2, v1). GlobalISel is expected
; to emit the 32-bit v_and_or_b32.
416define amdgpu_ps half @test_and_or_b16(i16 %a, i16 %b, i16 %c) {
417; GFX950-SDAG-LABEL: test_and_or_b16:
418; GFX950-SDAG:       ; %bb.0:
419; GFX950-SDAG-NEXT:    v_bitop3_b16 v0, v0, v2, v1 bitop3:0xec
420; GFX950-SDAG-NEXT:    ; return to shader part epilog
421;
422; GFX950-GISEL-LABEL: test_and_or_b16:
423; GFX950-GISEL:       ; %bb.0:
424; GFX950-GISEL-NEXT:    v_and_or_b32 v0, v0, v1, v2
425; GFX950-GISEL-NEXT:    ; return to shader part epilog
426  %and1 = and i16 %a, %b
427  %or1 = or i16 %and1, %c
428  %ret_cast = bitcast i16 %or1 to half
429  ret half %ret_cast
430}
431;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
432; GFX950: {{.*}}
433