xref: /llvm-project/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll (revision 61510b51c33464a6bc15e4cf5b1ee07e2e0ec1c9)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3; RUN: llc -mtriple=aarch64-unknown-linux-gnu -global-isel < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4
5; BIT Bitwise Insert if True
6;
7; 8-bit vectors tests
8
; <1 x i8> bitwise select: the result is (%C & %B) | (~%C & %A), i.e. the bits
; of %B where %C is set and the bits of %A elsewhere.
; The SD-prefixed assertions (plain llc run) expect one 8-byte BIT; the
; GI-prefixed assertions (-global-isel run) expect byte lane 0 of each operand
; to be moved to a GPR and combined with scalar and/bic/orr.
9define <1 x i8> @test_bit_v1i8(<1 x i8> %A, <1 x i8> %B, <1 x i8> %C) {
10; CHECK-SD-LABEL: test_bit_v1i8:
11; CHECK-SD:       // %bb.0:
12; CHECK-SD-NEXT:    bit v0.8b, v1.8b, v2.8b
13; CHECK-SD-NEXT:    ret
14;
15; CHECK-GI-LABEL: test_bit_v1i8:
16; CHECK-GI:       // %bb.0:
17; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
18; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
19; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
20; CHECK-GI-NEXT:    umov w8, v2.b[0]
21; CHECK-GI-NEXT:    umov w9, v1.b[0]
22; CHECK-GI-NEXT:    umov w10, v0.b[0]
23; CHECK-GI-NEXT:    and w9, w8, w9
24; CHECK-GI-NEXT:    bic w8, w10, w8
25; CHECK-GI-NEXT:    orr w8, w9, w8
26; CHECK-GI-NEXT:    fmov s0, w8
27; CHECK-GI-NEXT:    ret
28  %and = and <1 x i8> %C, %B
29  %neg = xor <1 x i8> %C, <i8 -1>
30  %and1 = and <1 x i8> %neg, %A
31  %or = or <1 x i8> %and, %and1
32  ret <1 x i8> %or
33}
34
35; 16-bit vectors tests
36
; <1 x i16> bitwise select: the result is (%C & %B) | (~%C & %A).
; The SD-prefixed assertions (plain llc run) expect one 8-byte BIT; the
; GI-prefixed assertions (-global-isel run) expect halfword lane 0 of each
; operand to be moved to a GPR and combined with scalar and/bic/orr.
37define <1 x i16> @test_bit_v1i16(<1 x i16> %A, <1 x i16> %B, <1 x i16> %C) {
38; CHECK-SD-LABEL: test_bit_v1i16:
39; CHECK-SD:       // %bb.0:
40; CHECK-SD-NEXT:    bit v0.8b, v1.8b, v2.8b
41; CHECK-SD-NEXT:    ret
42;
43; CHECK-GI-LABEL: test_bit_v1i16:
44; CHECK-GI:       // %bb.0:
45; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
46; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
47; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
48; CHECK-GI-NEXT:    umov w8, v2.h[0]
49; CHECK-GI-NEXT:    umov w9, v1.h[0]
50; CHECK-GI-NEXT:    umov w10, v0.h[0]
51; CHECK-GI-NEXT:    and w9, w8, w9
52; CHECK-GI-NEXT:    bic w8, w10, w8
53; CHECK-GI-NEXT:    orr w8, w9, w8
54; CHECK-GI-NEXT:    fmov s0, w8
55; CHECK-GI-NEXT:    ret
56  %and = and <1 x i16> %C, %B
57  %neg = xor <1 x i16> %C, <i16 -1>
58  %and1 = and <1 x i16> %neg, %A
59  %or = or <1 x i16> %and, %and1
60  ret <1 x i16> %or
61}
62
63; 32-bit vectors tests
64
; <1 x i32> bitwise select: the result is (%C & %B) | (~%C & %A).
; The SD-prefixed assertions (plain llc run) expect one 8-byte BIT; the
; GI-prefixed assertions (-global-isel run) expect the three operands to be
; moved to W registers with fmov, combined with scalar and/bic/orr, and the
; result inserted back into lane 0 of v0.
65define <1 x i32> @test_bit_v1i32(<1 x i32> %A, <1 x i32> %B, <1 x i32> %C) {
66; CHECK-SD-LABEL: test_bit_v1i32:
67; CHECK-SD:       // %bb.0:
68; CHECK-SD-NEXT:    bit v0.8b, v1.8b, v2.8b
69; CHECK-SD-NEXT:    ret
70;
71; CHECK-GI-LABEL: test_bit_v1i32:
72; CHECK-GI:       // %bb.0:
73; CHECK-GI-NEXT:    fmov w8, s2
74; CHECK-GI-NEXT:    fmov w9, s1
75; CHECK-GI-NEXT:    fmov w10, s0
76; CHECK-GI-NEXT:    and w9, w8, w9
77; CHECK-GI-NEXT:    bic w8, w10, w8
78; CHECK-GI-NEXT:    orr w8, w9, w8
79; CHECK-GI-NEXT:    mov v0.s[0], w8
80; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
81; CHECK-GI-NEXT:    ret
82  %and = and <1 x i32> %C, %B
83  %neg = xor <1 x i32> %C, <i32 -1>
84  %and1 = and <1 x i32> %neg, %A
85  %or = or <1 x i32> %and, %and1
86  ret <1 x i32> %or
87}
88
89; 64-bit vectors tests
90
; <1 x i64> bitwise select: the result is (%C & %B) | (~%C & %A).
; The SD-prefixed assertions (plain llc run) expect one 8-byte BIT; the
; GI-prefixed assertions (-global-isel run) expect the three operands to be
; moved to X registers with fmov, combined with scalar and/bic/orr, and the
; result moved back to d0.
91define <1 x i64> @test_bit_v1i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C) {
92; CHECK-SD-LABEL: test_bit_v1i64:
93; CHECK-SD:       // %bb.0:
94; CHECK-SD-NEXT:    bit v0.8b, v1.8b, v2.8b
95; CHECK-SD-NEXT:    ret
96;
97; CHECK-GI-LABEL: test_bit_v1i64:
98; CHECK-GI:       // %bb.0:
99; CHECK-GI-NEXT:    fmov x8, d2
100; CHECK-GI-NEXT:    fmov x9, d1
101; CHECK-GI-NEXT:    fmov x10, d0
102; CHECK-GI-NEXT:    and x9, x8, x9
103; CHECK-GI-NEXT:    bic x8, x10, x8
104; CHECK-GI-NEXT:    orr x8, x9, x8
105; CHECK-GI-NEXT:    fmov d0, x8
106; CHECK-GI-NEXT:    ret
107  %and = and <1 x i64> %C, %B
108  %neg = xor <1 x i64> %C, <i64 -1>
109  %and1 = and <1 x i64> %neg, %A
110  %or = or <1 x i64> %and, %and1
111  ret <1 x i64> %or
112}
113
; <2 x i32> bitwise select: the result is (%C & %B) | (~%C & %A).
; The assertions use the prefix shared by both llc runs, so the plain run and
; the -global-isel run are both expected to emit one 8-byte BIT.
114define <2 x i32> @test_bit_v2i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C) {
115; CHECK-LABEL: test_bit_v2i32:
116; CHECK:       // %bb.0:
117; CHECK-NEXT:    bit v0.8b, v1.8b, v2.8b
118; CHECK-NEXT:    ret
119  %and = and <2 x i32> %C, %B
120  %neg = xor <2 x i32> %C, <i32 -1, i32 -1>
121  %and1 = and <2 x i32> %neg, %A
122  %or = or <2 x i32> %and, %and1
123  ret <2 x i32> %or
124}
125
; <4 x i16> bitwise select: the result is (%C & %B) | (~%C & %A).
; The assertions use the prefix shared by both llc runs, so the plain run and
; the -global-isel run are both expected to emit one 8-byte BIT.
126define <4 x i16> @test_bit_v4i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C) {
127; CHECK-LABEL: test_bit_v4i16:
128; CHECK:       // %bb.0:
129; CHECK-NEXT:    bit v0.8b, v1.8b, v2.8b
130; CHECK-NEXT:    ret
131  %and = and <4 x i16> %C, %B
132  %neg = xor <4 x i16> %C, <i16 -1, i16 -1, i16 -1, i16 -1>
133  %and1 = and <4 x i16> %neg, %A
134  %or = or <4 x i16> %and, %and1
135  ret <4 x i16> %or
136}
137
; <8 x i8> bitwise select: the result is (%C & %B) | (~%C & %A).
; The assertions use the prefix shared by both llc runs, so the plain run and
; the -global-isel run are both expected to emit one 8-byte BIT.
138define <8 x i8> @test_bit_v8i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C) {
139; CHECK-LABEL: test_bit_v8i8:
140; CHECK:       // %bb.0:
141; CHECK-NEXT:    bit v0.8b, v1.8b, v2.8b
142; CHECK-NEXT:    ret
143  %and = and <8 x i8> %C, %B
144  %neg = xor <8 x i8> %C, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
145  %and1 = and <8 x i8> %neg, %A
146  %or = or <8 x i8> %and, %and1
147  ret <8 x i8> %or
148}
149
150; 128-bit vectors tests
151
; <2 x i64> bitwise select: the result is (%C & %B) | (~%C & %A).
; The assertions use the prefix shared by both llc runs, so the plain run and
; the -global-isel run are both expected to emit one 16-byte BIT.
152define <2 x i64> @test_bit_v2i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C) {
153; CHECK-LABEL: test_bit_v2i64:
154; CHECK:       // %bb.0:
155; CHECK-NEXT:    bit v0.16b, v1.16b, v2.16b
156; CHECK-NEXT:    ret
157  %and = and <2 x i64> %C, %B
158  %neg = xor <2 x i64> %C, <i64 -1, i64 -1>
159  %and1 = and <2 x i64> %neg, %A
160  %or = or <2 x i64> %and, %and1
161  ret <2 x i64> %or
162}
163
; <4 x i32> bitwise select: the result is (%C & %B) | (~%C & %A).
; The assertions use the prefix shared by both llc runs, so the plain run and
; the -global-isel run are both expected to emit one 16-byte BIT.
164define <4 x i32> @test_bit_v4i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) {
165; CHECK-LABEL: test_bit_v4i32:
166; CHECK:       // %bb.0:
167; CHECK-NEXT:    bit v0.16b, v1.16b, v2.16b
168; CHECK-NEXT:    ret
169  %and = and <4 x i32> %C, %B
170  %neg = xor <4 x i32> %C, <i32 -1, i32 -1, i32 -1, i32 -1>
171  %and1 = and <4 x i32> %neg, %A
172  %or = or <4 x i32> %and, %and1
173  ret <4 x i32> %or
174}
175
; <8 x i16> bitwise select: the result is (%C & %B) | (~%C & %A).
; The assertions use the prefix shared by both llc runs, so the plain run and
; the -global-isel run are both expected to emit one 16-byte BIT.
176define <8 x i16> @test_bit_v8i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C) {
177; CHECK-LABEL: test_bit_v8i16:
178; CHECK:       // %bb.0:
179; CHECK-NEXT:    bit v0.16b, v1.16b, v2.16b
180; CHECK-NEXT:    ret
181  %and = and <8 x i16> %C, %B
182  %neg = xor <8 x i16> %C, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
183  %and1 = and <8 x i16> %neg, %A
184  %or = or <8 x i16> %and, %and1
185  ret <8 x i16> %or
186}
187
; <16 x i8> bitwise select: the result is (%C & %B) | (~%C & %A).
; The assertions use the prefix shared by both llc runs, so the plain run and
; the -global-isel run are both expected to emit one 16-byte BIT.
188define <16 x i8> @test_bit_v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) {
189; CHECK-LABEL: test_bit_v16i8:
190; CHECK:       // %bb.0:
191; CHECK-NEXT:    bit v0.16b, v1.16b, v2.16b
192; CHECK-NEXT:    ret
193  %and = and <16 x i8> %C, %B
194  %neg = xor <16 x i8> %C, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
195  %and1 = and <16 x i8> %neg, %A
196  %or = or <16 x i8> %and, %and1
197  ret <16 x i8> %or
198}
199
; Bitwise select inside a loop where the inverted mask (%0) is computed once in
; the entry block and used by the select in %do.body. Both the plain llc run
; and the -global-isel run are expected to emit a single 16-byte BIT inside the
; loop body, with no separate vector NOT.
; Presumably this guards the transform that sinks the loop-invariant inversion
; next to its user so the select pattern can still be matched (inferred from
; the function name; confirm against the commit that added the test).
; NOTE(review): the loop counter takes the values 0..4, so on the last
; iteration both insertelement instructions use index 4 on a <4 x i32>.
; LangRef defines an out-of-range insertelement index as producing poison.
; The expected output masks the index to two bits. Changing the trip count
; would require regenerating the assertions.
200define <4 x i32> @test_bit_sink_operand(<4 x i32> %src, <4 x i32> %dst, <4 x i32> %mask, i32 %scratch) {
201; CHECK-SD-LABEL: test_bit_sink_operand:
202; CHECK-SD:       // %bb.0: // %entry
203; CHECK-SD-NEXT:    sub sp, sp, #32
204; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
205; CHECK-SD-NEXT:    add w8, w0, w0, lsr #31
206; CHECK-SD-NEXT:    mov w9, wzr
207; CHECK-SD-NEXT:    asr w8, w8, #1
208; CHECK-SD-NEXT:  .LBB11_1: // %do.body
209; CHECK-SD-NEXT:    // =>This Inner Loop Header: Depth=1
210; CHECK-SD-NEXT:    bit v1.16b, v0.16b, v2.16b
211; CHECK-SD-NEXT:    add x10, sp, #16
212; CHECK-SD-NEXT:    mov x11, sp
213; CHECK-SD-NEXT:    bfi x10, x9, #2, #2
214; CHECK-SD-NEXT:    bfi x11, x9, #2, #2
215; CHECK-SD-NEXT:    add w9, w9, #1
216; CHECK-SD-NEXT:    cmp w9, #5
217; CHECK-SD-NEXT:    str q1, [sp, #16]
218; CHECK-SD-NEXT:    str w0, [x10]
219; CHECK-SD-NEXT:    ldr q1, [sp, #16]
220; CHECK-SD-NEXT:    str q0, [sp]
221; CHECK-SD-NEXT:    str w8, [x11]
222; CHECK-SD-NEXT:    ldr q0, [sp]
223; CHECK-SD-NEXT:    b.ne .LBB11_1
224; CHECK-SD-NEXT:  // %bb.2: // %do.end
225; CHECK-SD-NEXT:    mov v0.16b, v1.16b
226; CHECK-SD-NEXT:    add sp, sp, #32
227; CHECK-SD-NEXT:    ret
228;
229; CHECK-GI-LABEL: test_bit_sink_operand:
230; CHECK-GI:       // %bb.0: // %entry
231; CHECK-GI-NEXT:    sub sp, sp, #32
232; CHECK-GI-NEXT:    .cfi_def_cfa_offset 32
233; CHECK-GI-NEXT:    asr w9, w0, #31
234; CHECK-GI-NEXT:    mov w8, wzr
235; CHECK-GI-NEXT:    add x10, sp, #16
236; CHECK-GI-NEXT:    mov x11, sp
237; CHECK-GI-NEXT:    add w9, w0, w9, lsr #31
238; CHECK-GI-NEXT:    asr w9, w9, #1
239; CHECK-GI-NEXT:  .LBB11_1: // %do.body
240; CHECK-GI-NEXT:    // =>This Inner Loop Header: Depth=1
241; CHECK-GI-NEXT:    bit v1.16b, v0.16b, v2.16b
242; CHECK-GI-NEXT:    mov w12, w8
243; CHECK-GI-NEXT:    add w8, w8, #1
244; CHECK-GI-NEXT:    and x12, x12, #0x3
245; CHECK-GI-NEXT:    cmp w8, #5
246; CHECK-GI-NEXT:    str q1, [sp, #16]
247; CHECK-GI-NEXT:    str w0, [x10, x12, lsl #2]
248; CHECK-GI-NEXT:    ldr q1, [sp, #16]
249; CHECK-GI-NEXT:    str q0, [sp]
250; CHECK-GI-NEXT:    str w9, [x11, x12, lsl #2]
251; CHECK-GI-NEXT:    ldr q0, [sp]
252; CHECK-GI-NEXT:    b.ne .LBB11_1
253; CHECK-GI-NEXT:  // %bb.2: // %do.end
254; CHECK-GI-NEXT:    mov v0.16b, v1.16b
255; CHECK-GI-NEXT:    add sp, sp, #32
256; CHECK-GI-NEXT:    ret
257
; Loop-invariant values: the inverted mask and %scratch / 2.
258entry:
259  %0 = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
260  %div = sdiv i32 %scratch, 2
261  br label %do.body
262
; Each iteration selects (src & mask) | (dst & ~mask), then overwrites one lane
; of the selected value with %scratch and the same lane of the running source
; with %div. Both updated vectors feed the next iteration through the phis.
263do.body:
264  %dst.addr.0 = phi <4 x i32> [ %dst, %entry ], [ %vecins, %do.body ]
265  %src.addr.0 = phi <4 x i32> [ %src, %entry ], [ %vecins1, %do.body ]
266  %i.0 = phi i32 [ 0, %entry ], [ %inc, %do.body ]
267  %vbsl3.i = and <4 x i32> %src.addr.0, %mask
268  %vbsl4.i = and <4 x i32> %dst.addr.0, %0
269  %vbsl5.i = or <4 x i32> %vbsl3.i, %vbsl4.i
270  %vecins = insertelement <4 x i32> %vbsl5.i, i32 %scratch, i32 %i.0
271  %vecins1 = insertelement <4 x i32> %src.addr.0, i32 %div, i32 %i.0
272  %inc = add nuw nsw i32 %i.0, 1
273  %exitcond.not = icmp eq i32 %inc, 5
274  br i1 %exitcond.not, label %do.end, label %do.body
275
; Returns the selected vector from the final iteration.
276do.end:
277  ret <4 x i32> %vecins
278}
279