; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=armv7-eabihf -mattr=+neon -verify-machineinstrs %s -o - | FileCheck %s

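; This file tests selection of the NEON VBSL (bitwise select) instruction,
; both from the open-coded select pattern
;   (a & b) | (~a & c)
; and from direct calls to the llvm.arm.neon.vbsl intrinsics.
;
; The v_bsl* functions below cover the 64-bit (D register) forms of the
; open-coded pattern: the mask is first copied into the result register
; with VORR, then VBSL combines the other two operands in place.
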
define <8 x i8> @v_bsli8(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: v_bsli8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d18, [r0]
; CHECK-NEXT:    vldr d16, [r2]
; CHECK-NEXT:    vorr d0, d18, d18
; CHECK-NEXT:    vldr d17, [r1]
; CHECK-NEXT:    vbsl d0, d17, d16
; CHECK-NEXT:    bx lr
	%tmp1 = load <8 x i8>, ptr %A
	%tmp2 = load <8 x i8>, ptr %B
	%tmp3 = load <8 x i8>, ptr %C
	%tmp4 = and <8 x i8> %tmp1, %tmp2
	%tmp5 = xor <8 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
	%tmp6 = and <8 x i8> %tmp5, %tmp3
	%tmp7 = or <8 x i8> %tmp4, %tmp6
	ret <8 x i8> %tmp7
}

define <4 x i16> @v_bsli16(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: v_bsli16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d18, [r0]
; CHECK-NEXT:    vldr d16, [r2]
; CHECK-NEXT:    vorr d0, d18, d18
; CHECK-NEXT:    vldr d17, [r1]
; CHECK-NEXT:    vbsl d0, d17, d16
; CHECK-NEXT:    bx lr
	%tmp1 = load <4 x i16>, ptr %A
	%tmp2 = load <4 x i16>, ptr %B
	%tmp3 = load <4 x i16>, ptr %C
	%tmp4 = and <4 x i16> %tmp1, %tmp2
	%tmp5 = xor <4 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1 >
	%tmp6 = and <4 x i16> %tmp5, %tmp3
	%tmp7 = or <4 x i16> %tmp4, %tmp6
	ret <4 x i16> %tmp7
}

define <2 x i32> @v_bsli32(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: v_bsli32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d18, [r0]
; CHECK-NEXT:    vldr d16, [r2]
; CHECK-NEXT:    vorr d0, d18, d18
; CHECK-NEXT:    vldr d17, [r1]
; CHECK-NEXT:    vbsl d0, d17, d16
; CHECK-NEXT:    bx lr
	%tmp1 = load <2 x i32>, ptr %A
	%tmp2 = load <2 x i32>, ptr %B
	%tmp3 = load <2 x i32>, ptr %C
	%tmp4 = and <2 x i32> %tmp1, %tmp2
	%tmp5 = xor <2 x i32> %tmp1, < i32 -1, i32 -1 >
	%tmp6 = and <2 x i32> %tmp5, %tmp3
	%tmp7 = or <2 x i32> %tmp4, %tmp6
	ret <2 x i32> %tmp7
}

define <1 x i64> @v_bsli64(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: v_bsli64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d18, [r0]
; CHECK-NEXT:    vldr d16, [r2]
; CHECK-NEXT:    vorr d0, d18, d18
; CHECK-NEXT:    vldr d17, [r1]
; CHECK-NEXT:    vbsl d0, d17, d16
; CHECK-NEXT:    bx lr
	%tmp1 = load <1 x i64>, ptr %A
	%tmp2 = load <1 x i64>, ptr %B
	%tmp3 = load <1 x i64>, ptr %C
	%tmp4 = and <1 x i64> %tmp1, %tmp2
	%tmp5 = xor <1 x i64> %tmp1, < i64 -1 >
	%tmp6 = and <1 x i64> %tmp5, %tmp3
	%tmp7 = or <1 x i64> %tmp4, %tmp6
	ret <1 x i64> %tmp7
}

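; The v_bslQ* functions repeat the open-coded pattern for the 128-bit
; (Q register) forms; each operand is loaded as a D-register pair with
; vld1.64.
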
define <16 x i8> @v_bslQi8(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: v_bslQi8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d20, d21}, [r0]
; CHECK-NEXT:    vorr q0, q10, q10
; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
; CHECK-NEXT:    vbsl q0, q9, q8
; CHECK-NEXT:    bx lr
	%tmp1 = load <16 x i8>, ptr %A
	%tmp2 = load <16 x i8>, ptr %B
	%tmp3 = load <16 x i8>, ptr %C
	%tmp4 = and <16 x i8> %tmp1, %tmp2
	%tmp5 = xor <16 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
	%tmp6 = and <16 x i8> %tmp5, %tmp3
	%tmp7 = or <16 x i8> %tmp4, %tmp6
	ret <16 x i8> %tmp7
}

define <8 x i16> @v_bslQi16(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: v_bslQi16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d20, d21}, [r0]
; CHECK-NEXT:    vorr q0, q10, q10
; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
; CHECK-NEXT:    vbsl q0, q9, q8
; CHECK-NEXT:    bx lr
	%tmp1 = load <8 x i16>, ptr %A
	%tmp2 = load <8 x i16>, ptr %B
	%tmp3 = load <8 x i16>, ptr %C
	%tmp4 = and <8 x i16> %tmp1, %tmp2
	%tmp5 = xor <8 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
	%tmp6 = and <8 x i16> %tmp5, %tmp3
	%tmp7 = or <8 x i16> %tmp4, %tmp6
	ret <8 x i16> %tmp7
}

define <4 x i32> @v_bslQi32(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: v_bslQi32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d20, d21}, [r0]
; CHECK-NEXT:    vorr q0, q10, q10
; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
; CHECK-NEXT:    vbsl q0, q9, q8
; CHECK-NEXT:    bx lr
	%tmp1 = load <4 x i32>, ptr %A
	%tmp2 = load <4 x i32>, ptr %B
	%tmp3 = load <4 x i32>, ptr %C
	%tmp4 = and <4 x i32> %tmp1, %tmp2
	%tmp5 = xor <4 x i32> %tmp1, < i32 -1, i32 -1, i32 -1, i32 -1 >
	%tmp6 = and <4 x i32> %tmp5, %tmp3
	%tmp7 = or <4 x i32> %tmp4, %tmp6
	ret <4 x i32> %tmp7
}

define <2 x i64> @v_bslQi64(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: v_bslQi64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d20, d21}, [r0]
; CHECK-NEXT:    vorr q0, q10, q10
; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
; CHECK-NEXT:    vbsl q0, q9, q8
; CHECK-NEXT:    bx lr
	%tmp1 = load <2 x i64>, ptr %A
	%tmp2 = load <2 x i64>, ptr %B
	%tmp3 = load <2 x i64>, ptr %C
	%tmp4 = and <2 x i64> %tmp1, %tmp2
	%tmp5 = xor <2 x i64> %tmp1, < i64 -1, i64 -1 >
	%tmp6 = and <2 x i64> %tmp5, %tmp3
	%tmp7 = or <2 x i64> %tmp4, %tmp6
	ret <2 x i64> %tmp7
}

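; The remaining functions exercise the llvm.arm.neon.vbsl.* intrinsics
; directly; with three distinct operands, each call lowers to a single
; VBSL on the appropriately sized register class (the mask arrives in
; the destination register, so no extra copy is needed).
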
define <8 x i8> @f1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: f1:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vbsl d0, d1, d2
; CHECK-NEXT:    bx lr
  %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) nounwind
  ret <8 x i8> %vbsl.i
}

define <4 x i16> @f2(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: f2:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vbsl d0, d1, d2
; CHECK-NEXT:    bx lr
  %vbsl3.i = tail call <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) nounwind
  ret <4 x i16> %vbsl3.i
}

define <2 x i32> @f3(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: f3:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vbsl d0, d1, d2
; CHECK-NEXT:    bx lr
  %vbsl3.i = tail call <2 x i32> @llvm.arm.neon.vbsl.v2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) nounwind
  ret <2 x i32> %vbsl3.i
}

define <2 x float> @f4(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: f4:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vbsl d0, d1, d2
; CHECK-NEXT:    bx lr
  %vbsl4.i = tail call <2 x float> @llvm.arm.neon.vbsl.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind
  ret <2 x float> %vbsl4.i
}

define <16 x i8> @g1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: g1:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vbsl q0, q1, q2
; CHECK-NEXT:    bx lr
  %vbsl.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) nounwind
  ret <16 x i8> %vbsl.i
}

define <8 x i16> @g2(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: g2:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vbsl q0, q1, q2
; CHECK-NEXT:    bx lr
  %vbsl3.i = tail call <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) nounwind
  ret <8 x i16> %vbsl3.i
}

define <4 x i32> @g3(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: g3:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vbsl q0, q1, q2
; CHECK-NEXT:    bx lr
  %vbsl3.i = tail call <4 x i32> @llvm.arm.neon.vbsl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) nounwind
  ret <4 x i32> %vbsl3.i
}

define <4 x float> @g4(<4 x float> %a, <4 x float> %b, <4 x float> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: g4:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vbsl q0, q1, q2
; CHECK-NEXT:    bx lr
  %vbsl4.i = tail call <4 x float> @llvm.arm.neon.vbsl.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) nounwind
  ret <4 x float> %vbsl4.i
}

define <1 x i64> @test_vbsl_s64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: test_vbsl_s64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vbsl d0, d1, d2
; CHECK-NEXT:    bx lr
  %vbsl3.i = tail call <1 x i64> @llvm.arm.neon.vbsl.v1i64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) nounwind
  ret <1 x i64> %vbsl3.i
}

define <1 x i64> @test_vbsl_u64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: test_vbsl_u64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vbsl d0, d1, d2
; CHECK-NEXT:    bx lr
  %vbsl3.i = tail call <1 x i64> @llvm.arm.neon.vbsl.v1i64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) nounwind
  ret <1 x i64> %vbsl3.i
}

define <2 x i64> @test_vbslq_s64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: test_vbslq_s64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vbsl q0, q1, q2
; CHECK-NEXT:    bx lr
  %vbsl3.i = tail call <2 x i64> @llvm.arm.neon.vbsl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) nounwind
  ret <2 x i64> %vbsl3.i
}

define <2 x i64> @test_vbslq_u64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: test_vbslq_u64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vbsl q0, q1, q2
; CHECK-NEXT:    bx lr
  %vbsl3.i = tail call <2 x i64> @llvm.arm.neon.vbsl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) nounwind
  ret <2 x i64> %vbsl3.i
}

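; Degenerate operand combinations. VBSL computes (m & a) | (~m & b) for
; mask m, so:
;   vbsl(x, x, x) = x                     -> folds to a plain register move
;   vbsl(m, x, x) = x                     -> likewise just a move of x
;   vbsl(a, a, b) = a | (~a & b) = a | b  -> expressible with VBIF
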
define <8 x i8> @same_param_all(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: same_param_all:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.f64 d0, d1
; CHECK-NEXT:    bx lr
  %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %b, <8 x i8> %b, <8 x i8> %b)
  ret <8 x i8> %vbsl.i
}

define <8 x i8> @same_param_12(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: same_param_12:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.f64 d0, d1
; CHECK-NEXT:    bx lr
  %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %b)
  ret <8 x i8> %vbsl.i
}

define <8 x i8> @same_param_01(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: same_param_01:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vbif d0, d1, d0
; CHECK-NEXT:    bx lr
  %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %a, <8 x i8> %b)
  ret <8 x i8> %vbsl.i
}

declare <4 x i32> @llvm.arm.neon.vbsl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
declare <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vbsl.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
declare <2 x float> @llvm.arm.neon.vbsl.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.arm.neon.vbsl.v4f32(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
declare <2 x i64> @llvm.arm.neon.vbsl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) nounwind readnone
declare <1 x i64> @llvm.arm.neon.vbsl.v1i64(<1 x i64>, <1 x i64>, <1 x i64>) nounwind readnone