xref: /llvm-project/llvm/test/CodeGen/AArch64/implicitly-set-zero-high-64-bits.ll (revision 3dd6750027cd168fce5fd9894b0bac0739652cf5)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s
3
; External declarations for the NEON intrinsics called by @test1 (vcvtfp2hf)
; and by @test2, @tbl1v8i8 and @tbl1 (tbl1.v8i8).
; NOTE(review) both reference attribute group #2, which is not defined in the
; visible part of this file - confirm that this is intentional.
4declare <4 x i16> @llvm.aarch64.neon.vcvtfp2hf(<4 x float>) #2
5declare <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8>, <8 x i8>) #2
6
; Narrows %a with the vcvtfp2hf intrinsic, reinterprets the result as
; <4 x half> and appends four zero lanes by shuffling against zeroinitializer.
; The expected assembly is a single fcvtn followed by ret, with no separate
; instruction that zeroes the upper half of the result register.
; NOTE(review) presumably this relies on a 64-bit vector write clearing the
; upper 64 bits of the register (as the test file name suggests) - confirm.
7define <8 x half> @test1(<4 x float> noundef %a) {
8; CHECK-LABEL: test1:
9; CHECK:       // %bb.0: // %entry
10; CHECK-NEXT:    fcvtn v0.4h, v0.4s
11; CHECK-NEXT:    ret
12entry:
13  %vcvt_f16_f321.i = tail call <4 x i16> @llvm.aarch64.neon.vcvtfp2hf(<4 x float> %a)
14  %0 = bitcast <4 x i16> %vcvt_f16_f321.i to <4 x half>
15  %shuffle.i = shufflevector <4 x half> %0, <4 x half> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
16  ret <8 x half> %shuffle.i
17}
18
; Loads <8 x i16> from %in, shifts every lane right by 4 (logical), truncates
; to <8 x i8>, widens the result to <16 x i8> with zero upper lanes and uses it
; as the table operand of the tbl1 intrinsic indexed by %idx.
; The expected assembly is ldr + shrn + tbl; no extra instruction zeroes the
; upper half of the table register before the lookup.
; NOTE(review) the IR body and expected output are identical to @tbl1v8i8 in
; this file - confirm whether both copies are needed.
19define <8 x i8> @test2(ptr nocapture noundef readonly %in, <8 x i8> noundef %idx) {
20; CHECK-LABEL: test2:
21; CHECK:       // %bb.0: // %entry
22; CHECK-NEXT:    ldr q1, [x0]
23; CHECK-NEXT:    shrn v1.8b, v1.8h, #4
24; CHECK-NEXT:    tbl v0.8b, { v1.16b }, v0.8b
25; CHECK-NEXT:    ret
26entry:
27  %0 = load <8 x i16>, ptr %in, align 2
28  %1 = lshr <8 x i16> %0, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
29  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
30  %vtbl1.i = shufflevector <8 x i8> %vshrn_n, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
31  %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %vtbl1.i, <8 x i8> %idx)
32  ret <8 x i8> %vtbl11.i
33}
34
; Narrowing shift (lshr by 4 then trunc to <8 x i8>) of a loaded <8 x i16>,
; zero-extended to a <16 x i8> table by shuffling against zeroinitializer and
; then consumed by the tbl1 intrinsic with %idx as the index vector.
; The expected assembly is ldr + shrn + tbl with no explicit zeroing of the
; upper 64 bits of the table register.
; NOTE(review) the IR body and expected output are identical to @test2 in this
; file - confirm whether both copies are needed.
35define <8 x i8> @tbl1v8i8(ptr nocapture noundef readonly %in, <8 x i8> noundef %idx) {
36; CHECK-LABEL: tbl1v8i8:
37; CHECK:       // %bb.0: // %entry
38; CHECK-NEXT:    ldr q1, [x0]
39; CHECK-NEXT:    shrn v1.8b, v1.8h, #4
40; CHECK-NEXT:    tbl v0.8b, { v1.16b }, v0.8b
41; CHECK-NEXT:    ret
42entry:
43  %0 = load <8 x i16>, ptr %in, align 2
44  %1 = lshr <8 x i16> %0, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
45  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
46  %vtbl1.i = shufflevector <8 x i8> %vshrn_n, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
47  %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %vtbl1.i, <8 x i8> %idx)
48  ret <8 x i8> %vtbl11.i
49}
50
; Pairwise-adds %a and %b through the addp.v4i16 intrinsic and widens the
; <4 x i16> result to <8 x i16> with four zero lanes.
; The expected assembly is a single addp on the .4h arrangement followed by
; ret; the zero upper lanes need no instruction of their own.
51define <8 x i16> @addpv4i16(<4 x i16> noundef %a, <4 x i16> noundef %b) {
52; CHECK-LABEL: addpv4i16:
53; CHECK:       // %bb.0: // %entry
54; CHECK-NEXT:    addp v0.4h, v0.4h, v1.4h
55; CHECK-NEXT:    ret
56entry:
57  %vpadd_v2.i = tail call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> %a, <4 x i16> %b)
58  %shuffle.i = shufflevector <4 x i16> %vpadd_v2.i, <4 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
59  ret <8 x i16> %shuffle.i
60}
61
; Same shape as a plain vector add - computes %b + %a on <4 x i16> and widens
; the sum to <8 x i16> with four zero lanes.
; The expected assembly is a single add on the .4h arrangement (operands in
; the order v1, v0, matching the IR operand order) followed by ret.
62define <8 x i16> @addv4i16(<4 x i16> noundef %a, <4 x i16> noundef %b) {
63; CHECK-LABEL: addv4i16:
64; CHECK:       // %bb.0: // %entry
65; CHECK-NEXT:    add v0.4h, v1.4h, v0.4h
66; CHECK-NEXT:    ret
67entry:
68  %add.i = add <4 x i16> %b, %a
69  %shuffle.i = shufflevector <4 x i16> %add.i, <4 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
70  ret <8 x i16> %shuffle.i
71}
72
; Applies the rshrn.v8i8 intrinsic to %a with an immediate shift of 3 and
; widens the <8 x i8> result to <16 x i8> with eight zero lanes.
; The expected assembly is a single rshrn #3 followed by ret.
; The %b parameter is not used by the body.
73define <16 x i8> @rshrn(<8 x i16> noundef %a, <4 x i16> noundef %b) {
74; CHECK-LABEL: rshrn:
75; CHECK:       // %bb.0: // %entry
76; CHECK-NEXT:    rshrn v0.8b, v0.8h, #3
77; CHECK-NEXT:    ret
78entry:
79  %vrshrn_n1 = tail call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %a, i32 3)
80  %shuffle.i = shufflevector <8 x i8> %vrshrn_n1, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
81  ret <16 x i8> %shuffle.i
82}
83
; Looks up %b in the 16-byte table %a with the tbl1.v8i8 intrinsic and widens
; the <8 x i8> result to <16 x i8> with eight zero lanes.
; The expected assembly is a single tbl with an .8b destination followed by
; ret, so the zero upper lanes are not materialised separately.
84define <16 x i8> @tbl1(<16 x i8> %a, <8 x i8> %b) {
85; CHECK-LABEL: tbl1:
86; CHECK:       // %bb.0: // %entry
87; CHECK-NEXT:    tbl v0.8b, { v0.16b }, v1.8b
88; CHECK-NEXT:    ret
89entry:
90  %vtbl11 = tail call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %a, <8 x i8> %b)
91  %shuffle.i = shufflevector <8 x i8> %vtbl11, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
92  ret <16 x i8> %shuffle.i
93}
94
; Adds the scalar doubles %x and %y, places the sum in lane 0 of a
; <2 x double> and writes 0.0 into lane 1 using two insertelement
; instructions (rather than a shuffle against zeroinitializer).
; Unlike the other functions in this file, the recorded assembly builds the
; zero lane explicitly - a movi of zero, a lane-0 insert of the sum and a
; full-register move into v0.
; NOTE(review) this looks like a baseline for a not-yet-optimised case -
; confirm the intent before changing the expected output.
95define <2 x double> @fadd(double noundef %x, double noundef %y) {
96; CHECK-LABEL: fadd:
97; CHECK:       // %bb.0: // %entry
98; CHECK-NEXT:    movi v2.2d, #0000000000000000
99; CHECK-NEXT:    fadd d0, d0, d1
100; CHECK-NEXT:    mov v2.d[0], v0.d[0]
101; CHECK-NEXT:    mov v0.16b, v2.16b
102; CHECK-NEXT:    ret
103entry:
104  %add = fadd double %x, %y
105  %vecinit1 = insertelement <2 x double> poison, double %add, i64 0
106  %vecinit2 = insertelement <2 x double> %vecinit1, double 0.0, i64 1
107  ret <2 x double> %vecinit2
108}
109
; Bitwise select written out in plain IR - computes (%c & %a) | (~%a & %d),
; bitcasts the <4 x i16> result to <8 x i8> and widens it to <16 x i8> with
; eight zero lanes.
; The expected assembly is a single bsl on the .8b arrangement followed by
; ret. The %b parameter is not used by the body.
110define <16 x i8> @bsl(<4 x i16> noundef %a, <4 x i16> noundef %c, <4 x i16> noundef %d, <4 x i16> noundef %b) {
111; CHECK-LABEL: bsl:
112; CHECK:       // %bb.0: // %entry
113; CHECK-NEXT:    bsl v0.8b, v1.8b, v2.8b
114; CHECK-NEXT:    ret
115entry:
116  %vbsl3.i = and <4 x i16> %c, %a
117  %0 = xor <4 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1>
118  %vbsl4.i = and <4 x i16> %0, %d
119  %vbsl5.i = or <4 x i16> %vbsl4.i, %vbsl3.i
120  %1 = bitcast <4 x i16> %vbsl5.i to <8 x i8>
121  %shuffle.i = shufflevector <8 x i8> %1, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
122  ret <16 x i8> %shuffle.i
123}
124
; Loads <8 x i8> from %a and widens it to <16 x i8> with eight zero lanes.
; The expected assembly is a single 64-bit ldr into d0 followed by ret, with
; no separate zeroing of the upper half of the result.
; The %b parameter is not used by the body.
125define <16 x i8> @load(ptr %a, <8 x i8> %b) {
126; CHECK-LABEL: load:
127; CHECK:       // %bb.0: // %entry
128; CHECK-NEXT:    ldr d0, [x0]
129; CHECK-NEXT:    ret
130entry:
131  %vtbl11 = load <8 x i8>, ptr %a
132  %shuffle.i = shufflevector <8 x i8> %vtbl11, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
133  ret <16 x i8> %shuffle.i
134}
135
136
; Widens the <8 x i8> argument to <16 x i8> by appending eight zero lanes;
; there is no producing instruction in the function to fold the zeroing into.
; The expected assembly is a single "fmov d0, d0" followed by ret.
; NOTE(review) presumably the D-register move is what clears the upper 64
; bits - confirm against the architecture manual.
137define <16 x i8> @insertzero_v8i8(<8 x i8> %a) {
138; CHECK-LABEL: insertzero_v8i8:
139; CHECK:       // %bb.0: // %entry
140; CHECK-NEXT:    fmov d0, d0
141; CHECK-NEXT:    ret
142entry:
143  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
144  ret <16 x i8> %shuffle.i
145}
146
; Widens the <4 x i16> argument to <8 x i16> by appending four zero lanes.
; The expected assembly is a single "fmov d0, d0" followed by ret.
147define <8 x i16> @insertzero_v4i16(<4 x i16> %a) {
148; CHECK-LABEL: insertzero_v4i16:
149; CHECK:       // %bb.0: // %entry
150; CHECK-NEXT:    fmov d0, d0
151; CHECK-NEXT:    ret
152entry:
153  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
154  ret <8 x i16> %shuffle.i
155}
156
; Widens the <2 x i32> argument to <4 x i32> by appending two zero lanes.
; The expected assembly is a single "fmov d0, d0" followed by ret.
157define <4 x i32> @insertzero_v2i32(<2 x i32> %a) {
158; CHECK-LABEL: insertzero_v2i32:
159; CHECK:       // %bb.0: // %entry
160; CHECK-NEXT:    fmov d0, d0
161; CHECK-NEXT:    ret
162entry:
163  %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
164  ret <4 x i32> %shuffle.i
165}
166
; Widens the <1 x i64> argument to <2 x i64> by appending one zero lane.
; The expected assembly is a single "fmov d0, d0" followed by ret.
167define <2 x i64> @insertzero_v1i64(<1 x i64> %a) {
168; CHECK-LABEL: insertzero_v1i64:
169; CHECK:       // %bb.0: // %entry
170; CHECK-NEXT:    fmov d0, d0
171; CHECK-NEXT:    ret
172entry:
173  %shuffle.i = shufflevector <1 x i64> %a, <1 x i64> zeroinitializer, <2 x i32> <i32 0, i32 1>
174  ret <2 x i64> %shuffle.i
175}
176
; Half-precision variant - widens the <4 x half> argument to <8 x half> by
; appending four zero lanes.
; The expected assembly is a single "fmov d0, d0" followed by ret.
177define <8 x half> @insertzero_v4f16(<4 x half> %a) {
178; CHECK-LABEL: insertzero_v4f16:
179; CHECK:       // %bb.0: // %entry
180; CHECK-NEXT:    fmov d0, d0
181; CHECK-NEXT:    ret
182entry:
183  %shuffle.i = shufflevector <4 x half> %a, <4 x half> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
184  ret <8 x half> %shuffle.i
185}
186
; bfloat variant - widens the <4 x bfloat> argument to <8 x bfloat> by
; appending four zero lanes.
; The expected assembly is a single "fmov d0, d0" followed by ret.
187define <8 x bfloat> @insertzero_v4bf16(<4 x bfloat> %a) {
188; CHECK-LABEL: insertzero_v4bf16:
189; CHECK:       // %bb.0: // %entry
190; CHECK-NEXT:    fmov d0, d0
191; CHECK-NEXT:    ret
192entry:
193  %shuffle.i = shufflevector <4 x bfloat> %a, <4 x bfloat> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
194  ret <8 x bfloat> %shuffle.i
195}
196
; Single-precision variant - widens the <2 x float> argument to <4 x float>
; by appending two zero lanes.
; The expected assembly is a single "fmov d0, d0" followed by ret.
197define <4 x float> @insertzero_v2f32(<2 x float> %a) {
198; CHECK-LABEL: insertzero_v2f32:
199; CHECK:       // %bb.0: // %entry
200; CHECK-NEXT:    fmov d0, d0
201; CHECK-NEXT:    ret
202entry:
203  %shuffle.i = shufflevector <2 x float> %a, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
204  ret <4 x float> %shuffle.i
205}
206
; Double-precision variant - widens the <1 x double> argument to
; <2 x double> by appending one zero lane.
; The expected assembly is a single "fmov d0, d0" followed by ret.
207define <2 x double> @insertzero_v1f64(<1 x double> %a) {
208; CHECK-LABEL: insertzero_v1f64:
209; CHECK:       // %bb.0: // %entry
210; CHECK-NEXT:    fmov d0, d0
211; CHECK-NEXT:    ret
212entry:
213  %shuffle.i = shufflevector <1 x double> %a, <1 x double> zeroinitializer, <2 x i32> <i32 0, i32 1>
214  ret <2 x double> %shuffle.i
215}
216
217
218
; External declarations for the NEON intrinsics called by @rshrn
; (rshrn.v8i8) and @addpv4i16 (addp.v4i16) earlier in this file.
219declare <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16>, i32)
220declare <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16>, <4 x i16>)
221