xref: /llvm-project/llvm/test/CodeGen/AArch64/arm64-vector-insertion.ll (revision 5ddce70ef0e5a641d7fea95e31fc5e2439cb98cb)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=arm64-eabi -mcpu=generic -aarch64-neon-syntax=apple -mattr="+fullfp16" | FileCheck %s
3
; Insert %a into lane 0 of <undef, 0.0, 0.0, 0.0> and store the vector to %x.
; Expected: zero a register with movi.2d, one lane-0 insert, one q-register store.
4define void @test0f(ptr nocapture %x, float %a) #0 {
5; CHECK-LABEL: test0f:
6; CHECK:       // %bb.0: // %entry
7; CHECK-NEXT:    movi.2d v1, #0000000000000000
8; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
9; CHECK-NEXT:    mov.s v1[0], v0[0]
10; CHECK-NEXT:    str q1, [x0]
11; CHECK-NEXT:    ret
12entry:
13  %0 = insertelement <4 x float> <float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %a, i32 0
14  store <4 x float> %0, ptr %x, align 16
15  ret void
16}
17
; Insert %a into lane 0 of <undef, 1.0, 1.0, 1.0> and store the vector to %x.
; Expected: splat 1.0 with fmov.4s, one lane-0 insert, one q-register store.
18define void @test1f(ptr nocapture %x, float %a) #0 {
19; CHECK-LABEL: test1f:
20; CHECK:       // %bb.0: // %entry
21; CHECK-NEXT:    fmov.4s v1, #1.00000000
22; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
23; CHECK-NEXT:    mov.s v1[0], v0[0]
24; CHECK-NEXT:    str q1, [x0]
25; CHECK-NEXT:    ret
26entry:
27  %0 = insertelement <4 x float> <float undef, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, float %a, i32 0
28  store <4 x float> %0, ptr %x, align 16
29  ret void
30}
31
; Single insert of %a into lane 14 of an all-zero <16 x i8>.
; Expected: movi.2d to zero v0, then one mov.b from w0.
32define <16 x i8> @test_insert_v16i8_insert_1(i8 %a) {
33; CHECK-LABEL: test_insert_v16i8_insert_1:
34; CHECK:       // %bb.0:
35; CHECK-NEXT:    movi.2d v0, #0000000000000000
36; CHECK-NEXT:    mov.b v0[14], w0
37; CHECK-NEXT:    ret
38  %v.0 = insertelement <16 x i8> zeroinitializer, i8 %a, i32 14
39  ret <16 x i8> %v.0
40}
41
; Two inserts of %a (lane 2, then lane 1) into an all-zero <16 x i8>.
; Expected: movi.2d to zero v0, then two mov.b lane inserts from w0.
42define <16 x i8> @test_insert_v16i8_insert_2(i8 %a) {
43; CHECK-LABEL: test_insert_v16i8_insert_2:
44; CHECK:       // %bb.0:
45; CHECK-NEXT:    movi.2d v0, #0000000000000000
46; CHECK-NEXT:    mov.b v0[1], w0
47; CHECK-NEXT:    mov.b v0[2], w0
48; CHECK-NEXT:    ret
49  %v.0 = insertelement <16 x i8> zeroinitializer, i8 %a, i32 2
50  %v.1 = insertelement <16 x i8> %v.0, i8 %a, i32 1
51  ret <16 x i8> %v.1
52}
53
; The base constant is undef everywhere except lanes 5 and 9, which are 0.
; Every other lane (0-4, 6-8, 10-15) is overwritten with %a through one
; unbroken insertelement chain.
; Expected: dup %a across all 16 lanes, then re-zero lanes 5 and 9 from wzr.
54define <16 x i8> @test_insert_v16i8_insert_2_undef_base(i8 %a) {
55; CHECK-LABEL: test_insert_v16i8_insert_2_undef_base:
56; CHECK:       // %bb.0:
57; CHECK-NEXT:    dup.16b v0, w0
58; CHECK-NEXT:    mov.b v0[5], wzr
59; CHECK-NEXT:    mov.b v0[9], wzr
60; CHECK-NEXT:    ret
61  %v.0 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>  , i8 %a, i32 0
62  %v.1 = insertelement <16 x i8> %v.0, i8 %a, i32 1
63  %v.2 = insertelement <16 x i8> %v.1, i8 %a, i32 2
64  %v.3 = insertelement <16 x i8> %v.2, i8 %a, i32 3
65  %v.4 = insertelement <16 x i8> %v.3, i8 %a, i32 4
66  %v.6 = insertelement <16 x i8> %v.4, i8 %a, i32 6
67  %v.7 = insertelement <16 x i8> %v.6, i8 %a, i32 7
68  %v.8 = insertelement <16 x i8> %v.7, i8 %a, i32 8
; Continue from %v.8 so that the lane-8 insert above is part of the result.
69  %v.10 = insertelement <16 x i8> %v.8, i8 %a, i32 10
70  %v.11 = insertelement <16 x i8> %v.10, i8 %a, i32 11
71  %v.12 = insertelement <16 x i8> %v.11, i8 %a, i32 12
72  %v.13 = insertelement <16 x i8> %v.12, i8 %a, i32 13
73  %v.14 = insertelement <16 x i8> %v.13, i8 %a, i32 14
74  %v.15 = insertelement <16 x i8> %v.14, i8 %a, i32 15
75  ret <16 x i8> %v.15
76}
77
; The base constant is undef everywhere except lanes 5 and 9, which are 0.
; Lanes 2, 7, 12 and 15 receive %b; every other non-constant lane receives %a,
; all through one unbroken insertelement chain.
; Expected: dup %a (the majority value), then patch the %b lanes from w1 and
; the two zero lanes from wzr.
78define <16 x i8> @test_insert_v16i8_insert_2_undef_base_different_valeus(i8 %a, i8 %b) {
79; CHECK-LABEL: test_insert_v16i8_insert_2_undef_base_different_valeus:
80; CHECK:       // %bb.0:
81; CHECK-NEXT:    dup.16b v0, w0
82; CHECK-NEXT:    mov.b v0[2], w1
83; CHECK-NEXT:    mov.b v0[5], wzr
84; CHECK-NEXT:    mov.b v0[7], w1
85; CHECK-NEXT:    mov.b v0[9], wzr
86; CHECK-NEXT:    mov.b v0[12], w1
87; CHECK-NEXT:    mov.b v0[15], w1
88; CHECK-NEXT:    ret
89  %v.0 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>  , i8 %a, i32 0
90  %v.1 = insertelement <16 x i8> %v.0, i8 %a, i32 1
91  %v.2 = insertelement <16 x i8> %v.1, i8 %b, i32 2
92  %v.3 = insertelement <16 x i8> %v.2, i8 %a, i32 3
93  %v.4 = insertelement <16 x i8> %v.3, i8 %a, i32 4
94  %v.6 = insertelement <16 x i8> %v.4, i8 %a, i32 6
95  %v.7 = insertelement <16 x i8> %v.6, i8 %b, i32 7
96  %v.8 = insertelement <16 x i8> %v.7, i8 %a, i32 8
; Continue from %v.8 so that the lane-8 insert above is part of the result.
97  %v.10 = insertelement <16 x i8> %v.8, i8 %a, i32 10
98  %v.11 = insertelement <16 x i8> %v.10, i8 %a, i32 11
99  %v.12 = insertelement <16 x i8> %v.11, i8 %b, i32 12
100  %v.13 = insertelement <16 x i8> %v.12, i8 %a, i32 13
101  %v.14 = insertelement <16 x i8> %v.13, i8 %a, i32 14
102  %v.15 = insertelement <16 x i8> %v.14, i8 %b, i32 15
103  ret <16 x i8> %v.15
104}
105
; Lanes 0-6 are all set to %a; lane 7 keeps the constant 0.0 from the base.
; Expected: dup.8h of %a, then a single mov.h of wzr into lane 7.
106define <8 x half> @test_insert_v8f16_insert_1(half %a) {
107; CHECK-LABEL: test_insert_v8f16_insert_1:
108; CHECK:       // %bb.0:
109; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
110; CHECK-NEXT:    dup.8h v0, v0[0]
111; CHECK-NEXT:    mov.h v0[7], wzr
112; CHECK-NEXT:    ret
113  %v.0 = insertelement <8 x half> <half undef, half undef, half undef, half undef, half undef, half undef, half undef, half 0.0>, half %a, i32 0
114  %v.1 = insertelement <8 x half> %v.0, half %a, i32 1
115  %v.2 = insertelement <8 x half> %v.1, half %a, i32 2
116  %v.3 = insertelement <8 x half> %v.2, half %a, i32 3
117  %v.4 = insertelement <8 x half> %v.3, half %a, i32 4
118  %v.5 = insertelement <8 x half> %v.4, half %a, i32 5
119  %v.6 = insertelement <8 x half> %v.5, half %a, i32 6
120  ret <8 x half> %v.6
121}
122
123
; Only lanes 2 and 1 receive %a; the remaining lanes stay zero.
; Expected: movi.2d for the zero vector, two mov.h lane inserts, then a
; register copy into the return register v0.
124define <8 x half> @test_insert_v8f16_insert_2(half %a) {
125; CHECK-LABEL: test_insert_v8f16_insert_2:
126; CHECK:       // %bb.0:
127; CHECK-NEXT:    movi.2d v1, #0000000000000000
128; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
129; CHECK-NEXT:    mov.h v1[1], v0[0]
130; CHECK-NEXT:    mov.h v1[2], v0[0]
131; CHECK-NEXT:    mov.16b v0, v1
132; CHECK-NEXT:    ret
133  %v.0 = insertelement <8 x half> zeroinitializer, half %a, i32 2
134  %v.1 = insertelement <8 x half> %v.0, half %a, i32 1
135  ret <8 x half> %v.1
136}
137
; Six lanes (0, 1, 2, 4, 5, 6) receive %a; lanes 3 and 7 keep the constant 0.
; Expected: dup.8h of %a, then two mov.h of wzr for the zero lanes.
138define <8 x i16> @test_insert_v8i16_insert_2(i16 %a) {
139; CHECK-LABEL: test_insert_v8i16_insert_2:
140; CHECK:       // %bb.0:
141; CHECK-NEXT:    dup.8h v0, w0
142; CHECK-NEXT:    mov.h v0[3], wzr
143; CHECK-NEXT:    mov.h v0[7], wzr
144; CHECK-NEXT:    ret
145  %v.0 = insertelement <8 x i16> <i16 undef, i16 undef, i16 undef, i16 0, i16 undef, i16 undef, i16 undef, i16 0>, i16 %a, i32 0
146  %v.1 = insertelement <8 x i16> %v.0, i16 %a, i32 1
147  %v.2 = insertelement <8 x i16> %v.1, i16 %a, i32 2
148  %v.3 = insertelement <8 x i16> %v.2, i16 %a, i32 4
149  %v.4 = insertelement <8 x i16> %v.3, i16 %a, i32 5
150  %v.5 = insertelement <8 x i16> %v.4, i16 %a, i32 6
151  ret <8 x i16> %v.5
152}
153
; Five lanes (0, 2, 4, 5, 6) receive %a; lanes 1, 3 and 7 keep the constant 0.
; Expected: dup.8h of %a, then three mov.h of wzr for the zero lanes.
154define <8 x i16> @test_insert_v8i16_insert_3(i16 %a) {
155; CHECK-LABEL: test_insert_v8i16_insert_3:
156; CHECK:       // %bb.0:
157; CHECK-NEXT:    dup.8h v0, w0
158; CHECK-NEXT:    mov.h v0[1], wzr
159; CHECK-NEXT:    mov.h v0[3], wzr
160; CHECK-NEXT:    mov.h v0[7], wzr
161; CHECK-NEXT:    ret
162  %v.0 = insertelement <8 x i16> <i16 undef, i16 0, i16 undef, i16 0, i16 undef, i16 undef, i16 undef, i16 0>, i16 %a, i32 0
163  %v.2 = insertelement <8 x i16> %v.0, i16 %a, i32 2
164  %v.3 = insertelement <8 x i16> %v.2, i16 %a, i32 4
165  %v.4 = insertelement <8 x i16> %v.3, i16 %a, i32 5
166  %v.5 = insertelement <8 x i16> %v.4, i16 %a, i32 6
167  ret <8 x i16> %v.5
168}
169
; Four lanes (0, 2, 4, 5) receive %a; lanes 1, 3, 6 and 7 keep the constant 0.
; Expected: start from a zero vector (movi.2d) and insert %a four times,
; rather than dup followed by zeroing.
170define <8 x i16> @test_insert_v8i16_insert_4(i16 %a) {
171; CHECK-LABEL: test_insert_v8i16_insert_4:
172; CHECK:       // %bb.0:
173; CHECK-NEXT:    movi.2d v0, #0000000000000000
174; CHECK-NEXT:    mov.h v0[0], w0
175; CHECK-NEXT:    mov.h v0[2], w0
176; CHECK-NEXT:    mov.h v0[4], w0
177; CHECK-NEXT:    mov.h v0[5], w0
178; CHECK-NEXT:    ret
179  %v.0 = insertelement <8 x i16> <i16 undef, i16 0, i16 undef, i16 0, i16 undef, i16 undef, i16 0, i16 0>, i16 %a, i32 0
180  %v.2 = insertelement <8 x i16> %v.0, i16 %a, i32 2
181  %v.3 = insertelement <8 x i16> %v.2, i16 %a, i32 4
182  %v.4 = insertelement <8 x i16> %v.3, i16 %a, i32 5
183  ret <8 x i16> %v.4
184}
185
; Three lanes (0, 4, 5) receive %a; lanes 1, 2, 3, 6 and 7 keep the constant 0.
; Expected: movi.2d for the zero vector, then three mov.h inserts from w0.
186define <8 x i16> @test_insert_v8i16_insert_5(i16 %a) {
187; CHECK-LABEL: test_insert_v8i16_insert_5:
188; CHECK:       // %bb.0:
189; CHECK-NEXT:    movi.2d v0, #0000000000000000
190; CHECK-NEXT:    mov.h v0[0], w0
191; CHECK-NEXT:    mov.h v0[4], w0
192; CHECK-NEXT:    mov.h v0[5], w0
193; CHECK-NEXT:    ret
194  %v.0 = insertelement <8 x i16> <i16 undef, i16 0, i16 0, i16 0, i16 undef, i16 undef, i16 0, i16 0>, i16 %a, i32 0
195  %v.3 = insertelement <8 x i16> %v.0, i16 %a, i32 4
196  %v.4 = insertelement <8 x i16> %v.3, i16 %a, i32 5
197  ret <8 x i16> %v.4
198}
199
; Insert %a into lane 1 of the 64-bit vector <0.0, undef>.
; Expected: movi to zero d1, one mov.s lane insert, then fmov back into d0.
200define <2 x float> @test_insert_v2f32_undef_zero_vector(float %a) {
201; CHECK-LABEL: test_insert_v2f32_undef_zero_vector:
202; CHECK:       // %bb.0:
203; CHECK-NEXT:    movi d1, #0000000000000000
204; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
205; CHECK-NEXT:    mov.s v1[1], v0[0]
206; CHECK-NEXT:    fmov d0, d1
207; CHECK-NEXT:    ret
208  %v.0 = insertelement <2 x float> <float 0.000000e+00, float undef>, float %a, i32 1
209  ret <2 x float> %v.0
210}
211
; Lanes 0-2 receive %a; lane 3 keeps the constant 0.0 from the base.
; Expected: dup.4s of %a, then mov.s of wzr into lane 3.
212define <4 x float> @test_insert_3_f32_undef_zero_vector(float %a) {
213; CHECK-LABEL: test_insert_3_f32_undef_zero_vector:
214; CHECK:       // %bb.0:
215; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
216; CHECK-NEXT:    dup.4s v0, v0[0]
217; CHECK-NEXT:    mov.s v0[3], wzr
218; CHECK-NEXT:    ret
219  %v.0 = insertelement <4 x float> <float undef, float undef, float undef, float 0.000000e+00>, float %a, i32 0
220  %v.1 = insertelement <4 x float> %v.0, float %a, i32 1
221  %v.2 = insertelement <4 x float> %v.1, float %a, i32 2
222  ret <4 x float> %v.2
223}
224
; Lanes 0-2 receive %a and lane 3 is left undef (all-undef base vector).
; Expected: a single dup.4s with no further lane fix-ups.
225define <4 x float> @test_insert_3_f32_undef(float %a) {
226; CHECK-LABEL: test_insert_3_f32_undef:
227; CHECK:       // %bb.0:
228; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
229; CHECK-NEXT:    dup.4s v0, v0[0]
230; CHECK-NEXT:    ret
231  %v.0 = insertelement <4 x float> <float undef, float undef, float undef, float undef>, float %a, i32 0
232  %v.1 = insertelement <4 x float> %v.0, float %a, i32 1
233  %v.2 = insertelement <4 x float> %v.1, float %a, i32 2
234  ret <4 x float> %v.2
235}
236
; Lanes 0 and 2 receive %a; lanes 1 and 3 keep the constant 0.0.
; Expected: movi.2d for the zero vector, two mov.s lane inserts, then a
; register copy into the return register v0.
237define <4 x float> @test_insert_2_f32_undef_zero(float %a) {
238; CHECK-LABEL: test_insert_2_f32_undef_zero:
239; CHECK:       // %bb.0:
240; CHECK-NEXT:    movi.2d v1, #0000000000000000
241; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
242; CHECK-NEXT:    mov.s v1[0], v0[0]
243; CHECK-NEXT:    mov.s v1[2], v0[0]
244; CHECK-NEXT:    mov.16b v0, v1
245; CHECK-NEXT:    ret
246  %v.0 = insertelement <4 x float> <float undef, float 0.000000e+00, float undef, float 0.000000e+00>, float %a, i32 0
247  %v.1 = insertelement <4 x float> %v.0, float %a, i32 2
248  ret <4 x float> %v.1
249}
250
; Insert %a into lane 0 of <undef, 0.0>; lane 1 keeps the constant 0.0.
; Expected: movi.2d for the zero vector, one mov.d lane insert, then a
; register copy into the return register v0.
251define <2 x double> @test_insert_v2f64_undef_insert1(double %a) {
252; CHECK-LABEL: test_insert_v2f64_undef_insert1:
253; CHECK:       // %bb.0:
254; CHECK-NEXT:    movi.2d v1, #0000000000000000
255; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
256; CHECK-NEXT:    mov.d v1[0], v0[0]
257; CHECK-NEXT:    mov.16b v0, v1
258; CHECK-NEXT:    ret
259  %v.0 = insertelement <2 x double > <double undef, double 0.000000e+00>, double %a, i32 0
260  ret <2 x double> %v.0
261}
262
; Insert %a into lanes 0 and 2 of a non-constant vector argument %b.
; Expected: two mov.s lane inserts directly into v1 (which holds %b), then a
; register copy into the return register v0.
263define <4 x float> @test_insert_2_f32_var(float %a, <4 x float> %b) {
264; CHECK-LABEL: test_insert_2_f32_var:
265; CHECK:       // %bb.0:
266; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
267; CHECK-NEXT:    mov.s v1[0], v0[0]
268; CHECK-NEXT:    mov.s v1[2], v0[0]
269; CHECK-NEXT:    mov.16b v0, v1
270; CHECK-NEXT:    ret
271  %v.0 = insertelement <4 x float> %b, float %a, i32 0
272  %v.1 = insertelement <4 x float> %v.0, float %a, i32 2
273  ret <4 x float> %v.1
274}
275
; Insert the constant 0 into lane 5 of the vector argument %a.
; Expected: a single mov.h from wzr, with no separate zero materialization.
276define <8 x i16> @test_insert_v8i16_i16_zero(<8 x i16> %a) {
277; CHECK-LABEL: test_insert_v8i16_i16_zero:
278; CHECK:       // %bb.0:
279; CHECK-NEXT:    mov.h v0[5], wzr
280; CHECK-NEXT:    ret
281  %v.0 = insertelement <8 x i16> %a, i16 0, i32 5
282  ret <8 x i16> %v.0
283}
284
285; TODO: This should just be a mov.s v0[3], wzr
; Insert the constant 0.0 into lane 0 of the 64-bit vector argument %a.
; Expected: a single mov.h from wzr into lane 0; the kill comments mark the
; d0 <-> q0 register widening around it.
; NOTE(review): the TODO preceding this function mentions "mov.s v0[3]", which
; does not match the lane or element size checked here - it may be stale; confirm.
286define <4 x half> @test_insert_v4f16_f16_zero(<4 x half> %a) {
287; CHECK-LABEL: test_insert_v4f16_f16_zero:
288; CHECK:       // %bb.0:
289; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
290; CHECK-NEXT:    mov.h v0[0], wzr
291; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
292; CHECK-NEXT:    ret
293  %v.0 = insertelement <4 x half> %a, half 0.000000e+00, i32 0
294  ret <4 x half> %v.0
295}
296
; Insert the constant 0.0 into lane 6 of the vector argument %a.
; Expected: a single mov.h from wzr.
297define <8 x half> @test_insert_v8f16_f16_zero(<8 x half> %a) {
298; CHECK-LABEL: test_insert_v8f16_f16_zero:
299; CHECK:       // %bb.0:
300; CHECK-NEXT:    mov.h v0[6], wzr
301; CHECK-NEXT:    ret
302  %v.0 = insertelement <8 x half> %a, half 0.000000e+00, i32 6
303  ret <8 x half> %v.0
304}
305
; Insert the constant 0.0 into lane 0 of the 64-bit vector argument %a.
; Expected: a single mov.s from wzr; the kill comments mark the d0 <-> q0
; register widening around it.
306define <2 x float> @test_insert_v2f32_f32_zero(<2 x float> %a) {
307; CHECK-LABEL: test_insert_v2f32_f32_zero:
308; CHECK:       // %bb.0:
309; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
310; CHECK-NEXT:    mov.s v0[0], wzr
311; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
312; CHECK-NEXT:    ret
313  %v.0 = insertelement <2 x float> %a, float 0.000000e+00, i32 0
314  ret <2 x float> %v.0
315}
316
; Insert the constant 0.0 into lane 3 of the vector argument %a.
; Expected: a single mov.s from wzr.
317define <4 x float> @test_insert_v4f32_f32_zero(<4 x float> %a) {
318; CHECK-LABEL: test_insert_v4f32_f32_zero:
319; CHECK:       // %bb.0:
320; CHECK-NEXT:    mov.s v0[3], wzr
321; CHECK-NEXT:    ret
322  %v.0 = insertelement <4 x float> %a, float 0.000000e+00, i32 3
323  ret <4 x float> %v.0
324}
325
; Insert the constant 0.0 into lane 1 of the vector argument %a.
; Expected: a single mov.d from the 64-bit zero register xzr.
326define <2 x double> @test_insert_v2f64_f64_zero(<2 x double> %a) {
327; CHECK-LABEL: test_insert_v2f64_f64_zero:
328; CHECK:       // %bb.0:
329; CHECK-NEXT:    mov.d v0[1], xzr
330; CHECK-NEXT:    ret
331  %v.0 = insertelement <2 x double> %a, double 0.000000e+00, i32 1
332  ret <2 x double> %v.0
333}
334