; Source: llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll (revision cc82f1290a1e2157a6c0530d78d8cc84d2b8553d)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"

; SCALABLE INSERTED INTO SCALABLE TESTS

; Insert <vscale x 4 x i8> into <vscale x 8 x i8> at index 0.
define <vscale x 8 x i8> @vec_scalable_subvec_scalable_idx_zero_i8(ptr %a, ptr %b) #0 {
; CHECK-LABEL: vec_scalable_subvec_scalable_idx_zero_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ld1b { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1b { z1.s }, p0/z, [x1]
; CHECK-NEXT:    uunpkhi z0.s, z0.h
; CHECK-NEXT:    uzp1 z0.h, z1.h, z0.h
; CHECK-NEXT:    ret
  %vec = load <vscale x 8 x i8>, ptr %a
  %subvec = load <vscale x 4 x i8>, ptr %b
  %ins = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.nxv4i8(<vscale x 8 x i8> %vec, <vscale x 4 x i8> %subvec, i64 0)
  ret <vscale x 8 x i8> %ins
}

; Insert <vscale x 4 x i8> into <vscale x 8 x i8> at index 4 (upper half).
define <vscale x 8 x i8> @vec_scalable_subvec_scalable_idx_nonzero_i8(ptr %a, ptr %b) #0 {
; CHECK-LABEL: vec_scalable_subvec_scalable_idx_nonzero_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ld1b { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1b { z1.s }, p0/z, [x1]
; CHECK-NEXT:    uunpklo z0.s, z0.h
; CHECK-NEXT:    uzp1 z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %vec = load <vscale x 8 x i8>, ptr %a
  %subvec = load <vscale x 4 x i8>, ptr %b
  %ins = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.nxv4i8(<vscale x 8 x i8> %vec, <vscale x 4 x i8> %subvec, i64 4)
  ret <vscale x 8 x i8> %ins
}

; Insert <vscale x 2 x i16> into <vscale x 4 x i16> at index 0.
define <vscale x 4 x i16> @vec_scalable_subvec_scalable_idx_zero_i16(ptr %a, ptr %b) #0 {
; CHECK-LABEL: vec_scalable_subvec_scalable_idx_zero_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1h { z1.d }, p0/z, [x1]
; CHECK-NEXT:    uunpkhi z0.d, z0.s
; CHECK-NEXT:    uzp1 z0.s, z1.s, z0.s
; CHECK-NEXT:    ret
  %vec = load <vscale x 4 x i16>, ptr %a
  %subvec = load <vscale x 2 x i16>, ptr %b
  %ins = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.nxv2i16(<vscale x 4 x i16> %vec, <vscale x 2 x i16> %subvec, i64 0)
  ret <vscale x 4 x i16> %ins
}

; Insert <vscale x 2 x i16> into <vscale x 4 x i16> at index 2 (upper half).
define <vscale x 4 x i16> @vec_scalable_subvec_scalable_idx_nonzero_i16(ptr %a, ptr %b) #0 {
; CHECK-LABEL: vec_scalable_subvec_scalable_idx_nonzero_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1h { z1.d }, p0/z, [x1]
; CHECK-NEXT:    uunpklo z0.d, z0.s
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %vec = load <vscale x 4 x i16>, ptr %a
  %subvec = load <vscale x 2 x i16>, ptr %b
  %ins = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.nxv2i16(<vscale x 4 x i16> %vec, <vscale x 2 x i16> %subvec, i64 2)
  ret <vscale x 4 x i16> %ins
}

; FIXED INSERTED INTO SCALABLE TESTS

; Insert fixed <8 x i8> into <vscale x 8 x i8> at index 0.
define <vscale x 8 x i8> @vec_scalable_subvec_fixed_idx_zero_i8(ptr %a, ptr %b) #0 {
; CHECK-LABEL: vec_scalable_subvec_fixed_idx_zero_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ldr d0, [x1]
; CHECK-NEXT:    ld1b { z1.h }, p0/z, [x0]
; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-NEXT:    ptrue p0.h, vl8
; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    ret
  %vec = load <vscale x 8 x i8>, ptr %a
  %subvec = load <8 x i8>, ptr %b
  %ins = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v8i8(<vscale x 8 x i8> %vec, <8 x i8> %subvec, i64 0)
  ret <vscale x 8 x i8> %ins
}

; Insert fixed <8 x i8> into <vscale x 8 x i8> at index 8 (goes through the stack).
define <vscale x 8 x i8> @vec_scalable_subvec_fixed_idx_nonzero_i8(ptr %a, ptr %b) #0 {
; CHECK-LABEL: vec_scalable_subvec_fixed_idx_nonzero_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    cnth x8
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    sub x8, x8, #8
; CHECK-NEXT:    mov w9, #8 // =0x8
; CHECK-NEXT:    cmp x8, #8
; CHECK-NEXT:    ld1b { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ushll v1.8h, v1.8b, #0
; CHECK-NEXT:    csel x8, x8, x9, lo
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    lsl x8, x8, #1
; CHECK-NEXT:    st1h { z0.h }, p0, [sp]
; CHECK-NEXT:    str q1, [x9, x8]
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [sp]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %vec = load <vscale x 8 x i8>, ptr %a
  %subvec = load <8 x i8>, ptr %b
  %ins = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v8i8(<vscale x 8 x i8> %vec, <8 x i8> %subvec, i64 8)
  ret <vscale x 8 x i8> %ins
}

; Insert fixed <4 x i16> into <vscale x 4 x i16> at index 0.
define <vscale x 4 x i16> @vec_scalable_subvec_fixed_idx_zero_i16(ptr %a, ptr %b) #0 {
; CHECK-LABEL: vec_scalable_subvec_fixed_idx_zero_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ldr d0, [x1]
; CHECK-NEXT:    ld1h { z1.s }, p0/z, [x0]
; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    ret
  %vec = load <vscale x 4 x i16>, ptr %a
  %subvec = load <4 x i16>, ptr %b
  %ins = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v4i16(<vscale x 4 x i16> %vec, <4 x i16> %subvec, i64 0)
  ret <vscale x 4 x i16> %ins
}

; Insert fixed <4 x i16> into <vscale x 4 x i16> at index 4 (goes through the stack).
define <vscale x 4 x i16> @vec_scalable_subvec_fixed_idx_nonzero_i16(ptr %a, ptr %b) #0 {
; CHECK-LABEL: vec_scalable_subvec_fixed_idx_nonzero_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    cntw x8
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    sub x8, x8, #4
; CHECK-NEXT:    mov w9, #4 // =0x4
; CHECK-NEXT:    cmp x8, #4
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ushll v1.4s, v1.4h, #0
; CHECK-NEXT:    csel x8, x8, x9, lo
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    lsl x8, x8, #2
; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
; CHECK-NEXT:    str q1, [x9, x8]
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [sp]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %vec = load <vscale x 4 x i16>, ptr %a
  %subvec = load <4 x i16>, ptr %b
  %ins = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v4i16(<vscale x 4 x i16> %vec, <4 x i16> %subvec, i64 4)
  ret <vscale x 4 x i16> %ins
}

; Insert fixed <2 x i32> into <vscale x 2 x i32> at index 0.
define <vscale x 2 x i32> @vec_scalable_subvec_fixed_idx_zero_i32(ptr %a, ptr %b) #0 {
; CHECK-LABEL: vec_scalable_subvec_fixed_idx_zero_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ldr d0, [x1]
; CHECK-NEXT:    ld1w { z1.d }, p0/z, [x0]
; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %vec = load <vscale x 2 x i32>, ptr %a
  %subvec = load <2 x i32>, ptr %b
  %ins = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v2i32(<vscale x 2 x i32> %vec, <2 x i32> %subvec, i64 0)
  ret <vscale x 2 x i32> %ins
}

; Insert fixed <2 x i32> into <vscale x 2 x i32> at index 2 (goes through the stack).
define <vscale x 2 x i32> @vec_scalable_subvec_fixed_idx_nonzero_i32(ptr %a, ptr %b) #0 {
; CHECK-LABEL: vec_scalable_subvec_fixed_idx_nonzero_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    cntd x8
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    sub x8, x8, #2
; CHECK-NEXT:    mov w9, #2 // =0x2
; CHECK-NEXT:    cmp x8, #2
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ushll v1.2d, v1.2s, #0
; CHECK-NEXT:    csel x8, x8, x9, lo
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    lsl x8, x8, #3
; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
; CHECK-NEXT:    str q1, [x9, x8]
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [sp]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %vec = load <vscale x 2 x i32>, ptr %a
  %subvec = load <2 x i32>, ptr %b
  %ins = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v2i32(<vscale x 2 x i32> %vec, <2 x i32> %subvec, i64 2)
  ret <vscale x 2 x i32> %ins
}

; Insert fixed <8 x i32> at index 8; uses attribute #1 (vscale_range(4,4)).
define <vscale x 2 x i32> @vec_scalable_subvec_fixed_idx_nonzero_large_i32(ptr %a, ptr %b) #1 {
; CHECK-LABEL: vec_scalable_subvec_fixed_idx_nonzero_large_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ptrue p1.d, vl8
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0]
; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
; CHECK-NEXT:    ld1w { z0.d }, p1/z, [x1]
; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [sp]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %vec = load <vscale x 2 x i32>, ptr %a
  %subvec = load <8 x i32>, ptr %b
  %ins = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> %vec, <8 x i32> %subvec, i64 8)
  ret <vscale x 2 x i32> %ins
}

declare <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.nxv4i8(<vscale x 8 x i8>, <vscale x 4 x i8>, i64)
declare <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.nxv2i16(<vscale x 4 x i16>, <vscale x 2 x i16>, i64)

declare <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v8i8(<vscale x 8 x i8>, <8 x i8>, i64)
declare <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v4i16(<vscale x 4 x i16>, <4 x i16>, i64)
declare <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v2i32(<vscale x 2 x i32>, <2 x i32>, i64)

declare <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32>, <8 x i32>, i64)

attributes #0 = { nounwind "target-features"="+sve" }
attributes #1 = { nounwind "target-features"="+sve" vscale_range(4,4) }
