xref: /llvm-project/llvm/test/CodeGen/AArch64/vector-insert-dag-combines.ll (revision f06d96995a3e937ce125fd48efd1026256868b99)
1; RUN: llc -debug-only=isel -o /dev/null < %s 2>&1 | FileCheck %s
2
3; REQUIRES: asserts
4
5; NOTE: Due to their nature the expected inserts and extracts often emit no
6; instructions and so these tests verify the output of DAGCombiner directly.
7
; Module-wide target: every function below is compiled for AArch64 Linux.
8target triple = "aarch64-unknown-linux-gnu"
9
10; CHECK: Initial selection DAG: %bb.0 'insert_small_fixed_into_big_fixed:'
11; CHECK: SelectionDAG has 10 nodes:
12; CHECK:   t0: ch,glue = EntryToken
13; CHECK:         t2: v8i8,ch = CopyFromReg t0, Register:v8i8 %0
14; CHECK:       t4: v4i8 = extract_subvector t2, Constant:i64<0>
15; CHECK:     t6: v16i8 = insert_subvector undef:v16i8, t4, Constant:i64<0>
16; CHECK:   t8: ch,glue = CopyToReg t0, Register:v16i8 $q0, t6
17; CHECK:   t9: ch = AArch64ISD::RET_GLUE t8, Register:v16i8 $q0, t8:1
18
19; CHECK: Optimized lowered selection DAG: %bb.0 'insert_small_fixed_into_big_fixed:'
20; CHECK: SelectionDAG has 9 nodes:
21; CHECK:   t0: ch,glue = EntryToken
22; CHECK:       t2: v8i8,ch = CopyFromReg t0, Register:v8i8 %0
23; CHECK:     t10: v16i8 = insert_subvector undef:v16i8, t2, Constant:i64<0>
24; CHECK:   t8: ch,glue = CopyToReg t0, Register:v16i8 $q0, t10
25; CHECK:   t9: ch = AArch64ISD::RET_GLUE t8, Register:v16i8 $q0, t8:1
26
; Extracts a <4 x i8> at index 0 from the <8 x i8> argument and inserts it at
; index 0 of an undef <16 x i8>. The expectations above require the
; extract/insert pair to fold into a single insert_subvector of the whole v8i8
; source into undef v16i8.
27define <16 x i8> @insert_small_fixed_into_big_fixed(<8 x i8> %a) #0 {
28  %extract = call <4 x i8> @llvm.vector.extract(<8 x i8> %a, i64 0)
29  %insert = call <16 x i8> @llvm.vector.insert(<16 x i8> undef, <4 x i8> %extract, i64 0)
30  ret <16 x i8> %insert
31}
32
33; CHECK: Initial selection DAG: %bb.0 'insert_small_fixed_into_big_scalable:'
34; CHECK: SelectionDAG has 10 nodes:
35; CHECK:   t0: ch,glue = EntryToken
36; CHECK:         t2: v8i8,ch = CopyFromReg t0, Register:v8i8 %0
37; CHECK:       t4: v4i8 = extract_subvector t2, Constant:i64<0>
38; CHECK:     t6: nxv16i8 = insert_subvector undef:nxv16i8, t4, Constant:i64<0>
39; CHECK:   t8: ch,glue = CopyToReg t0, Register:nxv16i8 $z0, t6
40; CHECK:   t9: ch = AArch64ISD::RET_GLUE t8, Register:nxv16i8 $z0, t8:1
41
42; CHECK: Optimized lowered selection DAG: %bb.0 'insert_small_fixed_into_big_scalable:'
43; CHECK: SelectionDAG has 9 nodes:
44; CHECK:   t0: ch,glue = EntryToken
45; CHECK:       t2: v8i8,ch = CopyFromReg t0, Register:v8i8 %0
46; CHECK:     t10: nxv16i8 = insert_subvector undef:nxv16i8, t2, Constant:i64<0>
47; CHECK:   t8: ch,glue = CopyToReg t0, Register:nxv16i8 $z0, t10
48; CHECK:   t9: ch = AArch64ISD::RET_GLUE t8, Register:nxv16i8 $z0, t8:1
49
; Extracts a <4 x i8> at index 0 from the <8 x i8> argument and inserts it at
; index 0 of an undef <vscale x 16 x i8>. The expectations above require the
; pair to fold into a single insert_subvector of the v8i8 source into undef
; nxv16i8.
50define <vscale x 16 x i8> @insert_small_fixed_into_big_scalable(<8 x i8> %a) #0 {
51  %extract = call <4 x i8> @llvm.vector.extract(<8 x i8> %a, i64 0)
52  %insert = call <vscale x 16 x i8> @llvm.vector.insert(<vscale x 16 x i8> undef, <4 x i8> %extract, i64 0)
53  ret <vscale x 16 x i8> %insert
54}
55
56; CHECK: Initial selection DAG: %bb.0 'insert_small_scalable_into_big_fixed:'
57; CHECK: SelectionDAG has 11 nodes:
58; CHECK:   t0: ch,glue = EntryToken
59; CHECK:           t2: nxv8i16,ch = CopyFromReg t0, Register:nxv8i16 %0
60; CHECK:         t3: nxv8i8 = truncate t2
61; CHECK:       t5: v4i8 = extract_subvector t3, Constant:i64<0>
62; CHECK:     t7: v16i8 = insert_subvector undef:v16i8, t5, Constant:i64<0>
63; CHECK:   t9: ch,glue = CopyToReg t0, Register:v16i8 $q0, t7
64; CHECK:   t10: ch = AArch64ISD::RET_GLUE t9, Register:v16i8 $q0, t9:1
65
66; CHECK: Optimized lowered selection DAG: %bb.0 'insert_small_scalable_into_big_fixed:'
67; CHECK: SelectionDAG has 11 nodes:
68; CHECK:   t0: ch,glue = EntryToken
69; CHECK:           t2: nxv8i16,ch = CopyFromReg t0, Register:nxv8i16 %0
70; CHECK:         t3: nxv8i8 = truncate t2
71; CHECK:       t5: v4i8 = extract_subvector t3, Constant:i64<0>
72; CHECK:     t7: v16i8 = insert_subvector undef:v16i8, t5, Constant:i64<0>
73; CHECK:   t9: ch,glue = CopyToReg t0, Register:v16i8 $q0, t7
74; CHECK:   t10: ch = AArch64ISD::RET_GLUE t9, Register:v16i8 $q0, t9:1
75
76; Resulting insert would not be legal, so there's no transformation.
; Extracts a fixed <4 x i8> at index 0 from the scalable <vscale x 8 x i8>
; argument and inserts it at index 0 of an undef fixed <16 x i8>. The
; expectations above show the optimized DAG identical to the initial one
; (11 nodes, extract_subvector and insert_subvector both still present).
77define <16 x i8> @insert_small_scalable_into_big_fixed(<vscale x 8 x i8> %a) #0 {
78  %extract = call <4 x i8> @llvm.vector.extract(<vscale x 8 x i8> %a, i64 0)
79  %insert = call <16 x i8> @llvm.vector.insert(<16 x i8> undef, <4 x i8> %extract, i64 0)
80  ret <16 x i8> %insert
81}
82
83; CHECK: Initial selection DAG: %bb.0 'insert_small_scalable_into_big_scalable_1:'
84; CHECK: SelectionDAG has 11 nodes:
85; CHECK:   t0: ch,glue = EntryToken
86; CHECK:           t2: nxv8i16,ch = CopyFromReg t0, Register:nxv8i16 %0
87; CHECK:         t3: nxv8i8 = truncate t2
88; CHECK:       t5: v4i8 = extract_subvector t3, Constant:i64<0>
89; CHECK:     t7: nxv16i8 = insert_subvector undef:nxv16i8, t5, Constant:i64<0>
90; CHECK:   t9: ch,glue = CopyToReg t0, Register:nxv16i8 $z0, t7
91; CHECK:   t10: ch = AArch64ISD::RET_GLUE t9, Register:nxv16i8 $z0, t9:1
92
93; CHECK: Optimized lowered selection DAG: %bb.0 'insert_small_scalable_into_big_scalable_1:'
94; CHECK: SelectionDAG has 10 nodes:
95; CHECK:   t0: ch,glue = EntryToken
96; CHECK:         t2: nxv8i16,ch = CopyFromReg t0, Register:nxv8i16 %0
97; CHECK:       t3: nxv8i8 = truncate t2
98; CHECK:     t11: nxv16i8 = insert_subvector undef:nxv16i8, t3, Constant:i64<0>
99; CHECK:   t9: ch,glue = CopyToReg t0, Register:nxv16i8 $z0, t11
100; CHECK:   t10: ch = AArch64ISD::RET_GLUE t9, Register:nxv16i8 $z0, t9:1
101
; Scalable source and scalable destination with a fixed-length intermediate:
; extracts a <4 x i8> at index 0 from the <vscale x 8 x i8> argument and inserts
; it at index 0 of an undef <vscale x 16 x i8>. In the DAG the argument arrives
; as nxv8i16 and is truncated to nxv8i8; the expectations above require the
; pair to fold into one insert_subvector of that nxv8i8 value into undef
; nxv16i8.
102define <vscale x 16 x i8> @insert_small_scalable_into_big_scalable_1(<vscale x 8 x i8> %a) #0 {
103  %extract = call <4 x i8> @llvm.vector.extract(<vscale x 8 x i8> %a, i64 0)
104  %insert = call <vscale x 16 x i8> @llvm.vector.insert(<vscale x 16 x i8> undef, <4 x i8> %extract, i64 0)
105  ret <vscale x 16 x i8> %insert
106}
107
108; CHECK: Initial selection DAG: %bb.0 'insert_small_scalable_into_big_scalable_2:'
109; CHECK: SelectionDAG has 11 nodes:
110; CHECK:   t0: ch,glue = EntryToken
111; CHECK:           t2: nxv8i16,ch = CopyFromReg t0, Register:nxv8i16 %0
112; CHECK:         t3: nxv8i8 = truncate t2
113; CHECK:       t5: nxv4i8 = extract_subvector t3, Constant:i64<0>
114; CHECK:     t7: nxv16i8 = insert_subvector undef:nxv16i8, t5, Constant:i64<0>
115; CHECK:   t9: ch,glue = CopyToReg t0, Register:nxv16i8 $z0, t7
116; CHECK:   t10: ch = AArch64ISD::RET_GLUE t9, Register:nxv16i8 $z0, t9:1
117
118; CHECK: Optimized lowered selection DAG: %bb.0 'insert_small_scalable_into_big_scalable_2:'
119; CHECK: SelectionDAG has 10 nodes:
120; CHECK:   t0: ch,glue = EntryToken
121; CHECK:         t2: nxv8i16,ch = CopyFromReg t0, Register:nxv8i16 %0
122; CHECK:       t3: nxv8i8 = truncate t2
123; CHECK:     t11: nxv16i8 = insert_subvector undef:nxv16i8, t3, Constant:i64<0>
124; CHECK:   t9: ch,glue = CopyToReg t0, Register:nxv16i8 $z0, t11
125; CHECK:   t10: ch = AArch64ISD::RET_GLUE t9, Register:nxv16i8 $z0, t9:1
126
; Same shape as insert_small_scalable_into_big_scalable_1, except that the
; intermediate subvector is scalable (<vscale x 4 x i8>) rather than fixed.
; The expectations above require the same result: one insert_subvector of the
; truncated nxv8i8 value into undef nxv16i8.
127define <vscale x 16 x i8> @insert_small_scalable_into_big_scalable_2(<vscale x 8 x i8> %a) #0 {
128  %extract = call <vscale x 4 x i8> @llvm.vector.extract(<vscale x 8 x i8> %a, i64 0)
129  %insert = call <vscale x 16 x i8> @llvm.vector.insert(<vscale x 16 x i8> undef, <vscale x 4 x i8> %extract, i64 0)
130  ret <vscale x 16 x i8> %insert
131}
132
133; CHECK: Initial selection DAG: %bb.0 'extract_small_fixed_from_big_fixed:'
134; CHECK: SelectionDAG has 10 nodes:
135; CHECK:   t0: ch,glue = EntryToken
136; CHECK:         t2: v16i8,ch = CopyFromReg t0, Register:v16i8 %0
137; CHECK:       t4: v4i8 = extract_subvector t2, Constant:i64<0>
138; CHECK:     t6: v8i8 = insert_subvector undef:v8i8, t4, Constant:i64<0>
139; CHECK:   t8: ch,glue = CopyToReg t0, Register:v8i8 $d0, t6
140; CHECK:   t9: ch = AArch64ISD::RET_GLUE t8, Register:v8i8 $d0, t8:1
141
142; CHECK: Optimized lowered selection DAG: %bb.0 'extract_small_fixed_from_big_fixed:'
143; CHECK: SelectionDAG has 8 nodes:
144; CHECK:   t0: ch,glue = EntryToken
145; CHECK:       t2: v16i8,ch = CopyFromReg t0, Register:v16i8 %0
146; CHECK:     t10: v8i8 = extract_subvector t2, Constant:i64<0>
147; CHECK:   t8: ch,glue = CopyToReg t0, Register:v8i8 $d0, t10
148; CHECK:   t9: ch = AArch64ISD::RET_GLUE t8, Register:v8i8 $d0, t8:1
149
; Extracts a <4 x i8> at index 0 from the <16 x i8> argument and inserts it at
; index 0 of an undef <8 x i8>. Because the source is wider than the result,
; the expectations above require the pair to fold into a single v8i8
; extract_subvector taken directly from the v16i8 source.
150define <8 x i8> @extract_small_fixed_from_big_fixed(<16 x i8> %a) #0 {
151  %extract = call <4 x i8> @llvm.vector.extract(<16 x i8> %a, i64 0)
152  %insert = call <8 x i8> @llvm.vector.insert(<8 x i8> undef, <4 x i8> %extract, i64 0)
153  ret <8 x i8> %insert
154}
155
156; CHECK: Initial selection DAG: %bb.0 'extract_small_scalable_from_big_fixed:'
157; CHECK: SelectionDAG has 11 nodes:
158; CHECK:   t0: ch,glue = EntryToken
159; CHECK:           t2: v16i8,ch = CopyFromReg t0, Register:v16i8 %0
160; CHECK:         t4: v4i8 = extract_subvector t2, Constant:i64<0>
161; CHECK:       t6: nxv8i8 = insert_subvector undef:nxv8i8, t4, Constant:i64<0>
162; CHECK:     t7: nxv8i16 = any_extend t6
163; CHECK:   t9: ch,glue = CopyToReg t0, Register:nxv8i16 $z0, t7
164; CHECK:   t10: ch = AArch64ISD::RET_GLUE t9, Register:nxv8i16 $z0, t9:1
165
166; CHECK: Optimized lowered selection DAG: %bb.0 'extract_small_scalable_from_big_fixed:'
167; CHECK: SelectionDAG has 11 nodes:
168; CHECK:   t0: ch,glue = EntryToken
169; CHECK:           t2: v16i8,ch = CopyFromReg t0, Register:v16i8 %0
170; CHECK:         t4: v4i8 = extract_subvector t2, Constant:i64<0>
171; CHECK:       t6: nxv8i8 = insert_subvector undef:nxv8i8, t4, Constant:i64<0>
172; CHECK:     t7: nxv8i16 = any_extend t6
173; CHECK:   t9: ch,glue = CopyToReg t0, Register:nxv8i16 $z0, t7
174; CHECK:   t10: ch = AArch64ISD::RET_GLUE t9, Register:nxv8i16 $z0, t9:1
175
176; Resulting extract would not be legal, so there's no transformation.
; Extracts a fixed <4 x i8> at index 0 from the fixed <16 x i8> argument and
; inserts it at index 0 of an undef scalable <vscale x 8 x i8>. The
; expectations above show the optimized DAG identical to the initial one
; (11 nodes, extract_subvector and insert_subvector both still present); the
; nxv8i8 result is any_extended to nxv8i16 for the return.
177define <vscale x 8 x i8> @extract_small_scalable_from_big_fixed(<16 x i8> %a) #0 {
178  %extract = call <4 x i8> @llvm.vector.extract(<16 x i8> %a, i64 0)
179  %insert = call <vscale x 8 x i8> @llvm.vector.insert(<vscale x 8 x i8> undef, <4 x i8> %extract, i64 0)
180  ret <vscale x 8 x i8> %insert
181}
182
183; CHECK: Initial selection DAG: %bb.0 'extract_small_fixed_from_big_scalable:'
184; CHECK: SelectionDAG has 10 nodes:
185; CHECK:   t0: ch,glue = EntryToken
186; CHECK:         t2: nxv16i8,ch = CopyFromReg t0, Register:nxv16i8 %0
187; CHECK:       t4: v4i8 = extract_subvector t2, Constant:i64<0>
188; CHECK:     t6: v8i8 = insert_subvector undef:v8i8, t4, Constant:i64<0>
189; CHECK:   t8: ch,glue = CopyToReg t0, Register:v8i8 $d0, t6
190; CHECK:   t9: ch = AArch64ISD::RET_GLUE t8, Register:v8i8 $d0, t8:1
191
192; CHECK: Optimized lowered selection DAG: %bb.0 'extract_small_fixed_from_big_scalable:'
193; CHECK: SelectionDAG has 8 nodes:
194; CHECK:   t0: ch,glue = EntryToken
195; CHECK:       t2: nxv16i8,ch = CopyFromReg t0, Register:nxv16i8 %0
196; CHECK:     t10: v8i8 = extract_subvector t2, Constant:i64<0>
197; CHECK:   t8: ch,glue = CopyToReg t0, Register:v8i8 $d0, t10
198; CHECK:   t9: ch = AArch64ISD::RET_GLUE t8, Register:v8i8 $d0, t8:1
199
; Extracts a fixed <4 x i8> at index 0 from the scalable <vscale x 16 x i8>
; argument and inserts it at index 0 of an undef fixed <8 x i8>. The
; expectations above require the pair to fold into a single v8i8
; extract_subvector taken directly from the nxv16i8 source.
200define <8 x i8> @extract_small_fixed_from_big_scalable(<vscale x 16 x i8> %a) #0 {
201  %extract = call <4 x i8> @llvm.vector.extract(<vscale x 16 x i8> %a, i64 0)
202  %insert = call <8 x i8> @llvm.vector.insert(<8 x i8> undef, <4 x i8> %extract, i64 0)
203  ret <8 x i8> %insert
204}
205
206; CHECK: Initial selection DAG: %bb.0 'extract_small_scalable_from_big_scalable_1:'
207; CHECK: SelectionDAG has 11 nodes:
208; CHECK:   t0: ch,glue = EntryToken
209; CHECK:           t2: nxv16i8,ch = CopyFromReg t0, Register:nxv16i8 %0
210; CHECK:         t4: v4i8 = extract_subvector t2, Constant:i64<0>
211; CHECK:       t6: nxv8i8 = insert_subvector undef:nxv8i8, t4, Constant:i64<0>
212; CHECK:     t7: nxv8i16 = any_extend t6
213; CHECK:   t9: ch,glue = CopyToReg t0, Register:nxv8i16 $z0, t7
214; CHECK:   t10: ch = AArch64ISD::RET_GLUE t9, Register:nxv8i16 $z0, t9:1
215
216; CHECK: Optimized lowered selection DAG: %bb.0 'extract_small_scalable_from_big_scalable_1:'
217; CHECK: SelectionDAG has 9 nodes:
218; CHECK:   t0: ch,glue = EntryToken
219; CHECK:         t2: nxv16i8,ch = CopyFromReg t0, Register:nxv16i8 %0
220; CHECK:       t11: nxv8i8 = extract_subvector t2, Constant:i64<0>
221; CHECK:     t7: nxv8i16 = any_extend t11
222; CHECK:   t9: ch,glue = CopyToReg t0, Register:nxv8i16 $z0, t7
223; CHECK:   t10: ch = AArch64ISD::RET_GLUE t9, Register:nxv8i16 $z0, t9:1
224
; Scalable source and scalable destination with a fixed-length intermediate:
; extracts a <4 x i8> at index 0 from the <vscale x 16 x i8> argument and
; inserts it at index 0 of an undef <vscale x 8 x i8>. The expectations above
; require the pair to fold into a single nxv8i8 extract_subvector from the
; nxv16i8 source, which is then any_extended to nxv8i16 for the return.
225define <vscale x 8 x i8> @extract_small_scalable_from_big_scalable_1(<vscale x 16 x i8> %a) #0 {
226  %extract = call <4 x i8> @llvm.vector.extract(<vscale x 16 x i8> %a, i64 0)
227  %insert = call <vscale x 8 x i8> @llvm.vector.insert(<vscale x 8 x i8> undef, <4 x i8> %extract, i64 0)
228  ret <vscale x 8 x i8> %insert
229}
230
231; CHECK: Initial selection DAG: %bb.0 'extract_small_scalable_from_big_scalable_2:'
232; CHECK: SelectionDAG has 11 nodes:
233; CHECK:   t0: ch,glue = EntryToken
234; CHECK:           t2: nxv16i8,ch = CopyFromReg t0, Register:nxv16i8 %0
235; CHECK:         t4: nxv4i8 = extract_subvector t2, Constant:i64<0>
236; CHECK:       t6: nxv8i8 = insert_subvector undef:nxv8i8, t4, Constant:i64<0>
237; CHECK:     t7: nxv8i16 = any_extend t6
238; CHECK:   t9: ch,glue = CopyToReg t0, Register:nxv8i16 $z0, t7
239; CHECK:   t10: ch = AArch64ISD::RET_GLUE t9, Register:nxv8i16 $z0, t9:1
240
241; CHECK: Optimized lowered selection DAG: %bb.0 'extract_small_scalable_from_big_scalable_2:'
242; CHECK: SelectionDAG has 9 nodes:
243; CHECK:   t0: ch,glue = EntryToken
244; CHECK:         t2: nxv16i8,ch = CopyFromReg t0, Register:nxv16i8 %0
245; CHECK:       t11: nxv8i8 = extract_subvector t2, Constant:i64<0>
246; CHECK:     t7: nxv8i16 = any_extend t11
247; CHECK:   t9: ch,glue = CopyToReg t0, Register:nxv8i16 $z0, t7
248; CHECK:   t10: ch = AArch64ISD::RET_GLUE t9, Register:nxv8i16 $z0, t9:1
249
; Same shape as extract_small_scalable_from_big_scalable_1, except that the
; intermediate subvector is scalable (<vscale x 4 x i8>) rather than fixed.
; The expectations above require the same result: one nxv8i8 extract_subvector
; from the nxv16i8 source, any_extended to nxv8i16 for the return.
250define <vscale x 8 x i8> @extract_small_scalable_from_big_scalable_2(<vscale x 16 x i8> %a) #0 {
251  %extract = call <vscale x 4 x i8> @llvm.vector.extract(<vscale x 16 x i8> %a, i64 0)
252  %insert = call <vscale x 8 x i8> @llvm.vector.insert(<vscale x 8 x i8> undef, <vscale x 4 x i8> %extract, i64 0)
253  ret <vscale x 8 x i8> %insert
254}
255
256; CHECK: Initial selection DAG: %bb.0 'extract_fixed_from_scalable:'
257; CHECK: SelectionDAG has 10 nodes:
258; CHECK:   t0: ch,glue = EntryToken
259; CHECK:         t2: nxv16i8,ch = CopyFromReg t0, Register:nxv16i8 %0
260; CHECK:       t4: v4i8 = extract_subvector t2, Constant:i64<0>
261; CHECK:     t6: v16i8 = insert_subvector undef:v16i8, t4, Constant:i64<0>
262; CHECK:   t8: ch,glue = CopyToReg t0, Register:v16i8 $q0, t6
263; CHECK:   t9: ch = AArch64ISD::RET_GLUE t8, Register:v16i8 $q0, t8:1
264
265; CHECK: Optimized lowered selection DAG: %bb.0 'extract_fixed_from_scalable:'
266; CHECK: SelectionDAG has 8 nodes:
267; CHECK:   t0: ch,glue = EntryToken
268; CHECK:       t2: nxv16i8,ch = CopyFromReg t0, Register:nxv16i8 %0
269; CHECK:     t10: v16i8 = extract_subvector t2, Constant:i64<0>
270; CHECK:   t8: ch,glue = CopyToReg t0, Register:v16i8 $q0, t10
271; CHECK:   t9: ch = AArch64ISD::RET_GLUE t8, Register:v16i8 $q0, t8:1
272
273; A variant of insert_small_scalable_into_big_fixed whose vector types prevent
274; the expected transformation because the resulting insert would not be legal.
275; In this instance their matching minimum vector lengths allow us to perform the
276; opposite transformation and emit an extract instead.
; IR pattern: a <4 x i8> is extracted at index 0 from the scalable
; <vscale x 16 x i8> argument and inserted at index 0 of an undef fixed
; <16 x i8>. The expectations above require a single v16i8 extract_subvector
; taken directly from the nxv16i8 source.
277define <16 x i8> @extract_fixed_from_scalable(<vscale x 16 x i8> %a) #0 {
278  %extract = call <4 x i8> @llvm.vector.extract(<vscale x 16 x i8> %a, i64 0)
279  %insert = call <16 x i8> @llvm.vector.insert(<16 x i8> undef, <4 x i8> %extract, i64 0)
280  ret <16 x i8> %insert
281}
282
283; CHECK: Initial selection DAG: %bb.0 'insert_fixed_into_scalable:'
284; CHECK: SelectionDAG has 10 nodes:
285; CHECK:   t0: ch,glue = EntryToken
286; CHECK:         t2: v16i8,ch = CopyFromReg t0, Register:v16i8 %0
287; CHECK:       t4: v4i8 = extract_subvector t2, Constant:i64<0>
288; CHECK:     t6: nxv16i8 = insert_subvector undef:nxv16i8, t4, Constant:i64<0>
289; CHECK:   t8: ch,glue = CopyToReg t0, Register:nxv16i8 $z0, t6
290; CHECK:   t9: ch = AArch64ISD::RET_GLUE t8, Register:nxv16i8 $z0, t8:1
291
292; CHECK: Optimized lowered selection DAG: %bb.0 'insert_fixed_into_scalable:'
293; CHECK: SelectionDAG has 9 nodes:
294; CHECK:   t0: ch,glue = EntryToken
295; CHECK:       t2: v16i8,ch = CopyFromReg t0, Register:v16i8 %0
296; CHECK:     t10: nxv16i8 = insert_subvector undef:nxv16i8, t2, Constant:i64<0>
297; CHECK:   t8: ch,glue = CopyToReg t0, Register:nxv16i8 $z0, t10
298; CHECK:   t9: ch = AArch64ISD::RET_GLUE t8, Register:nxv16i8 $z0, t8:1
299
300; A variant of extract_small_scalable_from_big_fixed whose vector types prevent
301; the expected transformation because the resulting extract would not be legal.
302; In this instance their matching minimum vector lengths allow us to perform the
303; opposite transformation and emit an insert instead.
; IR pattern: a <4 x i8> is extracted at index 0 from the fixed <16 x i8>
; argument and inserted at index 0 of an undef scalable <vscale x 16 x i8>.
; The expectations above require a single insert_subvector of the whole v16i8
; source into undef nxv16i8.
304define <vscale x 16 x i8> @insert_fixed_into_scalable(<16 x i8> %a) #0 {
305  %extract = call <4 x i8> @llvm.vector.extract(<16 x i8> %a, i64 0)
306  %insert = call <vscale x 16 x i8> @llvm.vector.insert(<vscale x 16 x i8> undef, <4 x i8> %extract, i64 0)
307  ret <vscale x 16 x i8> %insert
308}
309
; Attribute group #0, referenced by each function above, enables the SVE
; target feature.
310attributes #0 = { "target-features"="+sve" }
311