xref: /llvm-project/llvm/test/Analysis/CostModel/AArch64/shuffle-transpose.ll (revision 180865a50085a378fa107b965f63cc52d007595e)
; This file is driven twice by the two command lines below: once through opt
; with the print<cost-model> pass (output matched with the first check prefix)
; and once through llc (assembly matched with the second check prefix). Both
; invocations use the aarch64--linux-gnu triple.
1; RUN: opt < %s -mtriple=aarch64--linux-gnu -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefix=COST
2; RUN: llc < %s -mtriple=aarch64--linux-gnu | FileCheck %s --check-prefix=CODE
3
; Module data layout string; the meaning of each field is defined in the
; "Data Layout" section of the LLVM Language Reference.
4target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
5
6; COST-LABEL: trn1.v8i8
7; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
8; CODE-LABEL: trn1.v8i8
9; CODE:       trn1 v0.8b, v0.8b, v1.8b
; trn1 pattern on <8 x i8>: the mask takes the even-numbered lanes of %v0
; (0, 2, 4, 6) and follows each with the same lane of %v1 (mask indices
; 8, 10, 12, 14, since indices >= 8 address the second operand).
10define <8 x i8> @trn1.v8i8(<8 x i8> %v0, <8 x i8> %v1) {
  ; Result lanes alternate between %v0 and %v1.
11  %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
12  ret <8 x i8> %tmp0
13}
14
15; COST-LABEL: trn2.v8i8
16; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
17; CODE-LABEL: trn2.v8i8
18; CODE:       trn2 v0.8b, v0.8b, v1.8b
; trn2 pattern on <8 x i8>: the mask takes the odd-numbered lanes of %v0
; (1, 3, 5, 7) and follows each with the same lane of %v1 (mask indices
; 9, 11, 13, 15, since indices >= 8 address the second operand).
19define <8 x i8> @trn2.v8i8(<8 x i8> %v0, <8 x i8> %v1) {
  ; Result lanes alternate between %v0 and %v1.
20  %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
21  ret <8 x i8> %tmp0
22}
23
24; COST-LABEL: trn1.v16i8
25; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
26; CODE-LABEL: trn1.v16i8
27; CODE:       trn1 v0.16b, v0.16b, v1.16b
; trn1 pattern on <16 x i8>: the mask takes the even-numbered lanes of %v0
; (0, 2, ..., 14) and follows each with the same lane of %v1 (mask indices
; 16, 18, ..., 30, since indices >= 16 address the second operand).
28define <16 x i8> @trn1.v16i8(<16 x i8> %v0, <16 x i8> %v1) {
  ; Result lanes alternate between %v0 and %v1.
29  %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
30  ret <16 x i8> %tmp0
31}
32
33; COST-LABEL: trn2.v16i8
34; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
35; CODE-LABEL: trn2.v16i8
36; CODE:       trn2 v0.16b, v0.16b, v1.16b
; trn2 pattern on <16 x i8>: the mask takes the odd-numbered lanes of %v0
; (1, 3, ..., 15) and follows each with the same lane of %v1 (mask indices
; 17, 19, ..., 31, since indices >= 16 address the second operand).
37define <16 x i8> @trn2.v16i8(<16 x i8> %v0, <16 x i8> %v1) {
  ; Result lanes alternate between %v0 and %v1.
38  %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
39  ret <16 x i8> %tmp0
40}
41
42; COST-LABEL: trn1.v4i16
43; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
44; CODE-LABEL: trn1.v4i16
45; CODE:       trn1 v0.4h, v0.4h, v1.4h
; trn1 pattern on <4 x i16>: the mask takes the even-numbered lanes of %v0
; (0, 2) and follows each with the same lane of %v1 (mask indices 4, 6,
; since indices >= 4 address the second operand).
46define <4 x i16> @trn1.v4i16(<4 x i16> %v0, <4 x i16> %v1) {
  ; Result lanes alternate between %v0 and %v1.
47  %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
48  ret <4 x i16> %tmp0
49}
50
51; COST-LABEL: trn2.v4i16
52; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
53; CODE-LABEL: trn2.v4i16
54; CODE:       trn2 v0.4h, v0.4h, v1.4h
; trn2 pattern on <4 x i16>: the mask takes the odd-numbered lanes of %v0
; (1, 3) and follows each with the same lane of %v1 (mask indices 5, 7,
; since indices >= 4 address the second operand).
55define <4 x i16> @trn2.v4i16(<4 x i16> %v0, <4 x i16> %v1) {
  ; Result lanes alternate between %v0 and %v1.
56  %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
57  ret <4 x i16> %tmp0
58}
59
60; COST-LABEL: trn1.v8i16
61; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
62; CODE-LABEL: trn1.v8i16
63; CODE:       trn1 v0.8h, v0.8h, v1.8h
; trn1 pattern on <8 x i16>: the mask takes the even-numbered lanes of %v0
; (0, 2, 4, 6) and follows each with the same lane of %v1 (mask indices
; 8, 10, 12, 14, since indices >= 8 address the second operand).
64define <8 x i16> @trn1.v8i16(<8 x i16> %v0, <8 x i16> %v1) {
  ; Result lanes alternate between %v0 and %v1.
65  %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
66  ret <8 x i16> %tmp0
67}
68
69; COST-LABEL: trn2.v8i16
70; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
71; CODE-LABEL: trn2.v8i16
72; CODE:       trn2 v0.8h, v0.8h, v1.8h
; trn2 pattern on <8 x i16>: the mask takes the odd-numbered lanes of %v0
; (1, 3, 5, 7) and follows each with the same lane of %v1 (mask indices
; 9, 11, 13, 15, since indices >= 8 address the second operand).
73define <8 x i16> @trn2.v8i16(<8 x i16> %v0, <8 x i16> %v1) {
  ; Result lanes alternate between %v0 and %v1.
74  %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
75  ret <8 x i16> %tmp0
76}
77
78; COST-LABEL: trn1.v2i32
79; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 0, i32 2>
80; CODE-LABEL: trn1.v2i32
81; CODE:       zip1 v0.2s, v0.2s, v1.2s
; Two-lane trn1 pattern on <2 x i32>: mask <0, 2> is lane 0 of %v0 followed
; by lane 0 of %v1 (index 2 addresses the second operand). The llc check
; directly above this function expects zip1 rather than trn1; with only two
; lanes both instructions select the same elements.
82define <2 x i32> @trn1.v2i32(<2 x i32> %v0, <2 x i32> %v1) {
  ; Result is { %v0[0], %v1[0] }.
83  %tmp0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 0, i32 2>
84  ret <2 x i32> %tmp0
85}
86
87; COST-LABEL: trn2.v2i32
88; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 1, i32 3>
89; CODE-LABEL: trn2.v2i32
90; CODE:       zip2 v0.2s, v0.2s, v1.2s
; Two-lane trn2 pattern on <2 x i32>: mask <1, 3> is lane 1 of %v0 followed
; by lane 1 of %v1 (index 3 addresses the second operand). The llc check
; directly above this function expects zip2 rather than trn2; with only two
; lanes both instructions select the same elements.
91define <2 x i32> @trn2.v2i32(<2 x i32> %v0, <2 x i32> %v1) {
  ; Result is { %v0[1], %v1[1] }.
92  %tmp0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 1, i32 3>
93  ret <2 x i32> %tmp0
94}
95
96; COST-LABEL: trn1.v4i32
97; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
98; CODE-LABEL: trn1.v4i32
99; CODE:       trn1 v0.4s, v0.4s, v1.4s
; trn1 pattern on <4 x i32>: the mask takes the even-numbered lanes of %v0
; (0, 2) and follows each with the same lane of %v1 (mask indices 4, 6,
; since indices >= 4 address the second operand).
100define <4 x i32> @trn1.v4i32(<4 x i32> %v0, <4 x i32> %v1) {
  ; Result lanes alternate between %v0 and %v1.
101  %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
102  ret <4 x i32> %tmp0
103}
104
105; COST-LABEL: trn2.v4i32
106; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
107; CODE-LABEL: trn2.v4i32
108; CODE:       trn2 v0.4s, v0.4s, v1.4s
; trn2 pattern on <4 x i32>: the mask takes the odd-numbered lanes of %v0
; (1, 3) and follows each with the same lane of %v1 (mask indices 5, 7,
; since indices >= 4 address the second operand).
109define <4 x i32> @trn2.v4i32(<4 x i32> %v0, <4 x i32> %v1) {
  ; Result lanes alternate between %v0 and %v1.
110  %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
111  ret <4 x i32> %tmp0
112}
113
114; COST-LABEL: trn1.v2i64
115; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x i64> %v0, <2 x i64> %v1, <2 x i32> <i32 0, i32 2>
116; CODE-LABEL: trn1.v2i64
117; CODE:       zip1 v0.2d, v0.2d, v1.2d
; Two-lane trn1 pattern on <2 x i64>: mask <0, 2> is lane 0 of %v0 followed
; by lane 0 of %v1 (index 2 addresses the second operand). The llc check
; directly above this function expects zip1 rather than trn1; with only two
; lanes both instructions select the same elements.
118define <2 x i64> @trn1.v2i64(<2 x i64> %v0, <2 x i64> %v1) {
  ; Result is { %v0[0], %v1[0] }.
119  %tmp0 = shufflevector <2 x i64> %v0, <2 x i64> %v1, <2 x i32> <i32 0, i32 2>
120  ret <2 x i64> %tmp0
121}
122
123; COST-LABEL: trn2.v2i64
124; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x i64> %v0, <2 x i64> %v1, <2 x i32> <i32 1, i32 3>
125; CODE-LABEL: trn2.v2i64
126; CODE:       zip2 v0.2d, v0.2d, v1.2d
; Two-lane trn2 pattern on <2 x i64>: mask <1, 3> is lane 1 of %v0 followed
; by lane 1 of %v1 (index 3 addresses the second operand). The llc check
; directly above this function expects zip2 rather than trn2; with only two
; lanes both instructions select the same elements.
127define <2 x i64> @trn2.v2i64(<2 x i64> %v0, <2 x i64> %v1) {
  ; Result is { %v0[1], %v1[1] }.
128  %tmp0 = shufflevector <2 x i64> %v0, <2 x i64> %v1, <2 x i32> <i32 1, i32 3>
129  ret <2 x i64> %tmp0
130}
131
132; COST-LABEL: trn1.v2f32
133; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x float> %v0, <2 x float> %v1, <2 x i32> <i32 0, i32 2>
134; CODE-LABEL: trn1.v2f32
135; CODE:       zip1 v0.2s, v0.2s, v1.2s
; Two-lane trn1 pattern on <2 x float>: mask <0, 2> is lane 0 of %v0 followed
; by lane 0 of %v1 (index 2 addresses the second operand). The llc check
; directly above this function expects zip1 rather than trn1; with only two
; lanes both instructions select the same elements.
136define <2 x float> @trn1.v2f32(<2 x float> %v0, <2 x float> %v1) {
  ; Result is { %v0[0], %v1[0] }.
137  %tmp0 = shufflevector <2 x float> %v0, <2 x float> %v1, <2 x i32> <i32 0, i32 2>
138  ret <2 x float> %tmp0
139}
140
141; COST-LABEL: trn2.v2f32
142; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x float> %v0, <2 x float> %v1, <2 x i32> <i32 1, i32 3>
143; CODE-LABEL: trn2.v2f32
144; CODE:       zip2 v0.2s, v0.2s, v1.2s
; Two-lane trn2 pattern on <2 x float>: mask <1, 3> is lane 1 of %v0 followed
; by lane 1 of %v1 (index 3 addresses the second operand). The llc check
; directly above this function expects zip2 rather than trn2; with only two
; lanes both instructions select the same elements.
145define <2 x float> @trn2.v2f32(<2 x float> %v0, <2 x float> %v1) {
  ; Result is { %v0[1], %v1[1] }.
146  %tmp0 = shufflevector <2 x float> %v0, <2 x float> %v1, <2 x i32> <i32 1, i32 3>
147  ret <2 x float> %tmp0
148}
149
150; COST-LABEL: trn1.v4f32
151; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
152; CODE-LABEL: trn1.v4f32
153; CODE:       trn1 v0.4s, v0.4s, v1.4s
; trn1 pattern on <4 x float>: the mask takes the even-numbered lanes of %v0
; (0, 2) and follows each with the same lane of %v1 (mask indices 4, 6,
; since indices >= 4 address the second operand).
154define <4 x float> @trn1.v4f32(<4 x float> %v0, <4 x float> %v1) {
  ; Result lanes alternate between %v0 and %v1.
155  %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
156  ret <4 x float> %tmp0
157}
158
159; COST-LABEL: trn2.v4f32
160; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
161; CODE-LABEL: trn2.v4f32
162; CODE:       trn2 v0.4s, v0.4s, v1.4s
; trn2 pattern on <4 x float>: the mask takes the odd-numbered lanes of %v0
; (1, 3) and follows each with the same lane of %v1 (mask indices 5, 7,
; since indices >= 4 address the second operand).
163define <4 x float> @trn2.v4f32(<4 x float> %v0, <4 x float> %v1) {
  ; Result lanes alternate between %v0 and %v1.
164  %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
165  ret <4 x float> %tmp0
166}
167
168; COST-LABEL: trn1.v2f64
169; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x double> %v0, <2 x double> %v1, <2 x i32> <i32 0, i32 2>
170; CODE-LABEL: trn1.v2f64
171; CODE:       zip1 v0.2d, v0.2d, v1.2d
; Two-lane trn1 pattern on <2 x double>: mask <0, 2> is lane 0 of %v0 followed
; by lane 0 of %v1 (index 2 addresses the second operand). The llc check
; directly above this function expects zip1 rather than trn1; with only two
; lanes both instructions select the same elements.
172define <2 x double> @trn1.v2f64(<2 x double> %v0, <2 x double> %v1) {
  ; Result is { %v0[0], %v1[0] }.
173  %tmp0 = shufflevector <2 x double> %v0, <2 x double> %v1, <2 x i32> <i32 0, i32 2>
174  ret <2 x double> %tmp0
175}
176
177; COST-LABEL: trn2.v2f64
178; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x double> %v0, <2 x double> %v1, <2 x i32> <i32 1, i32 3>
179; CODE-LABEL: trn2.v2f64
180; CODE:       zip2 v0.2d, v0.2d, v1.2d
; Two-lane trn2 pattern on <2 x double>: mask <1, 3> is lane 1 of %v0 followed
; by lane 1 of %v1 (index 3 addresses the second operand). The llc check
; directly above this function expects zip2 rather than trn2; with only two
; lanes both instructions select the same elements.
181define <2 x double> @trn2.v2f64(<2 x double> %v0, <2 x double> %v1) {
  ; Result is { %v0[1], %v1[1] }.
182  %tmp0 = shufflevector <2 x double> %v0, <2 x double> %v1, <2 x i32> <i32 1, i32 3>
183  ret <2 x double> %tmp0
184}
185
186; COST-LABEL: trn1.v4f16
187; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
188; CODE-LABEL: trn1.v4f16
189; CODE:       trn1 v0.4h, v0.4h, v1.4h
; trn1 pattern on <4 x half>: the mask takes the even-numbered lanes of %v0
; (0, 2) and follows each with the same lane of %v1 (mask indices 4, 6,
; since indices >= 4 address the second operand).
190define <4 x half> @trn1.v4f16(<4 x half> %v0, <4 x half> %v1) {
  ; Result lanes alternate between %v0 and %v1.
191  %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
192  ret <4 x half> %tmp0
193}
194
195; COST-LABEL: trn2.v4f16
196; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
197; CODE-LABEL: trn2.v4f16
198; CODE:       trn2 v0.4h, v0.4h, v1.4h
; trn2 pattern on <4 x half>: the mask takes the odd-numbered lanes of %v0
; (1, 3) and follows each with the same lane of %v1 (mask indices 5, 7,
; since indices >= 4 address the second operand).
199define <4 x half> @trn2.v4f16(<4 x half> %v0, <4 x half> %v1) {
  ; Result lanes alternate between %v0 and %v1.
200  %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
201  ret <4 x half> %tmp0
202}
203
204; COST-LABEL: trn1.v8f16
205; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
206; CODE-LABEL: trn1.v8f16
207; CODE:       trn1 v0.8h, v0.8h, v1.8h
; trn1 pattern on <8 x half>: the mask takes the even-numbered lanes of %v0
; (0, 2, 4, 6) and follows each with the same lane of %v1 (mask indices
; 8, 10, 12, 14, since indices >= 8 address the second operand).
208define <8 x half> @trn1.v8f16(<8 x half> %v0, <8 x half> %v1) {
  ; Result lanes alternate between %v0 and %v1.
209  %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
210  ret <8 x half> %tmp0
211}
212
213; COST-LABEL: trn2.v8f16
214; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
215; CODE-LABEL: trn2.v8f16
216; CODE:       trn2 v0.8h, v0.8h, v1.8h
; trn2 pattern on <8 x half>: the mask takes the odd-numbered lanes of %v0
; (1, 3, 5, 7) and follows each with the same lane of %v1 (mask indices
; 9, 11, 13, 15, since indices >= 8 address the second operand).
217define <8 x half> @trn2.v8f16(<8 x half> %v0, <8 x half> %v1) {
  ; Result lanes alternate between %v0 and %v1.
218  %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
219  ret <8 x half> %tmp0
220}
221