xref: /llvm-project/llvm/test/Analysis/CostModel/AArch64/free-widening-casts.ll (revision 2a859b20146108af84c741a509dc0e534e045768)
; Two invocations: opt prints the cost model (checked with the cost prefix) and llc emits AArch64 assembly (checked with the codegen prefix).
1; RUN: opt < %s -mtriple=aarch64--linux-gnu -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefix=COST
2; RUN: llc < %s -mtriple=aarch64--linux-gnu | FileCheck %s --check-prefix=CODE
3
4target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
5
; Both add operands are zero-extended from <8 x i8> to <8 x i16>: each zext is expected to cost 0 and codegen is expected to emit a single uaddl.
6; COST-LABEL: uaddl_8h
7; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <8 x i8> %a to <8 x i16>
8; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <8 x i8> %b to <8 x i16>
9; CODE-LABEL: uaddl_8h
10; CODE:       uaddl v0.8h, v0.8b, v1.8b
11define <8 x i16> @uaddl_8h(<8 x i8> %a, <8 x i8> %b) {
12  %tmp0 = zext <8 x i8> %a to <8 x i16>
13  %tmp1 = zext <8 x i8> %b to <8 x i16>
14  %tmp2 = add <8 x i16> %tmp0, %tmp1
15  ret <8 x i16> %tmp2
16}
17
; Both add operands are zero-extended from <4 x i16> to <4 x i32>: each zext is expected to cost 0 and codegen is expected to emit a single uaddl.
18; COST-LABEL: uaddl_4s
19; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <4 x i16> %a to <4 x i32>
20; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <4 x i16> %b to <4 x i32>
21; CODE-LABEL: uaddl_4s
22; CODE:       uaddl v0.4s, v0.4h, v1.4h
23define <4 x i32> @uaddl_4s(<4 x i16> %a, <4 x i16> %b) {
24  %tmp0 = zext <4 x i16> %a to <4 x i32>
25  %tmp1 = zext <4 x i16> %b to <4 x i32>
26  %tmp2 = add <4 x i32> %tmp0, %tmp1
27  ret <4 x i32> %tmp2
28}
29
; Both add operands are zero-extended from <2 x i32> to <2 x i64>: each zext is expected to cost 0 and codegen is expected to emit a single uaddl.
30; COST-LABEL: uaddl_2d
31; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <2 x i32> %a to <2 x i64>
32; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <2 x i32> %b to <2 x i64>
33; CODE-LABEL: uaddl_2d
34; CODE:       uaddl v0.2d, v0.2s, v1.2s
35define <2 x i64> @uaddl_2d(<2 x i32> %a, <2 x i32> %b) {
36  %tmp0 = zext <2 x i32> %a to <2 x i64>
37  %tmp1 = zext <2 x i32> %b to <2 x i64>
38  %tmp2 = add <2 x i64> %tmp0, %tmp1
39  ret <2 x i64> %tmp2
40}
41
; Both add operands are zero-extended from <16 x i8> to <16 x i16>: each zext is expected to cost 0 and codegen is expected to emit a uaddl2/uaddl pair.
42; COST-LABEL: uaddl2_8h
43; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <16 x i8> %a to <16 x i16>
44; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <16 x i8> %b to <16 x i16>
45; CODE-LABEL: uaddl2_8h
46; CODE:       uaddl2 v2.8h, v0.16b, v1.16b
47; CODE-NEXT:  uaddl v0.8h, v0.8b, v1.8b
48define <16 x i16> @uaddl2_8h(<16 x i8> %a, <16 x i8> %b) {
49  %tmp0 = zext <16 x i8> %a to <16 x i16>
50  %tmp1 = zext <16 x i8> %b to <16 x i16>
51  %tmp2 = add <16 x i16> %tmp0, %tmp1
52  ret <16 x i16> %tmp2
53}
54
; Both add operands are zero-extended from <8 x i16> to <8 x i32>: each zext is expected to cost 0 and codegen is expected to emit a uaddl2/uaddl pair.
55; COST-LABEL: uaddl2_4s
56; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <8 x i16> %a to <8 x i32>
57; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <8 x i16> %b to <8 x i32>
58; CODE-LABEL: uaddl2_4s
59; CODE:       uaddl2 v2.4s, v0.8h, v1.8h
60; CODE-NEXT:  uaddl v0.4s, v0.4h, v1.4h
61define <8 x i32> @uaddl2_4s(<8 x i16> %a, <8 x i16> %b) {
62  %tmp0 = zext <8 x i16> %a to <8 x i32>
63  %tmp1 = zext <8 x i16> %b to <8 x i32>
64  %tmp2 = add <8 x i32> %tmp0, %tmp1
65  ret <8 x i32> %tmp2
66}
67
; Both add operands are zero-extended from <4 x i32> to <4 x i64>: each zext is expected to cost 0 and codegen is expected to emit a uaddl2/uaddl pair.
68; COST-LABEL: uaddl2_2d
69; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <4 x i32> %a to <4 x i64>
70; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <4 x i32> %b to <4 x i64>
71; CODE-LABEL: uaddl2_2d
72; CODE:       uaddl2 v2.2d, v0.4s, v1.4s
73; CODE-NEXT:  uaddl v0.2d, v0.2s, v1.2s
74define <4 x i64> @uaddl2_2d(<4 x i32> %a, <4 x i32> %b) {
75  %tmp0 = zext <4 x i32> %a to <4 x i64>
76  %tmp1 = zext <4 x i32> %b to <4 x i64>
77  %tmp2 = add <4 x i64> %tmp0, %tmp1
78  ret <4 x i64> %tmp2
79}
80
; Both add operands are sign-extended from <8 x i8> to <8 x i16>: each sext is expected to cost 0 and codegen is expected to emit a single saddl.
81; COST-LABEL: saddl_8h
82; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <8 x i8> %a to <8 x i16>
83; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <8 x i8> %b to <8 x i16>
84; CODE-LABEL: saddl_8h
85; CODE:       saddl v0.8h, v0.8b, v1.8b
86define <8 x i16> @saddl_8h(<8 x i8> %a, <8 x i8> %b) {
87  %tmp0 = sext <8 x i8> %a to <8 x i16>
88  %tmp1 = sext <8 x i8> %b to <8 x i16>
89  %tmp2 = add <8 x i16> %tmp0, %tmp1
90  ret <8 x i16> %tmp2
91}
92
; Both add operands are sign-extended from <4 x i16> to <4 x i32>: each sext is expected to cost 0 and codegen is expected to emit a single saddl.
93; COST-LABEL: saddl_4s
94; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <4 x i16> %a to <4 x i32>
95; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <4 x i16> %b to <4 x i32>
96; CODE-LABEL: saddl_4s
97; CODE:       saddl v0.4s, v0.4h, v1.4h
98define <4 x i32> @saddl_4s(<4 x i16> %a, <4 x i16> %b) {
99  %tmp0 = sext <4 x i16> %a to <4 x i32>
100  %tmp1 = sext <4 x i16> %b to <4 x i32>
101  %tmp2 = add <4 x i32> %tmp0, %tmp1
102  ret <4 x i32> %tmp2
103}
104
; Both add operands are sign-extended from <2 x i32> to <2 x i64>: each sext is expected to cost 0 and codegen is expected to emit a single saddl.
105; COST-LABEL: saddl_2d
106; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <2 x i32> %a to <2 x i64>
107; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <2 x i32> %b to <2 x i64>
108; CODE-LABEL: saddl_2d
109; CODE:       saddl v0.2d, v0.2s, v1.2s
110define <2 x i64> @saddl_2d(<2 x i32> %a, <2 x i32> %b) {
111  %tmp0 = sext <2 x i32> %a to <2 x i64>
112  %tmp1 = sext <2 x i32> %b to <2 x i64>
113  %tmp2 = add <2 x i64> %tmp0, %tmp1
114  ret <2 x i64> %tmp2
115}
116
; Both add operands are sign-extended from <16 x i8> to <16 x i16>: each sext is expected to cost 0 and codegen is expected to emit a saddl2/saddl pair.
117; COST-LABEL: saddl2_8h
118; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <16 x i8> %a to <16 x i16>
119; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <16 x i8> %b to <16 x i16>
120; CODE-LABEL: saddl2_8h
121; CODE:       saddl2 v2.8h, v0.16b, v1.16b
122; CODE-NEXT:  saddl v0.8h, v0.8b, v1.8b
123define <16 x i16> @saddl2_8h(<16 x i8> %a, <16 x i8> %b) {
124  %tmp0 = sext <16 x i8> %a to <16 x i16>
125  %tmp1 = sext <16 x i8> %b to <16 x i16>
126  %tmp2 = add <16 x i16> %tmp0, %tmp1
127  ret <16 x i16> %tmp2
128}
129
; Both add operands are sign-extended from <8 x i16> to <8 x i32>: each sext is expected to cost 0 and codegen is expected to emit a saddl2/saddl pair.
130; COST-LABEL: saddl2_4s
131; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <8 x i16> %a to <8 x i32>
132; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <8 x i16> %b to <8 x i32>
133; CODE-LABEL: saddl2_4s
134; CODE:       saddl2 v2.4s, v0.8h, v1.8h
135; CODE-NEXT:  saddl v0.4s, v0.4h, v1.4h
136define <8 x i32> @saddl2_4s(<8 x i16> %a, <8 x i16> %b) {
137  %tmp0 = sext <8 x i16> %a to <8 x i32>
138  %tmp1 = sext <8 x i16> %b to <8 x i32>
139  %tmp2 = add <8 x i32> %tmp0, %tmp1
140  ret <8 x i32> %tmp2
141}
142
; Both add operands are sign-extended from <4 x i32> to <4 x i64>: each sext is expected to cost 0 and codegen is expected to emit a saddl2/saddl pair.
143; COST-LABEL: saddl2_2d
144; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <4 x i32> %a to <4 x i64>
145; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <4 x i32> %b to <4 x i64>
146; CODE-LABEL: saddl2_2d
147; CODE:       saddl2 v2.2d, v0.4s, v1.4s
148; CODE-NEXT:  saddl v0.2d, v0.2s, v1.2s
149define <4 x i64> @saddl2_2d(<4 x i32> %a, <4 x i32> %b) {
150  %tmp0 = sext <4 x i32> %a to <4 x i64>
151  %tmp1 = sext <4 x i32> %b to <4 x i64>
152  %tmp2 = add <4 x i64> %tmp0, %tmp1
153  ret <4 x i64> %tmp2
154}
155
; Both sub operands are zero-extended from <8 x i8> to <8 x i16>: each zext is expected to cost 0 and codegen is expected to emit a single usubl.
156; COST-LABEL: usubl_8h
157; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <8 x i8> %a to <8 x i16>
158; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <8 x i8> %b to <8 x i16>
159; CODE-LABEL: usubl_8h
160; CODE:       usubl v0.8h, v0.8b, v1.8b
161define <8 x i16> @usubl_8h(<8 x i8> %a, <8 x i8> %b) {
162  %tmp0 = zext <8 x i8> %a to <8 x i16>
163  %tmp1 = zext <8 x i8> %b to <8 x i16>
164  %tmp2 = sub <8 x i16> %tmp0, %tmp1
165  ret <8 x i16> %tmp2
166}
167
; Both sub operands are zero-extended from <4 x i16> to <4 x i32>: each zext is expected to cost 0 and codegen is expected to emit a single usubl.
168; COST-LABEL: usubl_4s
169; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <4 x i16> %a to <4 x i32>
170; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <4 x i16> %b to <4 x i32>
171; CODE-LABEL: usubl_4s
172; CODE:       usubl v0.4s, v0.4h, v1.4h
173define <4 x i32> @usubl_4s(<4 x i16> %a, <4 x i16> %b) {
174  %tmp0 = zext <4 x i16> %a to <4 x i32>
175  %tmp1 = zext <4 x i16> %b to <4 x i32>
176  %tmp2 = sub <4 x i32> %tmp0, %tmp1
177  ret <4 x i32> %tmp2
178}
179
; Both sub operands are zero-extended from <2 x i32> to <2 x i64>: each zext is expected to cost 0 and codegen is expected to emit a single usubl.
180; COST-LABEL: usubl_2d
181; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <2 x i32> %a to <2 x i64>
182; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <2 x i32> %b to <2 x i64>
183; CODE-LABEL: usubl_2d
184; CODE:       usubl v0.2d, v0.2s, v1.2s
185define <2 x i64> @usubl_2d(<2 x i32> %a, <2 x i32> %b) {
186  %tmp0 = zext <2 x i32> %a to <2 x i64>
187  %tmp1 = zext <2 x i32> %b to <2 x i64>
188  %tmp2 = sub <2 x i64> %tmp0, %tmp1
189  ret <2 x i64> %tmp2
190}
191
; Both sub operands are zero-extended from <16 x i8> to <16 x i16>: each zext is expected to cost 0 and codegen is expected to emit a usubl2/usubl pair.
192; COST-LABEL: usubl2_8h
193; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <16 x i8> %a to <16 x i16>
194; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <16 x i8> %b to <16 x i16>
195; CODE-LABEL: usubl2_8h
196; CODE:       usubl2 v2.8h, v0.16b, v1.16b
197; CODE-NEXT:  usubl v0.8h, v0.8b, v1.8b
198define <16 x i16> @usubl2_8h(<16 x i8> %a, <16 x i8> %b) {
199  %tmp0 = zext <16 x i8> %a to <16 x i16>
200  %tmp1 = zext <16 x i8> %b to <16 x i16>
201  %tmp2 = sub <16 x i16> %tmp0, %tmp1
202  ret <16 x i16> %tmp2
203}
204
; Both sub operands are zero-extended from <8 x i16> to <8 x i32>: each zext is expected to cost 0 and codegen is expected to emit a usubl2/usubl pair.
205; COST-LABEL: usubl2_4s
206; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <8 x i16> %a to <8 x i32>
207; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <8 x i16> %b to <8 x i32>
208; CODE-LABEL: usubl2_4s
209; CODE:       usubl2 v2.4s, v0.8h, v1.8h
210; CODE-NEXT:  usubl v0.4s, v0.4h, v1.4h
211define <8 x i32> @usubl2_4s(<8 x i16> %a, <8 x i16> %b) {
212  %tmp0 = zext <8 x i16> %a to <8 x i32>
213  %tmp1 = zext <8 x i16> %b to <8 x i32>
214  %tmp2 = sub <8 x i32> %tmp0, %tmp1
215  ret <8 x i32> %tmp2
216}
217
; Both sub operands are zero-extended from <4 x i32> to <4 x i64>: each zext is expected to cost 0 and codegen is expected to emit a usubl2/usubl pair.
218; COST-LABEL: usubl2_2d
219; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <4 x i32> %a to <4 x i64>
220; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <4 x i32> %b to <4 x i64>
221; CODE-LABEL: usubl2_2d
222; CODE:       usubl2 v2.2d, v0.4s, v1.4s
223; CODE-NEXT:  usubl v0.2d, v0.2s, v1.2s
224define <4 x i64> @usubl2_2d(<4 x i32> %a, <4 x i32> %b) {
225  %tmp0 = zext <4 x i32> %a to <4 x i64>
226  %tmp1 = zext <4 x i32> %b to <4 x i64>
227  %tmp2 = sub <4 x i64> %tmp0, %tmp1
228  ret <4 x i64> %tmp2
229}
230
; Both sub operands are sign-extended from <8 x i8> to <8 x i16>: each sext is expected to cost 0 and codegen is expected to emit a single ssubl.
231; COST-LABEL: ssubl_8h
232; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <8 x i8> %a to <8 x i16>
233; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <8 x i8> %b to <8 x i16>
234; CODE-LABEL: ssubl_8h
235; CODE:       ssubl v0.8h, v0.8b, v1.8b
236define <8 x i16> @ssubl_8h(<8 x i8> %a, <8 x i8> %b) {
237  %tmp0 = sext <8 x i8> %a to <8 x i16>
238  %tmp1 = sext <8 x i8> %b to <8 x i16>
239  %tmp2 = sub <8 x i16> %tmp0, %tmp1
240  ret <8 x i16> %tmp2
241}
242
; Both sub operands are sign-extended from <4 x i16> to <4 x i32>: each sext is expected to cost 0 and codegen is expected to emit a single ssubl.
243; COST-LABEL: ssubl_4s
244; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <4 x i16> %a to <4 x i32>
245; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <4 x i16> %b to <4 x i32>
246; CODE-LABEL: ssubl_4s
247; CODE:       ssubl v0.4s, v0.4h, v1.4h
248define <4 x i32> @ssubl_4s(<4 x i16> %a, <4 x i16> %b) {
249  %tmp0 = sext <4 x i16> %a to <4 x i32>
250  %tmp1 = sext <4 x i16> %b to <4 x i32>
251  %tmp2 = sub <4 x i32> %tmp0, %tmp1
252  ret <4 x i32> %tmp2
253}
254
; Both sub operands are sign-extended from <2 x i32> to <2 x i64>: each sext is expected to cost 0 and codegen is expected to emit a single ssubl.
255; COST-LABEL: ssubl_2d
256; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <2 x i32> %a to <2 x i64>
257; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <2 x i32> %b to <2 x i64>
258; CODE-LABEL: ssubl_2d
259; CODE:       ssubl v0.2d, v0.2s, v1.2s
260define <2 x i64> @ssubl_2d(<2 x i32> %a, <2 x i32> %b) {
261  %tmp0 = sext <2 x i32> %a to <2 x i64>
262  %tmp1 = sext <2 x i32> %b to <2 x i64>
263  %tmp2 = sub <2 x i64> %tmp0, %tmp1
264  ret <2 x i64> %tmp2
265}
266
; Both sub operands are sign-extended from <16 x i8> to <16 x i16>: each sext is expected to cost 0 and codegen is expected to emit a ssubl2/ssubl pair.
267; COST-LABEL: ssubl2_8h
268; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <16 x i8> %a to <16 x i16>
269; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <16 x i8> %b to <16 x i16>
270; CODE-LABEL: ssubl2_8h
271; CODE:       ssubl2 v2.8h, v0.16b, v1.16b
272; CODE-NEXT:  ssubl v0.8h, v0.8b, v1.8b
273define <16 x i16> @ssubl2_8h(<16 x i8> %a, <16 x i8> %b) {
274  %tmp0 = sext <16 x i8> %a to <16 x i16>
275  %tmp1 = sext <16 x i8> %b to <16 x i16>
276  %tmp2 = sub <16 x i16> %tmp0, %tmp1
277  ret <16 x i16> %tmp2
278}
279
; Both sub operands are sign-extended from <8 x i16> to <8 x i32>: each sext is expected to cost 0 and codegen is expected to emit a ssubl2/ssubl pair.
280; COST-LABEL: ssubl2_4s
281; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <8 x i16> %a to <8 x i32>
282; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <8 x i16> %b to <8 x i32>
283; CODE-LABEL: ssubl2_4s
284; CODE:       ssubl2 v2.4s, v0.8h, v1.8h
285; CODE-NEXT:  ssubl v0.4s, v0.4h, v1.4h
286define <8 x i32> @ssubl2_4s(<8 x i16> %a, <8 x i16> %b) {
287  %tmp0 = sext <8 x i16> %a to <8 x i32>
288  %tmp1 = sext <8 x i16> %b to <8 x i32>
289  %tmp2 = sub <8 x i32> %tmp0, %tmp1
290  ret <8 x i32> %tmp2
291}
292
; Both sub operands are sign-extended from <4 x i32> to <4 x i64>: each sext is expected to cost 0 and codegen is expected to emit a ssubl2/ssubl pair.
293; COST-LABEL: ssubl2_2d
294; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <4 x i32> %a to <4 x i64>
295; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <4 x i32> %b to <4 x i64>
296; CODE-LABEL: ssubl2_2d
297; CODE:       ssubl2 v2.2d, v0.4s, v1.4s
298; CODE-NEXT:  ssubl v0.2d, v0.2s, v1.2s
299define <4 x i64> @ssubl2_2d(<4 x i32> %a, <4 x i32> %b) {
300  %tmp0 = sext <4 x i32> %a to <4 x i64>
301  %tmp1 = sext <4 x i32> %b to <4 x i64>
302  %tmp2 = sub <4 x i64> %tmp0, %tmp1
303  ret <4 x i64> %tmp2
304}
305
; Only the second add operand is zero-extended (<8 x i8> to <8 x i16>): the zext is expected to cost 0 and codegen is expected to emit a single uaddw.
306; COST-LABEL: uaddw_8h
307; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <8 x i8> %a to <8 x i16>
308; CODE-LABEL: uaddw_8h
309; CODE:       uaddw v0.8h, v1.8h, v0.8b
310define <8 x i16> @uaddw_8h(<8 x i8> %a, <8 x i16> %b) {
311  %tmp0 = zext <8 x i8> %a to <8 x i16>
312  %tmp1 = add <8 x i16> %b, %tmp0
313  ret <8 x i16> %tmp1
314}
315
; Only the second add operand is zero-extended (<4 x i16> to <4 x i32>): the zext is expected to cost 0 and codegen is expected to emit a single uaddw.
316; COST-LABEL: uaddw_4s
317; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <4 x i16> %a to <4 x i32>
318; CODE-LABEL: uaddw_4s
319; CODE:       uaddw v0.4s, v1.4s, v0.4h
320define <4 x i32> @uaddw_4s(<4 x i16> %a, <4 x i32> %b) {
321  %tmp0 = zext <4 x i16> %a to <4 x i32>
322  %tmp1 = add <4 x i32> %b, %tmp0
323  ret <4 x i32> %tmp1
324}
325
; Only the second add operand is zero-extended (<2 x i32> to <2 x i64>): the zext is expected to cost 0 and codegen is expected to emit a single uaddw.
326; COST-LABEL: uaddw_2d
327; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <2 x i32> %a to <2 x i64>
328; CODE-LABEL: uaddw_2d
329; CODE:       uaddw v0.2d, v1.2d, v0.2s
330define <2 x i64> @uaddw_2d(<2 x i32> %a, <2 x i64> %b) {
331  %tmp0 = zext <2 x i32> %a to <2 x i64>
332  %tmp1 = add <2 x i64> %b, %tmp0
333  ret <2 x i64> %tmp1
334}
335
; Only the second add operand is zero-extended (<16 x i8> to <16 x i16>): the zext is expected to cost 0 and codegen is expected to emit a uaddw2/uaddw pair.
336; COST-LABEL: uaddw2_8h
337; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <16 x i8> %a to <16 x i16>
338; CODE-LABEL: uaddw2_8h
339; CODE:       uaddw2 v2.8h, v2.8h, v0.16b
340; CODE-NEXT:  uaddw v0.8h, v1.8h, v0.8b
341define <16 x i16> @uaddw2_8h(<16 x i8> %a, <16 x i16> %b) {
342  %tmp0 = zext <16 x i8> %a to <16 x i16>
343  %tmp1 = add <16 x i16> %b, %tmp0
344  ret <16 x i16> %tmp1
345}
346
; Only the second add operand is zero-extended (<8 x i16> to <8 x i32>): the zext is expected to cost 0 and codegen is expected to emit a uaddw2/uaddw pair.
347; COST-LABEL: uaddw2_4s
348; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <8 x i16> %a to <8 x i32>
349; CODE-LABEL: uaddw2_4s
350; CODE:       uaddw2 v2.4s, v2.4s, v0.8h
351; CODE-NEXT:  uaddw v0.4s, v1.4s, v0.4h
352define <8 x i32> @uaddw2_4s(<8 x i16> %a, <8 x i32> %b) {
353  %tmp0 = zext <8 x i16> %a to <8 x i32>
354  %tmp1 = add <8 x i32> %b, %tmp0
355  ret <8 x i32> %tmp1
356}
357
; Only the second add operand is zero-extended (<4 x i32> to <4 x i64>): the zext is expected to cost 0 and codegen is expected to emit a uaddw2/uaddw pair.
358; COST-LABEL: uaddw2_2d
359; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <4 x i32> %a to <4 x i64>
360; CODE-LABEL: uaddw2_2d
361; CODE:       uaddw2 v2.2d, v2.2d, v0.4s
362; CODE-NEXT:  uaddw v0.2d, v1.2d, v0.2s
363define <4 x i64> @uaddw2_2d(<4 x i32> %a, <4 x i64> %b) {
364  %tmp0 = zext <4 x i32> %a to <4 x i64>
365  %tmp1 = add <4 x i64> %b, %tmp0
366  ret <4 x i64> %tmp1
367}
368
; Only the second add operand is sign-extended (<8 x i8> to <8 x i16>): the sext is expected to cost 0 and codegen is expected to emit a single saddw.
369; COST-LABEL: saddw_8h
370; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <8 x i8> %a to <8 x i16>
371; CODE-LABEL: saddw_8h
372; CODE:       saddw v0.8h, v1.8h, v0.8b
373define <8 x i16> @saddw_8h(<8 x i8> %a, <8 x i16> %b) {
374  %tmp0 = sext <8 x i8> %a to <8 x i16>
375  %tmp1 = add <8 x i16> %b, %tmp0
376  ret <8 x i16> %tmp1
377}
378
; Only the second add operand is sign-extended (<4 x i16> to <4 x i32>): the sext is expected to cost 0 and codegen is expected to emit a single saddw.
379; COST-LABEL: saddw_4s
380; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <4 x i16> %a to <4 x i32>
381; CODE-LABEL: saddw_4s
382; CODE:       saddw v0.4s, v1.4s, v0.4h
383define <4 x i32> @saddw_4s(<4 x i16> %a, <4 x i32> %b) {
384  %tmp0 = sext <4 x i16> %a to <4 x i32>
385  %tmp1 = add <4 x i32> %b, %tmp0
386  ret <4 x i32> %tmp1
387}
388
; Only the second add operand is sign-extended (<2 x i32> to <2 x i64>): the sext is expected to cost 0 and codegen is expected to emit a single saddw.
389; COST-LABEL: saddw_2d
390; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <2 x i32> %a to <2 x i64>
391; CODE-LABEL: saddw_2d
392; CODE:       saddw v0.2d, v1.2d, v0.2s
393define <2 x i64> @saddw_2d(<2 x i32> %a, <2 x i64> %b) {
394  %tmp0 = sext <2 x i32> %a to <2 x i64>
395  %tmp1 = add <2 x i64> %b, %tmp0
396  ret <2 x i64> %tmp1
397}
398
; Only the second add operand is sign-extended (<16 x i8> to <16 x i16>): the sext is expected to cost 0 and codegen is expected to emit a saddw2/saddw pair.
399; COST-LABEL: saddw2_8h
400; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <16 x i8> %a to <16 x i16>
401; CODE-LABEL: saddw2_8h
402; CODE:       saddw2 v2.8h, v2.8h, v0.16b
403; CODE-NEXT:  saddw v0.8h, v1.8h, v0.8b
404define <16 x i16> @saddw2_8h(<16 x i8> %a, <16 x i16> %b) {
405  %tmp0 = sext <16 x i8> %a to <16 x i16>
406  %tmp1 = add <16 x i16> %b, %tmp0
407  ret <16 x i16> %tmp1
408}
409
; Only the second add operand is sign-extended (<8 x i16> to <8 x i32>): the sext is expected to cost 0 and codegen is expected to emit a saddw2/saddw pair.
410; COST-LABEL: saddw2_4s
411; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <8 x i16> %a to <8 x i32>
412; CODE-LABEL: saddw2_4s
413; CODE:       saddw2 v2.4s, v2.4s, v0.8h
414; CODE-NEXT:  saddw v0.4s, v1.4s, v0.4h
415define <8 x i32> @saddw2_4s(<8 x i16> %a, <8 x i32> %b) {
416  %tmp0 = sext <8 x i16> %a to <8 x i32>
417  %tmp1 = add <8 x i32> %b, %tmp0
418  ret <8 x i32> %tmp1
419}
420
; Only the second add operand is sign-extended (<4 x i32> to <4 x i64>): the sext is expected to cost 0 and codegen is expected to emit a saddw2/saddw pair.
421; COST-LABEL: saddw2_2d
422; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <4 x i32> %a to <4 x i64>
423; CODE-LABEL: saddw2_2d
424; CODE:       saddw2 v2.2d, v2.2d, v0.4s
425; CODE-NEXT:  saddw v0.2d, v1.2d, v0.2s
426define <4 x i64> @saddw2_2d(<4 x i32> %a, <4 x i64> %b) {
427  %tmp0 = sext <4 x i32> %a to <4 x i64>
428  %tmp1 = add <4 x i64> %b, %tmp0
429  ret <4 x i64> %tmp1
430}
431
; Only the second sub operand is zero-extended (<8 x i8> to <8 x i16>): the zext is expected to cost 0 and codegen is expected to emit a single usubw.
432; COST-LABEL: usubw_8h
433; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <8 x i8> %a to <8 x i16>
434; CODE-LABEL: usubw_8h
435; CODE:       usubw v0.8h, v1.8h, v0.8b
436define <8 x i16> @usubw_8h(<8 x i8> %a, <8 x i16> %b) {
437  %tmp0 = zext <8 x i8> %a to <8 x i16>
438  %tmp1 = sub <8 x i16> %b, %tmp0
439  ret <8 x i16> %tmp1
440}
441
; Only the second sub operand is zero-extended (<4 x i16> to <4 x i32>): the zext is expected to cost 0 and codegen is expected to emit a single usubw.
442; COST-LABEL: usubw_4s
443; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <4 x i16> %a to <4 x i32>
444; CODE-LABEL: usubw_4s
445; CODE:       usubw v0.4s, v1.4s, v0.4h
446define <4 x i32> @usubw_4s(<4 x i16> %a, <4 x i32> %b) {
447  %tmp0 = zext <4 x i16> %a to <4 x i32>
448  %tmp1 = sub <4 x i32> %b, %tmp0
449  ret <4 x i32> %tmp1
450}
451
; Only the second sub operand is zero-extended (<2 x i32> to <2 x i64>): the zext is expected to cost 0 and codegen is expected to emit a single usubw.
452; COST-LABEL: usubw_2d
453; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <2 x i32> %a to <2 x i64>
454; CODE-LABEL: usubw_2d
455; CODE:       usubw v0.2d, v1.2d, v0.2s
456define <2 x i64> @usubw_2d(<2 x i32> %a, <2 x i64> %b) {
457  %tmp0 = zext <2 x i32> %a to <2 x i64>
458  %tmp1 = sub <2 x i64> %b, %tmp0
459  ret <2 x i64> %tmp1
460}
461
; Only the second sub operand is zero-extended (<16 x i8> to <16 x i16>): the zext is expected to cost 0 and codegen is expected to emit a usubw2/usubw pair.
462; COST-LABEL: usubw2_8h
463; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <16 x i8> %a to <16 x i16>
464; CODE-LABEL: usubw2_8h
465; CODE:       usubw2 v2.8h, v2.8h, v0.16b
466; CODE-NEXT:  usubw v0.8h, v1.8h, v0.8b
467define <16 x i16> @usubw2_8h(<16 x i8> %a, <16 x i16> %b) {
468  %tmp0 = zext <16 x i8> %a to <16 x i16>
469  %tmp1 = sub <16 x i16> %b, %tmp0
470  ret <16 x i16> %tmp1
471}
472
; Only the second sub operand is zero-extended (<8 x i16> to <8 x i32>): the zext is expected to cost 0 and codegen is expected to emit a usubw2/usubw pair.
473; COST-LABEL: usubw2_4s
474; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <8 x i16> %a to <8 x i32>
475; CODE-LABEL: usubw2_4s
476; CODE:       usubw2 v2.4s, v2.4s, v0.8h
477; CODE-NEXT:  usubw v0.4s, v1.4s, v0.4h
478define <8 x i32> @usubw2_4s(<8 x i16> %a, <8 x i32> %b) {
479  %tmp0 = zext <8 x i16> %a to <8 x i32>
480  %tmp1 = sub <8 x i32> %b, %tmp0
481  ret <8 x i32> %tmp1
482}
483
; Only the second sub operand is zero-extended (<4 x i32> to <4 x i64>): the zext is expected to cost 0 and codegen is expected to emit a usubw2/usubw pair.
484; COST-LABEL: usubw2_2d
485; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <4 x i32> %a to <4 x i64>
486; CODE-LABEL: usubw2_2d
487; CODE:       usubw2 v2.2d, v2.2d, v0.4s
488; CODE-NEXT:  usubw v0.2d, v1.2d, v0.2s
489define <4 x i64> @usubw2_2d(<4 x i32> %a, <4 x i64> %b) {
490  %tmp0 = zext <4 x i32> %a to <4 x i64>
491  %tmp1 = sub <4 x i64> %b, %tmp0
492  ret <4 x i64> %tmp1
493}
494
; Only the second sub operand is sign-extended (<8 x i8> to <8 x i16>): the sext is expected to cost 0 and codegen is expected to emit a single ssubw.
495; COST-LABEL: ssubw_8h
496; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <8 x i8> %a to <8 x i16>
497; CODE-LABEL: ssubw_8h
498; CODE:       ssubw v0.8h, v1.8h, v0.8b
499define <8 x i16> @ssubw_8h(<8 x i8> %a, <8 x i16> %b) {
500  %tmp0 = sext <8 x i8> %a to <8 x i16>
501  %tmp1 = sub <8 x i16> %b, %tmp0
502  ret <8 x i16> %tmp1
503}
504
; Only the second sub operand is sign-extended (<4 x i16> to <4 x i32>): the sext is expected to cost 0 and codegen is expected to emit a single ssubw.
505; COST-LABEL: ssubw_4s
506; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <4 x i16> %a to <4 x i32>
507; CODE-LABEL: ssubw_4s
508; CODE:       ssubw v0.4s, v1.4s, v0.4h
509define <4 x i32> @ssubw_4s(<4 x i16> %a, <4 x i32> %b) {
510  %tmp0 = sext <4 x i16> %a to <4 x i32>
511  %tmp1 = sub <4 x i32> %b, %tmp0
512  ret <4 x i32> %tmp1
513}
514
; Only the second sub operand is sign-extended (<2 x i32> to <2 x i64>): the sext is expected to cost 0 and codegen is expected to emit a single ssubw.
515; COST-LABEL: ssubw_2d
516; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <2 x i32> %a to <2 x i64>
517; CODE-LABEL: ssubw_2d
518; CODE:       ssubw v0.2d, v1.2d, v0.2s
519define <2 x i64> @ssubw_2d(<2 x i32> %a, <2 x i64> %b) {
520  %tmp0 = sext <2 x i32> %a to <2 x i64>
521  %tmp1 = sub <2 x i64> %b, %tmp0
522  ret <2 x i64> %tmp1
523}
524
; Only the second sub operand is sign-extended (<16 x i8> to <16 x i16>): the sext is expected to cost 0 and codegen is expected to emit a ssubw2/ssubw pair.
525; COST-LABEL: ssubw2_8h
526; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <16 x i8> %a to <16 x i16>
527; CODE-LABEL: ssubw2_8h
528; CODE:       ssubw2 v2.8h, v2.8h, v0.16b
529; CODE-NEXT:  ssubw v0.8h, v1.8h, v0.8b
530define <16 x i16> @ssubw2_8h(<16 x i8> %a, <16 x i16> %b) {
531  %tmp0 = sext <16 x i8> %a to <16 x i16>
532  %tmp1 = sub <16 x i16> %b, %tmp0
533  ret <16 x i16> %tmp1
534}
535
; Only the second sub operand is sign-extended (<8 x i16> to <8 x i32>): the sext is expected to cost 0 and codegen is expected to emit a ssubw2/ssubw pair.
536; COST-LABEL: ssubw2_4s
537; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <8 x i16> %a to <8 x i32>
538; CODE-LABEL: ssubw2_4s
539; CODE:       ssubw2 v2.4s, v2.4s, v0.8h
540; CODE-NEXT:  ssubw v0.4s, v1.4s, v0.4h
541define <8 x i32> @ssubw2_4s(<8 x i16> %a, <8 x i32> %b) {
542  %tmp0 = sext <8 x i16> %a to <8 x i32>
543  %tmp1 = sub <8 x i32> %b, %tmp0
544  ret <8 x i32> %tmp1
545}
546
; Only the second sub operand is sign-extended (<4 x i32> to <4 x i64>): the sext is expected to cost 0 and codegen is expected to emit a ssubw2/ssubw pair.
547; COST-LABEL: ssubw2_2d
548; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <4 x i32> %a to <4 x i64>
549; CODE-LABEL: ssubw2_2d
550; CODE:       ssubw2 v2.2d, v2.2d, v0.4s
551; CODE-NEXT:  ssubw v0.2d, v1.2d, v0.2s
552define <4 x i64> @ssubw2_2d(<4 x i32> %a, <4 x i64> %b) {
553  %tmp0 = sext <4 x i32> %a to <4 x i64>
554  %tmp1 = sub <4 x i64> %b, %tmp0
555  ret <4 x i64> %tmp1
556}
557
; Negative case: the zext is the first operand of the sub and the second operand is not extended; the zext is expected to cost 1. Only the cost-model output is checked.
558; COST-LABEL: neg_wrong_operand_order
559; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %tmp0 = zext <8 x i8> %a to <8 x i16>
560define <8 x i16> @neg_wrong_operand_order(<8 x i8> %a, <8 x i16> %b) {
561  %tmp0 = zext <8 x i8> %a to <8 x i16>
562  %tmp1 = sub <8 x i16> %tmp0, %b
563  ret <8 x i16> %tmp1
564}
565
; Negative case: the zext feeds a udiv rather than an add or sub; the zext is expected to cost 1. Only the cost-model output is checked.
566; COST-LABEL: neg_non_widening_op
567; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %tmp0 = zext <8 x i8> %a to <8 x i16>
568define <8 x i16> @neg_non_widening_op(<8 x i8> %a, <8 x i16> %b) {
569  %tmp0 = zext <8 x i8> %a to <8 x i16>
570  %tmp1 = udiv <8 x i16> %b, %tmp0
571  ret <8 x i16> %tmp1
572}
573
; Negative case: the add mixes a sext (first operand) with a zext (second operand); the sext is expected to cost 1 while the zext is still expected to cost 0.
574; COST-LABEL: neg_dissimilar_operand_kind_0
575; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %tmp0 = sext <8 x i8> %a to <8 x i16>
576; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <8 x i8> %b to <8 x i16>
577define <8 x i16> @neg_dissimilar_operand_kind_0(<8 x i8> %a, <8 x i8> %b) {
578  %tmp0 = sext <8 x i8> %a to <8 x i16>
579  %tmp1 = zext <8 x i8> %b to <8 x i16>
580  %tmp2 = add <8 x i16> %tmp0, %tmp1
581  ret <8 x i16> %tmp2
582}
583
; Negative case: both add operands are zexts but from different source types (<4 x i8> and <4 x i16>); the first is expected to cost 1 while the second is still expected to cost 0.
584; COST-LABEL: neg_dissimilar_operand_kind_1
585; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %tmp0 = zext <4 x i8> %a to <4 x i32>
586; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <4 x i16> %b to <4 x i32>
587define <4 x i32> @neg_dissimilar_operand_kind_1(<4 x i8> %a, <4 x i16> %b) {
588  %tmp0 = zext <4 x i8> %a to <4 x i32>
589  %tmp1 = zext <4 x i16> %b to <4 x i32>
590  %tmp2 = add <4 x i32> %tmp0, %tmp1
591  ret <4 x i32> %tmp2
592}
593
; Negative case: the zext source is <16 x i4>, which the test name classifies as an illegal vector type; the zext is expected to cost 1.
594; COST-LABEL: neg_illegal_vector_type_0
595; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %tmp0 = zext <16 x i4> %a to <16 x i8>
596define <16 x i8> @neg_illegal_vector_type_0(<16 x i4> %a, <16 x i8> %b) {
597  %tmp0 = zext <16 x i4> %a to <16 x i8>
598  %tmp1 = sub <16 x i8> %b, %tmp0
599  ret <16 x i8> %tmp1
600}
601
; Negative case: single-element vectors (<1 x i16> to <1 x i32>); the zext is expected to cost 1.
; NOTE(review): "llegal" in the name looks like a typo for "illegal"; a rename must change the label check and the define together.
602; COST-LABEL: neg_llegal_vector_type_1
603; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %tmp0 = zext <1 x i16> %a to <1 x i32>
604define <1 x i32> @neg_llegal_vector_type_1(<1 x i16> %a, <1 x i32> %b) {
605  %tmp0 = zext <1 x i16> %a to <1 x i32>
606  %tmp1 = add <1 x i32> %b, %tmp0
607  ret <1 x i32> %tmp1
608}
609
; Negative case: the zext goes from i16 elements straight to i64 elements (<4 x i16> to <4 x i64>); the zext is expected to cost 3.
; NOTE(review): "llegal" in the name looks like a typo for "illegal"; a rename must change the label check and the define together.
610; COST-LABEL: neg_llegal_vector_type_2
611; COST-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %tmp0 = zext <4 x i16> %a to <4 x i64>
612define <4 x i64> @neg_llegal_vector_type_2(<4 x i16> %a, <4 x i64> %b) {
613  %tmp0 = zext <4 x i16> %a to <4 x i64>
614  %tmp1 = add <4 x i64> %b, %tmp0
615  ret <4 x i64> %tmp1
616}
617
; Negative case: three-element vectors with i34 and i68 elements (<3 x i34> to <3 x i68>); the zext is expected to cost 15.
; NOTE(review): "llegal" in the name looks like a typo for "illegal"; a rename must change the label check and the define together.
618; COST-LABEL: neg_llegal_vector_type_3
619; COST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %tmp0 = zext <3 x i34> %a to <3 x i68>
620define <3 x i68> @neg_llegal_vector_type_3(<3 x i34> %a, <3 x i68> %b) {
621  %tmp0 = zext <3 x i34> %a to <3 x i68>
622  %tmp1 = add <3 x i68> %b, %tmp0
623  ret <3 x i68> %tmp1
624}
625