; xref: /llvm-project/llvm/test/Analysis/CostModel/AArch64/ext-rhadd.ll (revision 900bea9b1ce095123c03e5bb8834d8fb168378a8)
; Print the cost model for every instruction twice, once with only SVE
; enabled and once with SVE2, and verify each output with its own prefix.
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefix=SVE
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s --check-prefix=SVE2
3
; SRHADD: patterns built from sign-extended (sext) operands.
5
; Positive test, fixed-width vectors: two <16 x i8> loads are sign-extended to
; <16 x i16> and combined as trunc(lshr(add(add(%ext1, 1), %ext2), 1)).
; Both configurations (-mattr=+sve and -mattr=+sve2) expect each sext to be
; reported with cost 0.
; Presumably the extends are free because the whole expression is expected to
; be selected as one rounding halving add; that selection is not visible in
; this file, so confirm against the AArch64 cost model.
; NOTE(review): `nuw` on adds of sign-extended values yields poison for some
; negative inputs (e.g. a lane of %ld1 equal to -1). Confirm this is intended.
; %dst is unused; the result is stored back through %a.
define void @srhadd_i8_sext_i16_fixed(ptr %a, ptr %b, ptr %dst) {
; SVE-LABEL: 'srhadd_i8_sext_i16_fixed'
; SVE:       Cost Model: Found an estimated cost of 0 for instruction: %ext1 = sext <16 x i8> %ld1 to <16 x i16>
; SVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ext2 = sext <16 x i8> %ld2 to <16 x i16>
;
; SVE2-LABEL: 'srhadd_i8_sext_i16_fixed'
; SVE2:       Cost Model: Found an estimated cost of 0 for instruction: %ext1 = sext <16 x i8> %ld1 to <16 x i16>
; SVE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ext2 = sext <16 x i8> %ld2 to <16 x i16>
;
  %ld1 = load <16 x i8>, ptr %a
  %ld2 = load <16 x i8>, ptr %b
  %ext1 = sext <16 x i8> %ld1 to <16 x i16>
  %ext2 = sext <16 x i8> %ld2 to <16 x i16>
  %add1 = add nuw nsw <16 x i16> %ext1, splat (i16 1)
  %add2 = add nuw nsw <16 x i16> %add1, %ext2
  %shr = lshr <16 x i16> %add2, splat (i16 1)
  %trunc = trunc <16 x i16> %shr to <16 x i8>
  store <16 x i8> %trunc, ptr %a
  ret void
}
26
; Positive test, scalable vectors: two <vscale x 16 x i8> loads are
; sign-extended to <vscale x 16 x i16> and combined as
; trunc(lshr(add(add(%ext1, 1), %ext2), 1)).
; Expected cost of each sext: 2 with -mattr=+sve, 0 with -mattr=+sve2.
; Presumably SVE2 is where the expression can be selected as one rounding
; halving add, making the extends free; confirm against the AArch64 cost model.
; NOTE(review): `nuw` on adds of sign-extended values yields poison for some
; negative inputs. Confirm this is intended.
; %dst is unused; the result is stored back through %a.
define void @srhadd_i8_sext_i16_scalable(ptr %a, ptr %b, ptr %dst) {
; SVE-LABEL: 'srhadd_i8_sext_i16_scalable'
; SVE:       Cost Model: Found an estimated cost of 2 for instruction: %ext1 = sext <vscale x 16 x i8> %ld1 to <vscale x 16 x i16>
; SVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ext2 = sext <vscale x 16 x i8> %ld2 to <vscale x 16 x i16>
;
; SVE2-LABEL: 'srhadd_i8_sext_i16_scalable'
; SVE2:       Cost Model: Found an estimated cost of 0 for instruction: %ext1 = sext <vscale x 16 x i8> %ld1 to <vscale x 16 x i16>
; SVE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ext2 = sext <vscale x 16 x i8> %ld2 to <vscale x 16 x i16>
;
  %ld1 = load <vscale x 16 x i8>, ptr %a
  %ld2 = load <vscale x 16 x i8>, ptr %b
  %ext1 = sext <vscale x 16 x i8> %ld1 to <vscale x 16 x i16>
  %ext2 = sext <vscale x 16 x i8> %ld2 to <vscale x 16 x i16>
  %add1 = add nuw nsw <vscale x 16 x i16> %ext1, splat (i16 1)
  %add2 = add nuw nsw <vscale x 16 x i16> %add1, %ext2
  %shr = lshr <vscale x 16 x i16> %add2, splat (i16 1)
  %trunc = trunc <vscale x 16 x i16> %shr to <vscale x 16 x i8>
  store <vscale x 16 x i8> %trunc, ptr %a
  ret void
}
47
; Positive test, scalable vectors with a wide extend: two <vscale x 8 x i16>
; loads are sign-extended straight to <vscale x 8 x i64> (4x the element
; width) and combined as trunc(lshr(add(add(%ext1, 1), %ext2), 1)).
; Expected cost of each sext: 6 with -mattr=+sve, 0 with -mattr=+sve2.
; NOTE(review): `nuw` on adds of sign-extended values yields poison for some
; negative inputs. Confirm this is intended.
; %dst is unused; the result is stored back through %a.
define void @srhadd_i16_sext_i64_scalable(ptr %a, ptr %b, ptr %dst) {
; SVE-LABEL: 'srhadd_i16_sext_i64_scalable'
; SVE:       Cost Model: Found an estimated cost of 6 for instruction: %ext1 = sext <vscale x 8 x i16> %ld1 to <vscale x 8 x i64>
; SVE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %ext2 = sext <vscale x 8 x i16> %ld2 to <vscale x 8 x i64>
;
; SVE2-LABEL: 'srhadd_i16_sext_i64_scalable'
; SVE2:       Cost Model: Found an estimated cost of 0 for instruction: %ext1 = sext <vscale x 8 x i16> %ld1 to <vscale x 8 x i64>
; SVE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ext2 = sext <vscale x 8 x i16> %ld2 to <vscale x 8 x i64>
;
  %ld1 = load <vscale x 8 x i16>, ptr %a
  %ld2 = load <vscale x 8 x i16>, ptr %b
  %ext1 = sext <vscale x 8 x i16> %ld1 to <vscale x 8 x i64>
  %ext2 = sext <vscale x 8 x i16> %ld2 to <vscale x 8 x i64>
  %add1 = add nuw nsw <vscale x 8 x i64> %ext1, splat (i64 1)
  %add2 = add nuw nsw <vscale x 8 x i64> %add1, %ext2
  %shr = lshr <vscale x 8 x i64> %add2, splat (i64 1)
  %trunc = trunc <vscale x 8 x i64> %shr to <vscale x 8 x i16>
  store <vscale x 8 x i16> %trunc, ptr %a
  ret void
}
68
; URHADD: patterns built from zero-extended (zext) operands.
70
; Positive test, fixed-width vectors: two <4 x i32> loads are zero-extended to
; <4 x i64> and combined as trunc(lshr(add(add(%ext1, 1), %ext2), 1)).
; Both configurations (-mattr=+sve and -mattr=+sve2) expect each zext to be
; reported with cost 0.
; Presumably the extends are free because the whole expression is expected to
; be selected as one rounding halving add; that selection is not visible in
; this file, so confirm against the AArch64 cost model.
; %dst is unused; the result is stored back through %a.
define void @urhadd_i32_zext_i64_fixed(ptr %a, ptr %b, ptr %dst) {
; SVE-LABEL: 'urhadd_i32_zext_i64_fixed'
; SVE:       Cost Model: Found an estimated cost of 0 for instruction: %ext1 = zext <4 x i32> %ld1 to <4 x i64>
; SVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ext2 = zext <4 x i32> %ld2 to <4 x i64>
;
; SVE2-LABEL: 'urhadd_i32_zext_i64_fixed'
; SVE2:       Cost Model: Found an estimated cost of 0 for instruction: %ext1 = zext <4 x i32> %ld1 to <4 x i64>
; SVE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ext2 = zext <4 x i32> %ld2 to <4 x i64>
;
  %ld1 = load <4 x i32>, ptr %a
  %ld2 = load <4 x i32>, ptr %b
  %ext1 = zext <4 x i32> %ld1 to <4 x i64>
  %ext2 = zext <4 x i32> %ld2 to <4 x i64>
  %add1 = add nuw nsw <4 x i64> %ext1, splat (i64 1)
  %add2 = add nuw nsw <4 x i64> %add1, %ext2
  %shr = lshr <4 x i64> %add2, splat (i64 1)
  %trunc = trunc <4 x i64> %shr to <4 x i32>
  store <4 x i32> %trunc, ptr %a
  ret void
}
91
; Positive test, scalable vectors with the widest extend in this file: two
; <vscale x 16 x i8> loads are zero-extended straight to <vscale x 16 x i64>
; (8x the element width) and combined as
; trunc(lshr(add(add(%ext1, 1), %ext2), 1)).
; Expected cost of each zext: 14 with -mattr=+sve, 0 with -mattr=+sve2.
; %dst is unused; the result is stored back through %a.
define void @urhadd_i8_zext_i64(ptr %a, ptr %b, ptr %dst) {
; SVE-LABEL: 'urhadd_i8_zext_i64'
; SVE:       Cost Model: Found an estimated cost of 14 for instruction: %ext1 = zext <vscale x 16 x i8> %ld1 to <vscale x 16 x i64>
; SVE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %ext2 = zext <vscale x 16 x i8> %ld2 to <vscale x 16 x i64>
;
; SVE2-LABEL: 'urhadd_i8_zext_i64'
; SVE2:       Cost Model: Found an estimated cost of 0 for instruction: %ext1 = zext <vscale x 16 x i8> %ld1 to <vscale x 16 x i64>
; SVE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ext2 = zext <vscale x 16 x i8> %ld2 to <vscale x 16 x i64>
;
  %ld1 = load <vscale x 16 x i8>, ptr %a
  %ld2 = load <vscale x 16 x i8>, ptr %b
  %ext1 = zext <vscale x 16 x i8> %ld1 to <vscale x 16 x i64>
  %ext2 = zext <vscale x 16 x i8> %ld2 to <vscale x 16 x i64>
  %add1 = add nuw nsw <vscale x 16 x i64> %ext1, splat (i64 1)
  %add2 = add nuw nsw <vscale x 16 x i64> %add1, %ext2
  %shr = lshr <vscale x 16 x i64> %add2, splat (i64 1)
  %trunc = trunc <vscale x 16 x i64> %shr to <vscale x 16 x i8>
  store <vscale x 16 x i8> %trunc, ptr %a
  ret void
}
112
; Positive test, scalable vectors: two <vscale x 8 x i16> loads are
; zero-extended to <vscale x 8 x i32> and combined as
; trunc(lshr(add(add(%ext1, 1), %ext2), 1)).
; Expected cost of each zext: 2 with -mattr=+sve, 0 with -mattr=+sve2.
; %dst is unused; the result is stored back through %a.
define void @urhadd_i16_zext_i32(ptr %a, ptr %b, ptr %dst) {
; SVE-LABEL: 'urhadd_i16_zext_i32'
; SVE:       Cost Model: Found an estimated cost of 2 for instruction: %ext1 = zext <vscale x 8 x i16> %ld1 to <vscale x 8 x i32>
; SVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ext2 = zext <vscale x 8 x i16> %ld2 to <vscale x 8 x i32>
;
; SVE2-LABEL: 'urhadd_i16_zext_i32'
; SVE2:       Cost Model: Found an estimated cost of 0 for instruction: %ext1 = zext <vscale x 8 x i16> %ld1 to <vscale x 8 x i32>
; SVE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ext2 = zext <vscale x 8 x i16> %ld2 to <vscale x 8 x i32>
;
  %ld1 = load <vscale x 8 x i16>, ptr %a
  %ld2 = load <vscale x 8 x i16>, ptr %b
  %ext1 = zext <vscale x 8 x i16> %ld1 to <vscale x 8 x i32>
  %ext2 = zext <vscale x 8 x i16> %ld2 to <vscale x 8 x i32>
  %add1 = add nuw nsw <vscale x 8 x i32> %ext1, splat (i32 1)
  %add2 = add nuw nsw <vscale x 8 x i32> %add1, %ext2
  %shr = lshr <vscale x 8 x i32> %add2, splat (i32 1)
  %trunc = trunc <vscale x 8 x i32> %shr to <vscale x 8 x i16>
  store <vscale x 8 x i16> %trunc, ptr %a
  ret void
}
133
; NEGATIVE TESTS: the extends below are expected to keep cost 2 under both +sve and +sve2.
135
; Negative test: same trunc(lshr(add(add(%ext1, 1), %ext2), 1)) shape as the
; positive <vscale x 16 x i8> tests, except that %ext1 is a sext while %ext2
; is a zext. Each extend is expected to cost 2 under both -mattr=+sve and
; -mattr=+sve2, i.e. SVE2 does not make the mixed-signedness extends free.
; %dst is unused; the result is stored back through %a.
define void @ext_operand_mismatch(ptr %a, ptr %b, ptr %dst) {
; SVE-LABEL: 'ext_operand_mismatch'
; SVE:       Cost Model: Found an estimated cost of 2 for instruction: %ext1 = sext <vscale x 16 x i8> %ld1 to <vscale x 16 x i16>
; SVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ext2 = zext <vscale x 16 x i8> %ld2 to <vscale x 16 x i16>
;
; SVE2-LABEL: 'ext_operand_mismatch'
; SVE2:       Cost Model: Found an estimated cost of 2 for instruction: %ext1 = sext <vscale x 16 x i8> %ld1 to <vscale x 16 x i16>
; SVE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ext2 = zext <vscale x 16 x i8> %ld2 to <vscale x 16 x i16>
;
  %ld1 = load <vscale x 16 x i8>, ptr %a
  %ld2 = load <vscale x 16 x i8>, ptr %b
  %ext1 = sext <vscale x 16 x i8> %ld1 to <vscale x 16 x i16>
  %ext2 = zext <vscale x 16 x i8> %ld2 to <vscale x 16 x i16>
  %add1 = add nuw nsw <vscale x 16 x i16> %ext1, splat (i16 1)
  %add2 = add nuw nsw <vscale x 16 x i16> %add1, %ext2
  %shr = lshr <vscale x 16 x i16> %add2, splat (i16 1)
  %trunc = trunc <vscale x 16 x i16> %shr to <vscale x 16 x i8>
  store <vscale x 16 x i8> %trunc, ptr %a
  ret void
}
156
; Negative test: the trunc(lshr(add(add(%ext1, 1), %ext2), 1)) chain is
; present, but both adds have a second user: %add1 and %add2 are also operands
; of %add.res, and it is %add.res (not %shr) that is truncated and stored.
; Each sext is expected to cost 2 under both -mattr=+sve and -mattr=+sve2.
; %trunc is dead here; presumably it is kept so that the extra add uses are
; the only difference from the positive tests.
; %dst is unused; the result is stored back through %a.
define void @add_multiple_uses(ptr %a, ptr %b, ptr %dst) {
; SVE-LABEL: 'add_multiple_uses'
; SVE:       Cost Model: Found an estimated cost of 2 for instruction: %ext1 = sext <vscale x 8 x i16> %ld1 to <vscale x 8 x i32>
; SVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ext2 = sext <vscale x 8 x i16> %ld2 to <vscale x 8 x i32>
;
; SVE2-LABEL: 'add_multiple_uses'
; SVE2:       Cost Model: Found an estimated cost of 2 for instruction: %ext1 = sext <vscale x 8 x i16> %ld1 to <vscale x 8 x i32>
; SVE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ext2 = sext <vscale x 8 x i16> %ld2 to <vscale x 8 x i32>
;
  %ld1 = load <vscale x 8 x i16>, ptr %a
  %ld2 = load <vscale x 8 x i16>, ptr %b
  %ext1 = sext <vscale x 8 x i16> %ld1 to <vscale x 8 x i32>
  %ext2 = sext <vscale x 8 x i16> %ld2 to <vscale x 8 x i32>
  %add1 = add nuw nsw <vscale x 8 x i32> %ext1, splat (i32 1)
  %add2 = add nuw nsw <vscale x 8 x i32> %add1, %ext2
  %shr = lshr <vscale x 8 x i32> %add2, splat (i32 1)
  %trunc = trunc <vscale x 8 x i32> %shr to <vscale x 8 x i16>
  %add.res = add nuw nsw <vscale x 8 x i32> %add1, %add2
  %res = trunc <vscale x 8 x i32> %add.res to <vscale x 8 x i16>
  store <vscale x 8 x i16> %res, ptr %a
  ret void
}
179
; Negative test: the trunc(lshr(add(add(%ext1, 1), %ext2), 1)) chain is
; present, but the shift result has a second user: %shr feeds both %trunc and
; %add3 (which also re-uses %add2), and it is %add3 that is truncated and
; stored.
; Each zext is expected to cost 2 under both -mattr=+sve and -mattr=+sve2.
; %trunc is dead here; presumably it is kept so that the extra uses are the
; only difference from the positive tests.
; %dst is unused; the result is stored back through %a.
define void @shift_multiple_uses(ptr %a, ptr %b, ptr %dst) {
; SVE-LABEL: 'shift_multiple_uses'
; SVE:       Cost Model: Found an estimated cost of 2 for instruction: %ext1 = zext <vscale x 16 x i8> %ld1 to <vscale x 16 x i16>
; SVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ext2 = zext <vscale x 16 x i8> %ld2 to <vscale x 16 x i16>
;
; SVE2-LABEL: 'shift_multiple_uses'
; SVE2:       Cost Model: Found an estimated cost of 2 for instruction: %ext1 = zext <vscale x 16 x i8> %ld1 to <vscale x 16 x i16>
; SVE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ext2 = zext <vscale x 16 x i8> %ld2 to <vscale x 16 x i16>
;
  %ld1 = load <vscale x 16 x i8>, ptr %a
  %ld2 = load <vscale x 16 x i8>, ptr %b
  %ext1 = zext <vscale x 16 x i8> %ld1 to <vscale x 16 x i16>
  %ext2 = zext <vscale x 16 x i8> %ld2 to <vscale x 16 x i16>
  %add1 = add nuw nsw <vscale x 16 x i16> %ext1, splat (i16 1)
  %add2 = add nuw nsw <vscale x 16 x i16> %add1, %ext2
  %shr = lshr <vscale x 16 x i16> %add2, splat (i16 1)
  %trunc = trunc <vscale x 16 x i16> %shr to <vscale x 16 x i8>
  %add3 = add nuw nsw <vscale x 16 x i16> %shr, %add2
  %res = trunc <vscale x 16 x i16> %add3 to <vscale x 16 x i8>
  store <vscale x 16 x i8> %res, ptr %a
  ret void
}
202