xref: /llvm-project/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll (revision dec47b76f406242dfb9d36da4d7adfb171c71104)
1; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
2; RUN: opt -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=throughput   < %s | FileCheck %s --check-prefix=THRU
3; RUN: opt -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency      < %s | FileCheck %s --check-prefix=LATE
4; RUN: opt -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size    < %s | FileCheck %s --check-prefix=SIZE
5; RUN: opt -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency < %s | FileCheck %s --check-prefix=SIZE_LATE
6
7; Test a cross-section of intrinsics for various cost-kinds.
8; Other test files may check for accuracy of a particular intrinsic
9; across subtargets or types. This is just a basic correctness check using the
10; default x86 target and a legal scalar type (i32/float) and/or an
11; illegal vector type (16 x i32/float).
12
; Declarations of the intrinsics exercised by the test functions that follow.
; Most appear as a scalar (i32/float) overload paired with a 16-element vector
; overload; masked gather/scatter, the vector reductions and memcpy are
; declared in a single form.
13declare {i32, i1} @llvm.umul.with.overflow.i32(i32, i32)
14declare {<16 x i32>, <16 x i1>} @llvm.umul.with.overflow.v16i32(<16 x i32>, <16 x i32>)
15
16declare i32 @llvm.smax.i32(i32, i32)
17declare <16 x i32> @llvm.smax.v16i32(<16 x i32>, <16 x i32>)
18
19declare float @llvm.copysign.f32(float, float)
20declare <16 x float> @llvm.copysign.v16f32(<16 x float>, <16 x float>)
21
22declare float @llvm.fmuladd.f32(float, float, float)
23declare <16 x float> @llvm.fmuladd.v16f32(<16 x float>, <16 x float>, <16 x float>)
24
25declare float @llvm.log2.f32(float)
26declare <16 x float> @llvm.log2.v16f32(<16 x float>)
27
28declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
29declare <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float>, <16 x float>, metadata, metadata)
30
31declare float @llvm.maximum.f32(float, float)
32declare <16 x float> @llvm.maximum.v16f32(<16 x float>, <16 x float>)
33
34declare i32 @llvm.cttz.i32(i32, i1)
35declare <16 x i32> @llvm.cttz.v16i32(<16 x i32>, i1)
36
37declare i32 @llvm.ctlz.i32(i32, i1)
38declare <16 x i32> @llvm.ctlz.v16i32(<16 x i32>, i1)
39
40declare i32 @llvm.fshl.i32(i32, i32, i32)
41declare <16 x i32> @llvm.fshl.v16i32(<16 x i32>, <16 x i32>, <16 x i32>)
42
43declare <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x float>)
44declare void @llvm.masked.scatter.v16f32.v16p0(<16 x float>, <16 x ptr>, i32, <16 x i1>)
45declare float @llvm.vector.reduce.fmax.v16f32(<16 x float>)
46declare float @llvm.vector.reduce.fmul.v16f32(float, <16 x float>)
47declare float @llvm.vector.reduce.fadd.v16f32(float, <16 x float>)
48
49declare void @llvm.memcpy.p0.p0.i32(ptr, ptr, i32, i1)
50
; Unsigned multiply-with-overflow on a scalar i32 pair and on a <16 x i32> pair.
; The call results are unused; the test only checks the per-instruction cost
; printed by the cost-model pass, once for each of the four cost kinds
; (throughput, latency, code-size, size-latency).
51define void @umul(i32 %a, i32 %b, <16 x i32> %va, <16 x i32> %vb) {
52; THRU-LABEL: 'umul'
53; THRU-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
54; THRU-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
55; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
56;
57; LATE-LABEL: 'umul'
58; LATE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
59; LATE-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
60; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
61;
62; SIZE-LABEL: 'umul'
63; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
64; SIZE-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
65; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
66;
67; SIZE_LATE-LABEL: 'umul'
68; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
69; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
70; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
71;
72  %s = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
73  %v = call {<16 x i32>, <16 x i1>} @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
74  ret void
75}
76
; Signed integer maximum (llvm.smax) on a scalar i32 pair and on a <16 x i32>
; pair. Results are unused; only the printed per-instruction costs are checked
; for each cost kind.
77define void @smax(i32 %a, i32 %b, <16 x i32> %va, <16 x i32> %vb) {
78; THRU-LABEL: 'smax'
79; THRU-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.smax.i32(i32 %a, i32 %b)
80; THRU-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb)
81; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
82;
83; LATE-LABEL: 'smax'
84; LATE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.smax.i32(i32 %a, i32 %b)
85; LATE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb)
86; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
87;
88; SIZE-LABEL: 'smax'
89; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.smax.i32(i32 %a, i32 %b)
90; SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb)
91; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
92;
93; SIZE_LATE-LABEL: 'smax'
94; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %s = call i32 @llvm.smax.i32(i32 %a, i32 %b)
95; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb)
96; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
97;
98  %s = call i32 @llvm.smax.i32(i32 %a, i32 %b)
99  %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb)
100  ret void
101}
102
; llvm.copysign on a scalar float pair and on a <16 x float> pair. Results are
; unused; only the printed per-instruction costs are checked for each cost kind.
103define void @fcopysign(float %a, float %b, <16 x float> %va, <16 x float> %vb) {
104; THRU-LABEL: 'fcopysign'
105; THRU-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call float @llvm.copysign.f32(float %a, float %b)
106; THRU-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x float> @llvm.copysign.v16f32(<16 x float> %va, <16 x float> %vb)
107; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
108;
109; LATE-LABEL: 'fcopysign'
110; LATE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call float @llvm.copysign.f32(float %a, float %b)
111; LATE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x float> @llvm.copysign.v16f32(<16 x float> %va, <16 x float> %vb)
112; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
113;
114; SIZE-LABEL: 'fcopysign'
115; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call float @llvm.copysign.f32(float %a, float %b)
116; SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x float> @llvm.copysign.v16f32(<16 x float> %va, <16 x float> %vb)
117; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
118;
119; SIZE_LATE-LABEL: 'fcopysign'
120; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call float @llvm.copysign.f32(float %a, float %b)
121; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x float> @llvm.copysign.v16f32(<16 x float> %va, <16 x float> %vb)
122; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
123;
124  %s = call float @llvm.copysign.f32(float %a, float %b)
125  %v = call <16 x float> @llvm.copysign.v16f32(<16 x float> %va, <16 x float> %vb)
126  ret void
127}
128
; llvm.fmuladd on three scalar floats and on three <16 x float> vectors.
; Results are unused; only the printed per-instruction costs are checked for
; each cost kind.
129define void @fmuladd(float %a, float %b, float %c, <16 x float> %va, <16 x float> %vb, <16 x float> %vc) {
130; THRU-LABEL: 'fmuladd'
131; THRU-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
132; THRU-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc)
133; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
134;
135; LATE-LABEL: 'fmuladd'
136; LATE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
137; LATE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc)
138; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
139;
140; SIZE-LABEL: 'fmuladd'
141; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
142; SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc)
143; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
144;
145; SIZE_LATE-LABEL: 'fmuladd'
146; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
147; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc)
148; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
149;
150  %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
151  %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc)
152  ret void
153}
154
; llvm.log2 on a scalar float and on a <16 x float> vector. Results are unused;
; only the printed per-instruction costs are checked for each cost kind.
155define void @log2(float %a, <16 x float> %va) {
156; THRU-LABEL: 'log2'
157; THRU-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %s = call float @llvm.log2.f32(float %a)
158; THRU-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va)
159; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
160;
161; LATE-LABEL: 'log2'
162; LATE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %s = call float @llvm.log2.f32(float %a)
163; LATE-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va)
164; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
165;
166; SIZE-LABEL: 'log2'
167; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.log2.f32(float %a)
168; SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va)
169; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
170;
171; SIZE_LATE-LABEL: 'log2'
172; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %s = call float @llvm.log2.f32(float %a)
173; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va)
174; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
175;
176  %s = call float @llvm.log2.f32(float %a)
177  %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va)
178  ret void
179}
180
; Constrained floating-point add on a scalar float and on a <16 x float>
; vector, in a function marked strictfp. Each call adds its input to itself
; and passes the "round.dynamic" / "fpexcept.ignore" metadata operands.
; The vector result is named %t here (other functions in this file use %v);
; the autogenerated check lines depend on that name.
181define void @constrained_fadd(float %a, <16 x float> %va) strictfp {
182; THRU-LABEL: 'constrained_fadd'
183; THRU-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.ignore")
184; THRU-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %va, <16 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore")
185; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
186;
187; LATE-LABEL: 'constrained_fadd'
188; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.ignore")
189; LATE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %va, <16 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore")
190; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
191;
192; SIZE-LABEL: 'constrained_fadd'
193; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.ignore")
194; SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %va, <16 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore")
195; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
196;
197; SIZE_LATE-LABEL: 'constrained_fadd'
198; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.ignore")
199; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %va, <16 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore")
200; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
201;
202  %s = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.ignore")
203  %t = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %va, <16 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore")
204  ret void
205}
206
; llvm.maximum on a scalar float pair and on a <16 x float> pair. Results are
; unused; only the printed per-instruction costs are checked for each cost kind.
207define void @fmaximum(float %a, float %b, <16 x float> %va, <16 x float> %vb) {
208; THRU-LABEL: 'fmaximum'
209; THRU-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b)
210; THRU-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb)
211; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
212;
213; LATE-LABEL: 'fmaximum'
214; LATE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b)
215; LATE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb)
216; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
217;
218; SIZE-LABEL: 'fmaximum'
219; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b)
220; SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb)
221; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
222;
223; SIZE_LATE-LABEL: 'fmaximum'
224; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b)
225; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb)
226; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
227;
228  %s = call float @llvm.maximum.f32(float %a, float %b)
229  %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb)
230  ret void
231}
232
; Count-trailing-zeros on a scalar i32 and on a <16 x i32> vector. Both calls
; pass false for the i1 flag argument (compare @ctlz, which passes true).
; Results are unused; only the printed per-instruction costs are checked for
; each cost kind.
233define void @cttz(i32 %a, <16 x i32> %va) {
234; THRU-LABEL: 'cttz'
235; THRU-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
236; THRU-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
237; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
238;
239; LATE-LABEL: 'cttz'
240; LATE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
241; LATE-NEXT:  Cost Model: Found an estimated cost of 124 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
242; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
243;
244; SIZE-LABEL: 'cttz'
245; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
246; SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
247; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
248;
249; SIZE_LATE-LABEL: 'cttz'
250; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
251; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
252; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
253;
254  %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
255  %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
256  ret void
257}
258
; Count-leading-zeros on a scalar i32 and on a <16 x i32> vector. Both calls
; pass true for the i1 flag argument (per the LLVM LangRef, a zero input then
; yields poison); @cttz in this file passes false instead.
; Results are unused; only the printed per-instruction costs are checked for
; each cost kind.
259define void @ctlz(i32 %a, <16 x i32> %va) {
260; THRU-LABEL: 'ctlz'
261; THRU-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
262; THRU-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true)
263; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
264;
265; LATE-LABEL: 'ctlz'
266; LATE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
267; LATE-NEXT:  Cost Model: Found an estimated cost of 180 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true)
268; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
269;
270; SIZE-LABEL: 'ctlz'
271; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
272; SIZE-NEXT:  Cost Model: Found an estimated cost of 152 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true)
273; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
274;
275; SIZE_LATE-LABEL: 'ctlz'
276; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
277; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true)
278; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
279;
280  %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
281  %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true)
282  ret void
283}
284
; Funnel shift left (llvm.fshl) on three scalar i32 operands and on three
; <16 x i32> operands; the shift amount is a variable in both calls.
; Results are unused; only the printed per-instruction costs are checked for
; each cost kind.
285define void @fshl(i32 %a, i32 %b, i32 %c, <16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) {
286; THRU-LABEL: 'fshl'
287; THRU-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
288; THRU-NEXT:  Cost Model: Found an estimated cost of 140 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
289; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
290;
291; LATE-LABEL: 'fshl'
292; LATE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
293; LATE-NEXT:  Cost Model: Found an estimated cost of 145 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
294; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
295;
296; SIZE-LABEL: 'fshl'
297; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
298; SIZE-NEXT:  Cost Model: Found an estimated cost of 125 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
299; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
300;
301; SIZE_LATE-LABEL: 'fshl'
302; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
303; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 149 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
304; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
305;
306  %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
307  %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
308  ret void
309}
310
; Masked gather of <16 x float> through a vector of 16 pointers (%va), with an
; alignment argument of 1, a variable mask (%vb) and a pass-through vector (%vc).
; Vector form only; the result is unused and only the printed per-instruction
; costs are checked for each cost kind.
311define void @maskedgather(<16 x ptr> %va, <16 x i1> %vb, <16 x float> %vc) {
312; THRU-LABEL: 'maskedgather'
313; THRU-NEXT:  Cost Model: Found an estimated cost of 61 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
314; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
315;
316; LATE-LABEL: 'maskedgather'
317; LATE-NEXT:  Cost Model: Found an estimated cost of 77 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
318; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
319;
320; SIZE-LABEL: 'maskedgather'
321; SIZE-NEXT:  Cost Model: Found an estimated cost of 77 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
322; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
323;
324; SIZE_LATE-LABEL: 'maskedgather'
325; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 77 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
326; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
327;
328  %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
329  ret void
330}
331
; Masked scatter of a <16 x float> value (%va) through a vector of 16 pointers
; (%vb), with an alignment argument of 1 and a variable mask (%vc).
; Vector form only; only the printed per-instruction costs are checked for
; each cost kind.
332define void @maskedscatter(<16 x float> %va, <16 x ptr> %vb, <16 x i1> %vc) {
333; THRU-LABEL: 'maskedscatter'
334; THRU-NEXT:  Cost Model: Found an estimated cost of 61 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> %va, <16 x ptr> %vb, i32 1, <16 x i1> %vc)
335; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
336;
337; LATE-LABEL: 'maskedscatter'
338; LATE-NEXT:  Cost Model: Found an estimated cost of 77 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> %va, <16 x ptr> %vb, i32 1, <16 x i1> %vc)
339; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
340;
341; SIZE-LABEL: 'maskedscatter'
342; SIZE-NEXT:  Cost Model: Found an estimated cost of 77 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> %va, <16 x ptr> %vb, i32 1, <16 x i1> %vc)
343; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
344;
345; SIZE_LATE-LABEL: 'maskedscatter'
346; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 77 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> %va, <16 x ptr> %vb, i32 1, <16 x i1> %vc)
347; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
348;
349  call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> %va, <16 x ptr> %vb, i32 1, <16 x i1> %vc)
350  ret void
351}
352
; Floating-point max reduction of a <16 x float> vector to a scalar float,
; called without fast-math flags. The result is unused; only the printed
; per-instruction costs are checked for each cost kind.
353define void @reduce_fmax(<16 x float> %va) {
354; THRU-LABEL: 'reduce_fmax'
355; THRU-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
356; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
357;
358; LATE-LABEL: 'reduce_fmax'
359; LATE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
360; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
361;
362; SIZE-LABEL: 'reduce_fmax'
363; SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
364; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
365;
366; SIZE_LATE-LABEL: 'reduce_fmax'
367; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
368; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
369;
370  %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
371  ret void
372}
373
; Floating-point multiply reduction of a <16 x float> vector with a constant
; start value of 42.0, called without fast-math flags (contrast with
; @reduce_fadd_fast, whose call carries the `fast` flag).
; The result is unused; only the printed per-instruction costs are checked for
; each cost kind.
374define void @reduce_fmul(<16 x float> %va) {
375; THRU-LABEL: 'reduce_fmul'
376; THRU-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %v = call float @llvm.vector.reduce.fmul.v16f32(float 4.200000e+01, <16 x float> %va)
377; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
378;
379; LATE-LABEL: 'reduce_fmul'
380; LATE-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %v = call float @llvm.vector.reduce.fmul.v16f32(float 4.200000e+01, <16 x float> %va)
381; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
382;
383; SIZE-LABEL: 'reduce_fmul'
384; SIZE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v = call float @llvm.vector.reduce.fmul.v16f32(float 4.200000e+01, <16 x float> %va)
385; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
386;
387; SIZE_LATE-LABEL: 'reduce_fmul'
388; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v = call float @llvm.vector.reduce.fmul.v16f32(float 4.200000e+01, <16 x float> %va)
389; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
390;
391  %v = call float @llvm.vector.reduce.fmul.v16f32(float 42.0, <16 x float> %va)
392  ret void
393}
394
; Floating-point add reduction of a <16 x float> vector with a constant start
; value of 0.0; the call carries the `fast` fast-math flag (contrast with
; @reduce_fmul, which has no fast-math flags).
; The result is unused; only the printed per-instruction costs are checked for
; each cost kind.
395define void @reduce_fadd_fast(<16 x float> %va) {
396; THRU-LABEL: 'reduce_fadd_fast'
397; THRU-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> %va)
398; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
399;
400; LATE-LABEL: 'reduce_fadd_fast'
401; LATE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> %va)
402; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
403;
404; SIZE-LABEL: 'reduce_fadd_fast'
405; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> %va)
406; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
407;
408; SIZE_LATE-LABEL: 'reduce_fadd_fast'
409; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> %va)
410; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
411;
412  %v = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.0, <16 x float> %va)
413  ret void
414}
415
; llvm.memcpy from %b to %a with a constant length of 32, both pointers marked
; align 1, and the trailing i1 (volatile) flag set to false.
; The i32 %c parameter is not referenced by the body; the length is the
; literal 32, and the autogenerated check lines depend on that constant.
; Only the printed per-instruction costs are checked for each cost kind.
416define void @memcpy(ptr %a, ptr %b, i32 %c) {
417; THRU-LABEL: 'memcpy'
418; THRU-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %a, ptr align 1 %b, i32 32, i1 false)
419; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
420;
421; LATE-LABEL: 'memcpy'
422; LATE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %a, ptr align 1 %b, i32 32, i1 false)
423; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
424;
425; SIZE-LABEL: 'memcpy'
426; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %a, ptr align 1 %b, i32 32, i1 false)
427; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
428;
429; SIZE_LATE-LABEL: 'memcpy'
430; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %a, ptr align 1 %b, i32 32, i1 false)
431; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
432;
433  call void @llvm.memcpy.p0.p0.i32(ptr align 1 %a, ptr align 1 %b, i32 32, i1 false)
434  ret void
435}
436