xref: /llvm-project/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll (revision 400b725c2740ee29560bac9ad870b4ddb56bf3f2)
1; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
2; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
3; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
4; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
5; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
6
; reduce_i1: holds one llvm.vector.reduce.add call per fixed-length <N x i1>
; vector type, N = 1, 2, 4, ..., 128, so the cost model prints one line for each.
; The CHECK prefix covers the throughput RUN lines and the SIZE prefix covers
; the code-size RUN lines. Both prefixes currently expect a cost of 2 for every
; width. %arg and the call results are unused; the function returns undef.
; The CHECK/SIZE lines are autogenerated - regenerate them with
; utils/update_analyze_test_checks.py instead of editing them by hand.
7define i32 @reduce_i1(i32 %arg) {
8; CHECK-LABEL: 'reduce_i1'
9; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.add.v1i1(<1 x i1> undef)
10; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.vector.reduce.add.v2i1(<2 x i1> undef)
11; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.vector.reduce.add.v4i1(<4 x i1> undef)
12; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.vector.reduce.add.v8i1(<8 x i1> undef)
13; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.vector.reduce.add.v16i1(<16 x i1> undef)
14; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.add.v32i1(<32 x i1> undef)
15; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.add.v64i1(<64 x i1> undef)
16; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.add.v128i1(<128 x i1> undef)
17; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
18;
19; SIZE-LABEL: 'reduce_i1'
20; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.add.v1i1(<1 x i1> undef)
21; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.vector.reduce.add.v2i1(<2 x i1> undef)
22; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.vector.reduce.add.v4i1(<4 x i1> undef)
23; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.vector.reduce.add.v8i1(<8 x i1> undef)
24; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.vector.reduce.add.v16i1(<16 x i1> undef)
25; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.add.v32i1(<32 x i1> undef)
26; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.add.v64i1(<64 x i1> undef)
27; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.add.v128i1(<128 x i1> undef)
28; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
29;
30  %V1   = call i1 @llvm.vector.reduce.add.v1i1(<1 x i1> undef)
31  %V2   = call i1 @llvm.vector.reduce.add.v2i1(<2 x i1> undef)
32  %V4   = call i1 @llvm.vector.reduce.add.v4i1(<4 x i1> undef)
33  %V8   = call i1 @llvm.vector.reduce.add.v8i1(<8 x i1> undef)
34  %V16  = call i1 @llvm.vector.reduce.add.v16i1(<16 x i1> undef)
35  %V32  = call i1 @llvm.vector.reduce.add.v32i1(<32 x i1> undef)
36  %V64  = call i1 @llvm.vector.reduce.add.v64i1(<64 x i1> undef)
37  %V128 = call i1 @llvm.vector.reduce.add.v128i1(<128 x i1> undef)
38  ret i32 undef
39}
40
; reduce_i8: holds one llvm.vector.reduce.add call per fixed-length <N x i8>
; vector type, N = 1, 2, 4, ..., 128, so the cost model prints one line for each.
; Expected throughput cost (CHECK) rises by one per doubling of N, from 2 at v1
; to 9 at v128. Expected code-size cost (SIZE) is 3 for every width.
; %arg and the call results are unused; the function returns undef.
; The CHECK/SIZE lines are autogenerated - regenerate them with
; utils/update_analyze_test_checks.py instead of editing them by hand.
41define i32 @reduce_i8(i32 %arg) {
42; CHECK-LABEL: 'reduce_i8'
43; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef)
44; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
45; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
46; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
47; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
48; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
49; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
50; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
51; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
52;
53; SIZE-LABEL: 'reduce_i8'
54; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef)
55; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
56; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
57; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
58; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
59; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
60; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
61; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
62; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
63;
64  %V1   = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef)
65  %V2   = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
66  %V4   = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
67  %V8   = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
68  %V16  = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
69  %V32  = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
70  %V64  = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
71  %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
72  ret i32 undef
73}
74
; reduce_i16: holds one llvm.vector.reduce.add call per fixed-length <N x i16>
; vector type, N = 1, 2, 4, ..., 128, so the cost model prints one line for each.
; Expected throughput cost (CHECK) rises by one per doubling of N from 2 at v1
; to 8 at v64, then jumps to 16 at v128. Expected code-size cost (SIZE) is 3 up
; to v64 and 4 at v128.
; NOTE(review): the jump at v128 presumably marks the point where the vector no
; longer fits a single legal register group and must be split - confirm against
; the RISC-V TTI implementation.
; %arg and the call results are unused; the function returns undef.
; The CHECK/SIZE lines are autogenerated - regenerate them with
; utils/update_analyze_test_checks.py instead of editing them by hand.
75define i32 @reduce_i16(i32 %arg) {
76; CHECK-LABEL: 'reduce_i16'
77; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> undef)
78; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
79; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
80; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
81; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
82; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef)
83; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef)
84; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> undef)
85; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
86;
87; SIZE-LABEL: 'reduce_i16'
88; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> undef)
89; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
90; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
91; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
92; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
93; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef)
94; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef)
95; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> undef)
96; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
97;
98  %V1   = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> undef)
99  %V2   = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
100  %V4   = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
101  %V8   = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
102  %V16  = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
103  %V32  = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef)
104  %V64  = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef)
105  %V128 = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> undef)
106  ret i32 undef
107}
108
; reduce_i32: holds one llvm.vector.reduce.add call per fixed-length <N x i32>
; vector type, N = 1, 2, 4, ..., 128, so the cost model prints one line for each.
; Expected throughput cost (CHECK) rises by one per doubling of N from 2 at v1
; to 7 at v32, then jumps to 15 at v64 and 31 at v128. Expected code-size cost
; (SIZE) is 3 up to v32, then 4 at v64 and 6 at v128.
; NOTE(review): the jumps from v64 onward presumably mark vectors that no
; longer fit a single legal register group and must be split - confirm against
; the RISC-V TTI implementation.
; %arg and the call results are unused; the function returns undef.
; The CHECK/SIZE lines are autogenerated - regenerate them with
; utils/update_analyze_test_checks.py instead of editing them by hand.
109define i32 @reduce_i32(i32 %arg) {
110; CHECK-LABEL: 'reduce_i32'
111; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> undef)
112; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
113; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
114; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
115; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
116; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef)
117; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> undef)
118; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.add.v128i32(<128 x i32> undef)
119; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
120;
121; SIZE-LABEL: 'reduce_i32'
122; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> undef)
123; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
124; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
125; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
126; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
127; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef)
128; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> undef)
129; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.add.v128i32(<128 x i32> undef)
130; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
131;
132  %V1   = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> undef)
133  %V2   = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
134  %V4   = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
135  %V8   = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
136  %V16  = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
137  %V32  = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef)
138  %V64  = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> undef)
139  %V128 = call i32 @llvm.vector.reduce.add.v128i32(<128 x i32> undef)
140  ret i32 undef
141}
142
; reduce_i64: holds one llvm.vector.reduce.add call per fixed-length <N x i64>
; vector type, N = 1, 2, 4, ..., 128, so the cost model prints one line for each.
; Expected throughput cost (CHECK) rises by one per doubling of N from 2 at v1
; to 6 at v16, then jumps to 14 at v32, 30 at v64 and 62 at v128. Expected
; code-size cost (SIZE) is 3 up to v16, then 4 at v32, 6 at v64 and 10 at v128.
; The same CHECK/SIZE expectations are shared by the riscv32 and riscv64 RUN
; lines, even though i64 is wider than the riscv32 scalar register.
; NOTE(review): the jumps from v32 onward presumably mark vectors that no
; longer fit a single legal register group and must be split - confirm against
; the RISC-V TTI implementation.
; %arg and the call results are unused; the function returns undef.
; The CHECK/SIZE lines are autogenerated - regenerate them with
; utils/update_analyze_test_checks.py instead of editing them by hand.
143define i32 @reduce_i64(i32 %arg) {
144; CHECK-LABEL: 'reduce_i64'
145; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
146; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
147; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
148; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
149; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
150; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> undef)
151; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.add.v64i64(<64 x i64> undef)
152; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.add.v128i64(<128 x i64> undef)
153; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
154;
155; SIZE-LABEL: 'reduce_i64'
156; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
157; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
158; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
159; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
160; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
161; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> undef)
162; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.add.v64i64(<64 x i64> undef)
163; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.add.v128i64(<128 x i64> undef)
164; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
165;
166  %V1   = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
167  %V2   = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
168  %V4   = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
169  %V8   = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
170  %V16  = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
171  %V32  = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> undef)
172  %V64  = call i64 @llvm.vector.reduce.add.v64i64(<64 x i64> undef)
173  %V128 = call i64 @llvm.vector.reduce.add.v128i64(<128 x i64> undef)
174  ret i32 undef
175}
176
; Declarations of every llvm.vector.reduce.add overload called by the test
; functions in this file: element types i1, i8, i16, i32 and i64, each at
; fixed vector lengths 1, 2, 4, 8, 16, 32, 64 and 128. Each overload takes one
; vector operand and returns a scalar of the element type.
177declare i1 @llvm.vector.reduce.add.v1i1(<1 x i1>)
178declare i1 @llvm.vector.reduce.add.v2i1(<2 x i1>)
179declare i1 @llvm.vector.reduce.add.v4i1(<4 x i1>)
180declare i1 @llvm.vector.reduce.add.v8i1(<8 x i1>)
181declare i1 @llvm.vector.reduce.add.v16i1(<16 x i1>)
182declare i1 @llvm.vector.reduce.add.v32i1(<32 x i1>)
183declare i1 @llvm.vector.reduce.add.v64i1(<64 x i1>)
184declare i1 @llvm.vector.reduce.add.v128i1(<128 x i1>)
185declare i8 @llvm.vector.reduce.add.v1i8(<1 x i8>)
186declare i8 @llvm.vector.reduce.add.v2i8(<2 x i8>)
187declare i8 @llvm.vector.reduce.add.v4i8(<4 x i8>)
188declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8>)
189declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>)
190declare i8 @llvm.vector.reduce.add.v32i8(<32 x i8>)
191declare i8 @llvm.vector.reduce.add.v64i8(<64 x i8>)
192declare i8 @llvm.vector.reduce.add.v128i8(<128 x i8>)
193declare i16 @llvm.vector.reduce.add.v1i16(<1 x i16>)
194declare i16 @llvm.vector.reduce.add.v2i16(<2 x i16>)
195declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>)
196declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
197declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>)
198declare i16 @llvm.vector.reduce.add.v32i16(<32 x i16>)
199declare i16 @llvm.vector.reduce.add.v64i16(<64 x i16>)
200declare i16 @llvm.vector.reduce.add.v128i16(<128 x i16>)
201declare i32 @llvm.vector.reduce.add.v1i32(<1 x i32>)
202declare i32 @llvm.vector.reduce.add.v2i32(<2 x i32>)
203declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
204declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
205declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
206declare i32 @llvm.vector.reduce.add.v32i32(<32 x i32>)
207declare i32 @llvm.vector.reduce.add.v64i32(<64 x i32>)
208declare i32 @llvm.vector.reduce.add.v128i32(<128 x i32>)
209declare i64 @llvm.vector.reduce.add.v1i64(<1 x i64>)
210declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
211declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)
212declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>)
213declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>)
214declare i64 @llvm.vector.reduce.add.v32i64(<32 x i64>)
215declare i64 @llvm.vector.reduce.add.v64i64(<64 x i64>)
216declare i64 @llvm.vector.reduce.add.v128i64(<128 x i64>)
217