xref: /llvm-project/llvm/test/CodeGen/WebAssembly/vector-reduce.ll (revision a3de21cac1fb8f1dd98cfe1d1443e2d3f0a97351)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2; RUN: llc < %s -mtriple=wasm32 -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s --check-prefix=SIMD128
3
; Integer add reduction over <2 x i64>.
; The checks expect one shuffle that swaps the two 64-bit halves, a single
; i64x2.add, and extraction of lane 0 as the result.
4define i64 @pairwise_add_v2i64(<2 x i64> %arg) {
5; SIMD128-LABEL: pairwise_add_v2i64:
6; SIMD128:         .functype pairwise_add_v2i64 (v128) -> (i64)
7; SIMD128-NEXT:  # %bb.0:
8; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7
9; SIMD128-NEXT:    i64x2.add $push1=, $0, $pop0
10; SIMD128-NEXT:    i64x2.extract_lane $push2=, $pop1, 0
11; SIMD128-NEXT:    return $pop2
12  %res = tail call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %arg)
13  ret i64 %res
14}
15
; Integer add reduction over <4 x i32>.
; The checks expect two shuffle + i32x4.add steps followed by extraction of
; lane 0.
16define i32 @pairwise_add_v4i32(<4 x i32> %arg) {
17; SIMD128-LABEL: pairwise_add_v4i32:
18; SIMD128:         .functype pairwise_add_v4i32 (v128) -> (i32)
19; SIMD128-NEXT:  # %bb.0:
20; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
21; SIMD128-NEXT:    i32x4.add $push5=, $0, $pop0
22; SIMD128-NEXT:    local.tee $push4=, $0=, $pop5
23; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
24; SIMD128-NEXT:    i32x4.add $push2=, $pop4, $pop1
25; SIMD128-NEXT:    i32x4.extract_lane $push3=, $pop2, 0
26; SIMD128-NEXT:    return $pop3
27  %res = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %arg)
28  ret i32 %res
29}
30
; Integer add reduction over <8 x i16>.
; The checks expect three shuffle + i16x8.add steps; the i16 result is read
; with i16x8.extract_lane_u and the expected functype returns i32.
31define i16 @pairwise_add_v8i16(<8 x i16> %arg) {
32; SIMD128-LABEL: pairwise_add_v8i16:
33; SIMD128:         .functype pairwise_add_v8i16 (v128) -> (i32)
34; SIMD128-NEXT:  # %bb.0:
35; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
36; SIMD128-NEXT:    i16x8.add $push8=, $0, $pop0
37; SIMD128-NEXT:    local.tee $push7=, $0=, $pop8
38; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
39; SIMD128-NEXT:    i16x8.add $push6=, $pop7, $pop1
40; SIMD128-NEXT:    local.tee $push5=, $0=, $pop6
41; SIMD128-NEXT:    i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
42; SIMD128-NEXT:    i16x8.add $push3=, $pop5, $pop2
43; SIMD128-NEXT:    i16x8.extract_lane_u $push4=, $pop3, 0
44; SIMD128-NEXT:    return $pop4
45  %res = tail call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %arg)
46  ret i16 %res
47}
48
; Integer add reduction over <16 x i8>.
; The checks expect four shuffle + i8x16.add steps; the i8 result is read
; with i8x16.extract_lane_u and the expected functype returns i32.
49define i8 @pairwise_add_v16i8(<16 x i8> %arg) {
50; SIMD128-LABEL: pairwise_add_v16i8:
51; SIMD128:         .functype pairwise_add_v16i8 (v128) -> (i32)
52; SIMD128-NEXT:  # %bb.0:
53; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
54; SIMD128-NEXT:    i8x16.add $push11=, $0, $pop0
55; SIMD128-NEXT:    local.tee $push10=, $0=, $pop11
56; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
57; SIMD128-NEXT:    i8x16.add $push9=, $pop10, $pop1
58; SIMD128-NEXT:    local.tee $push8=, $0=, $pop9
59; SIMD128-NEXT:    i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
60; SIMD128-NEXT:    i8x16.add $push7=, $pop8, $pop2
61; SIMD128-NEXT:    local.tee $push6=, $0=, $pop7
62; SIMD128-NEXT:    i8x16.shuffle $push3=, $0, $0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
63; SIMD128-NEXT:    i8x16.add $push4=, $pop6, $pop3
64; SIMD128-NEXT:    i8x16.extract_lane_u $push5=, $pop4, 0
65; SIMD128-NEXT:    return $pop5
66  %res = tail call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %arg)
67  ret i8 %res
68}
69
; Integer multiply reduction over <2 x i64>.
; The checks expect one shuffle that swaps the two 64-bit halves, a single
; i64x2.mul, and extraction of lane 0.
70define i64 @pairwise_mul_v2i64(<2 x i64> %arg) {
71; SIMD128-LABEL: pairwise_mul_v2i64:
72; SIMD128:         .functype pairwise_mul_v2i64 (v128) -> (i64)
73; SIMD128-NEXT:  # %bb.0:
74; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7
75; SIMD128-NEXT:    i64x2.mul $push1=, $0, $pop0
76; SIMD128-NEXT:    i64x2.extract_lane $push2=, $pop1, 0
77; SIMD128-NEXT:    return $pop2
78  %res = tail call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> %arg)
79  ret i64 %res
80}
81
; Integer multiply reduction over <4 x i32>.
; The checks expect two shuffle + i32x4.mul steps followed by extraction of
; lane 0.
82define i32 @pairwise_mul_v4i32(<4 x i32> %arg) {
83; SIMD128-LABEL: pairwise_mul_v4i32:
84; SIMD128:         .functype pairwise_mul_v4i32 (v128) -> (i32)
85; SIMD128-NEXT:  # %bb.0:
86; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
87; SIMD128-NEXT:    i32x4.mul $push5=, $0, $pop0
88; SIMD128-NEXT:    local.tee $push4=, $0=, $pop5
89; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
90; SIMD128-NEXT:    i32x4.mul $push2=, $pop4, $pop1
91; SIMD128-NEXT:    i32x4.extract_lane $push3=, $pop2, 0
92; SIMD128-NEXT:    return $pop3
93  %res = tail call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %arg)
94  ret i32 %res
95}
96
; Integer multiply reduction over <8 x i16>.
; The checks expect three shuffle + i16x8.mul steps; the i16 result is read
; with i16x8.extract_lane_u and the expected functype returns i32.
97define i16 @pairwise_mul_v8i16(<8 x i16> %arg) {
98; SIMD128-LABEL: pairwise_mul_v8i16:
99; SIMD128:         .functype pairwise_mul_v8i16 (v128) -> (i32)
100; SIMD128-NEXT:  # %bb.0:
101; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
102; SIMD128-NEXT:    i16x8.mul $push8=, $0, $pop0
103; SIMD128-NEXT:    local.tee $push7=, $0=, $pop8
104; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
105; SIMD128-NEXT:    i16x8.mul $push6=, $pop7, $pop1
106; SIMD128-NEXT:    local.tee $push5=, $0=, $pop6
107; SIMD128-NEXT:    i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
108; SIMD128-NEXT:    i16x8.mul $push3=, $pop5, $pop2
109; SIMD128-NEXT:    i16x8.extract_lane_u $push4=, $pop3, 0
110; SIMD128-NEXT:    return $pop4
111  %res = tail call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> %arg)
112  ret i16 %res
113}
114
; Integer multiply reduction over <16 x i8>.
; Unlike the wider element types, the checks contain no vector multiply:
; after one shuffle that moves bytes 8..15 down, lanes 0..7 of both vectors
; are read with i8x16.extract_lane_u and combined by a tree of scalar i32.mul.
115define i8 @pairwise_mul_v16i8(<16 x i8> %arg) {
116; SIMD128-LABEL: pairwise_mul_v16i8:
117; SIMD128:         .functype pairwise_mul_v16i8 (v128) -> (i32)
118; SIMD128-NEXT:  # %bb.0:
119; SIMD128-NEXT:    i8x16.extract_lane_u $push26=, $0, 0
120; SIMD128-NEXT:    i8x16.shuffle $push32=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
121; SIMD128-NEXT:    local.tee $push31=, $1=, $pop32
122; SIMD128-NEXT:    i8x16.extract_lane_u $push25=, $pop31, 0
123; SIMD128-NEXT:    i32.mul $push27=, $pop26, $pop25
124; SIMD128-NEXT:    i8x16.extract_lane_u $push23=, $0, 4
125; SIMD128-NEXT:    i8x16.extract_lane_u $push22=, $1, 4
126; SIMD128-NEXT:    i32.mul $push24=, $pop23, $pop22
127; SIMD128-NEXT:    i32.mul $push28=, $pop27, $pop24
128; SIMD128-NEXT:    i8x16.extract_lane_u $push19=, $0, 2
129; SIMD128-NEXT:    i8x16.extract_lane_u $push18=, $1, 2
130; SIMD128-NEXT:    i32.mul $push20=, $pop19, $pop18
131; SIMD128-NEXT:    i8x16.extract_lane_u $push16=, $0, 6
132; SIMD128-NEXT:    i8x16.extract_lane_u $push15=, $1, 6
133; SIMD128-NEXT:    i32.mul $push17=, $pop16, $pop15
134; SIMD128-NEXT:    i32.mul $push21=, $pop20, $pop17
135; SIMD128-NEXT:    i32.mul $push29=, $pop28, $pop21
136; SIMD128-NEXT:    i8x16.extract_lane_u $push11=, $0, 1
137; SIMD128-NEXT:    i8x16.extract_lane_u $push10=, $1, 1
138; SIMD128-NEXT:    i32.mul $push12=, $pop11, $pop10
139; SIMD128-NEXT:    i8x16.extract_lane_u $push8=, $0, 5
140; SIMD128-NEXT:    i8x16.extract_lane_u $push7=, $1, 5
141; SIMD128-NEXT:    i32.mul $push9=, $pop8, $pop7
142; SIMD128-NEXT:    i32.mul $push13=, $pop12, $pop9
143; SIMD128-NEXT:    i8x16.extract_lane_u $push4=, $0, 3
144; SIMD128-NEXT:    i8x16.extract_lane_u $push3=, $1, 3
145; SIMD128-NEXT:    i32.mul $push5=, $pop4, $pop3
146; SIMD128-NEXT:    i8x16.extract_lane_u $push1=, $0, 7
147; SIMD128-NEXT:    i8x16.extract_lane_u $push0=, $1, 7
148; SIMD128-NEXT:    i32.mul $push2=, $pop1, $pop0
149; SIMD128-NEXT:    i32.mul $push6=, $pop5, $pop2
150; SIMD128-NEXT:    i32.mul $push14=, $pop13, $pop6
151; SIMD128-NEXT:    i32.mul $push30=, $pop29, $pop14
152; SIMD128-NEXT:    return $pop30
153  %res = tail call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> %arg)
154  ret i8 %res
155}
156
; Bitwise AND reduction over <2 x i64>.
; The checks expect one shuffle that swaps the two 64-bit halves, a single
; v128.and, and extraction of lane 0.
157define i64 @pairwise_and_v2i64(<2 x i64> %arg) {
158; SIMD128-LABEL: pairwise_and_v2i64:
159; SIMD128:         .functype pairwise_and_v2i64 (v128) -> (i64)
160; SIMD128-NEXT:  # %bb.0:
161; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7
162; SIMD128-NEXT:    v128.and $push1=, $0, $pop0
163; SIMD128-NEXT:    i64x2.extract_lane $push2=, $pop1, 0
164; SIMD128-NEXT:    return $pop2
165  %res = tail call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> %arg)
166  ret i64 %res
167}
168
; Bitwise AND reduction over <4 x i32>.
; The checks expect two shuffle + v128.and steps followed by extraction of
; lane 0.
169define i32 @pairwise_and_v4i32(<4 x i32> %arg) {
170; SIMD128-LABEL: pairwise_and_v4i32:
171; SIMD128:         .functype pairwise_and_v4i32 (v128) -> (i32)
172; SIMD128-NEXT:  # %bb.0:
173; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
174; SIMD128-NEXT:    v128.and $push5=, $0, $pop0
175; SIMD128-NEXT:    local.tee $push4=, $0=, $pop5
176; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
177; SIMD128-NEXT:    v128.and $push2=, $pop4, $pop1
178; SIMD128-NEXT:    i32x4.extract_lane $push3=, $pop2, 0
179; SIMD128-NEXT:    return $pop3
180  %res = tail call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %arg)
181  ret i32 %res
182}
183
; Bitwise AND reduction over <8 x i16>.
; The checks expect three shuffle + v128.and steps; the i16 result is read
; with i16x8.extract_lane_u and the expected functype returns i32.
184define i16 @pairwise_and_v8i16(<8 x i16> %arg) {
185; SIMD128-LABEL: pairwise_and_v8i16:
186; SIMD128:         .functype pairwise_and_v8i16 (v128) -> (i32)
187; SIMD128-NEXT:  # %bb.0:
188; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
189; SIMD128-NEXT:    v128.and $push8=, $0, $pop0
190; SIMD128-NEXT:    local.tee $push7=, $0=, $pop8
191; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
192; SIMD128-NEXT:    v128.and $push6=, $pop7, $pop1
193; SIMD128-NEXT:    local.tee $push5=, $0=, $pop6
194; SIMD128-NEXT:    i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
195; SIMD128-NEXT:    v128.and $push3=, $pop5, $pop2
196; SIMD128-NEXT:    i16x8.extract_lane_u $push4=, $pop3, 0
197; SIMD128-NEXT:    return $pop4
198  %res = tail call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> %arg)
199  ret i16 %res
200}
201
; Bitwise AND reduction over <16 x i8>.
; The checks expect four shuffle + v128.and steps; the i8 result is read
; with i8x16.extract_lane_u and the expected functype returns i32.
202define i8 @pairwise_and_v16i8(<16 x i8> %arg) {
203; SIMD128-LABEL: pairwise_and_v16i8:
204; SIMD128:         .functype pairwise_and_v16i8 (v128) -> (i32)
205; SIMD128-NEXT:  # %bb.0:
206; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
207; SIMD128-NEXT:    v128.and $push11=, $0, $pop0
208; SIMD128-NEXT:    local.tee $push10=, $0=, $pop11
209; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
210; SIMD128-NEXT:    v128.and $push9=, $pop10, $pop1
211; SIMD128-NEXT:    local.tee $push8=, $0=, $pop9
212; SIMD128-NEXT:    i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
213; SIMD128-NEXT:    v128.and $push7=, $pop8, $pop2
214; SIMD128-NEXT:    local.tee $push6=, $0=, $pop7
215; SIMD128-NEXT:    i8x16.shuffle $push3=, $0, $0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
216; SIMD128-NEXT:    v128.and $push4=, $pop6, $pop3
217; SIMD128-NEXT:    i8x16.extract_lane_u $push5=, $pop4, 0
218; SIMD128-NEXT:    return $pop5
219  %res = tail call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> %arg)
220  ret i8 %res
221}
222
; Bitwise OR reduction over <2 x i64>.
; The checks expect one shuffle that swaps the two 64-bit halves, a single
; v128.or, and extraction of lane 0.
223define i64 @pairwise_or_v2i64(<2 x i64> %arg) {
224; SIMD128-LABEL: pairwise_or_v2i64:
225; SIMD128:         .functype pairwise_or_v2i64 (v128) -> (i64)
226; SIMD128-NEXT:  # %bb.0:
227; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7
228; SIMD128-NEXT:    v128.or $push1=, $0, $pop0
229; SIMD128-NEXT:    i64x2.extract_lane $push2=, $pop1, 0
230; SIMD128-NEXT:    return $pop2
231  %res = tail call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %arg)
232  ret i64 %res
233}
234
; Bitwise OR reduction over <4 x i32>.
; The checks expect two shuffle + v128.or steps followed by extraction of
; lane 0.
235define i32 @pairwise_or_v4i32(<4 x i32> %arg) {
236; SIMD128-LABEL: pairwise_or_v4i32:
237; SIMD128:         .functype pairwise_or_v4i32 (v128) -> (i32)
238; SIMD128-NEXT:  # %bb.0:
239; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
240; SIMD128-NEXT:    v128.or $push5=, $0, $pop0
241; SIMD128-NEXT:    local.tee $push4=, $0=, $pop5
242; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
243; SIMD128-NEXT:    v128.or $push2=, $pop4, $pop1
244; SIMD128-NEXT:    i32x4.extract_lane $push3=, $pop2, 0
245; SIMD128-NEXT:    return $pop3
246  %res = tail call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %arg)
247  ret i32 %res
248}
249
; Bitwise OR reduction over <8 x i16>.
; The checks expect three shuffle + v128.or steps; the i16 result is read
; with i16x8.extract_lane_u and the expected functype returns i32.
250define i16 @pairwise_or_v8i16(<8 x i16> %arg) {
251; SIMD128-LABEL: pairwise_or_v8i16:
252; SIMD128:         .functype pairwise_or_v8i16 (v128) -> (i32)
253; SIMD128-NEXT:  # %bb.0:
254; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
255; SIMD128-NEXT:    v128.or $push8=, $0, $pop0
256; SIMD128-NEXT:    local.tee $push7=, $0=, $pop8
257; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
258; SIMD128-NEXT:    v128.or $push6=, $pop7, $pop1
259; SIMD128-NEXT:    local.tee $push5=, $0=, $pop6
260; SIMD128-NEXT:    i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
261; SIMD128-NEXT:    v128.or $push3=, $pop5, $pop2
262; SIMD128-NEXT:    i16x8.extract_lane_u $push4=, $pop3, 0
263; SIMD128-NEXT:    return $pop4
264  %res = tail call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> %arg)
265  ret i16 %res
266}
267
; Bitwise OR reduction over <16 x i8>.
; The checks expect four shuffle + v128.or steps; the i8 result is read
; with i8x16.extract_lane_u and the expected functype returns i32.
268define i8 @pairwise_or_v16i8(<16 x i8> %arg) {
269; SIMD128-LABEL: pairwise_or_v16i8:
270; SIMD128:         .functype pairwise_or_v16i8 (v128) -> (i32)
271; SIMD128-NEXT:  # %bb.0:
272; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
273; SIMD128-NEXT:    v128.or $push11=, $0, $pop0
274; SIMD128-NEXT:    local.tee $push10=, $0=, $pop11
275; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
276; SIMD128-NEXT:    v128.or $push9=, $pop10, $pop1
277; SIMD128-NEXT:    local.tee $push8=, $0=, $pop9
278; SIMD128-NEXT:    i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
279; SIMD128-NEXT:    v128.or $push7=, $pop8, $pop2
280; SIMD128-NEXT:    local.tee $push6=, $0=, $pop7
281; SIMD128-NEXT:    i8x16.shuffle $push3=, $0, $0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
282; SIMD128-NEXT:    v128.or $push4=, $pop6, $pop3
283; SIMD128-NEXT:    i8x16.extract_lane_u $push5=, $pop4, 0
284; SIMD128-NEXT:    return $pop5
285  %res = tail call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> %arg)
286  ret i8 %res
287}
288
; Bitwise XOR reduction over <2 x i64>.
; The checks expect one shuffle that swaps the two 64-bit halves, a single
; v128.xor, and extraction of lane 0.
289define i64 @pairwise_xor_v2i64(<2 x i64> %arg) {
290; SIMD128-LABEL: pairwise_xor_v2i64:
291; SIMD128:         .functype pairwise_xor_v2i64 (v128) -> (i64)
292; SIMD128-NEXT:  # %bb.0:
293; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7
294; SIMD128-NEXT:    v128.xor $push1=, $0, $pop0
295; SIMD128-NEXT:    i64x2.extract_lane $push2=, $pop1, 0
296; SIMD128-NEXT:    return $pop2
297  %res = tail call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> %arg)
298  ret i64 %res
299}
300
; Bitwise XOR reduction over <4 x i32>.
; The checks expect two shuffle + v128.xor steps followed by extraction of
; lane 0.
301define i32 @pairwise_xor_v4i32(<4 x i32> %arg) {
302; SIMD128-LABEL: pairwise_xor_v4i32:
303; SIMD128:         .functype pairwise_xor_v4i32 (v128) -> (i32)
304; SIMD128-NEXT:  # %bb.0:
305; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
306; SIMD128-NEXT:    v128.xor $push5=, $0, $pop0
307; SIMD128-NEXT:    local.tee $push4=, $0=, $pop5
308; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
309; SIMD128-NEXT:    v128.xor $push2=, $pop4, $pop1
310; SIMD128-NEXT:    i32x4.extract_lane $push3=, $pop2, 0
311; SIMD128-NEXT:    return $pop3
312  %res = tail call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %arg)
313  ret i32 %res
314}
315
; Bitwise XOR reduction over <8 x i16>.
; The checks expect three shuffle + v128.xor steps; the i16 result is read
; with i16x8.extract_lane_u and the expected functype returns i32.
316define i16 @pairwise_xor_v8i16(<8 x i16> %arg) {
317; SIMD128-LABEL: pairwise_xor_v8i16:
318; SIMD128:         .functype pairwise_xor_v8i16 (v128) -> (i32)
319; SIMD128-NEXT:  # %bb.0:
320; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
321; SIMD128-NEXT:    v128.xor $push8=, $0, $pop0
322; SIMD128-NEXT:    local.tee $push7=, $0=, $pop8
323; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
324; SIMD128-NEXT:    v128.xor $push6=, $pop7, $pop1
325; SIMD128-NEXT:    local.tee $push5=, $0=, $pop6
326; SIMD128-NEXT:    i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
327; SIMD128-NEXT:    v128.xor $push3=, $pop5, $pop2
328; SIMD128-NEXT:    i16x8.extract_lane_u $push4=, $pop3, 0
329; SIMD128-NEXT:    return $pop4
330  %res = tail call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> %arg)
331  ret i16 %res
332}
333
; Bitwise XOR reduction over <16 x i8>.
; The checks expect four shuffle + v128.xor steps; the i8 result is read
; with i8x16.extract_lane_u and the expected functype returns i32.
334define i8 @pairwise_xor_v16i8(<16 x i8> %arg) {
335; SIMD128-LABEL: pairwise_xor_v16i8:
336; SIMD128:         .functype pairwise_xor_v16i8 (v128) -> (i32)
337; SIMD128-NEXT:  # %bb.0:
338; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
339; SIMD128-NEXT:    v128.xor $push11=, $0, $pop0
340; SIMD128-NEXT:    local.tee $push10=, $0=, $pop11
341; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
342; SIMD128-NEXT:    v128.xor $push9=, $pop10, $pop1
343; SIMD128-NEXT:    local.tee $push8=, $0=, $pop9
344; SIMD128-NEXT:    i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
345; SIMD128-NEXT:    v128.xor $push7=, $pop8, $pop2
346; SIMD128-NEXT:    local.tee $push6=, $0=, $pop7
347; SIMD128-NEXT:    i8x16.shuffle $push3=, $0, $0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
348; SIMD128-NEXT:    v128.xor $push4=, $pop6, $pop3
349; SIMD128-NEXT:    i8x16.extract_lane_u $push5=, $pop4, 0
350; SIMD128-NEXT:    return $pop5
351  %res = tail call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> %arg)
352  ret i8 %res
353}
354
; Signed max reduction over <2 x i64>.
; The checks contain no i64x2 max instruction: the half-swapped vector is
; compared with i64x2.gt_s and the mask drives v128.bitselect, after which
; lane 0 is extracted.
355define i64 @pairwise_smax_v2i64(<2 x i64> %arg) {
356; SIMD128-LABEL: pairwise_smax_v2i64:
357; SIMD128:         .functype pairwise_smax_v2i64 (v128) -> (i64)
358; SIMD128-NEXT:  # %bb.0:
359; SIMD128-NEXT:    i8x16.shuffle $push4=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7
360; SIMD128-NEXT:    local.tee $push3=, $1=, $pop4
361; SIMD128-NEXT:    i64x2.gt_s $push0=, $0, $1
362; SIMD128-NEXT:    v128.bitselect $push1=, $0, $pop3, $pop0
363; SIMD128-NEXT:    i64x2.extract_lane $push2=, $pop1, 0
364; SIMD128-NEXT:    return $pop2
365  %res = tail call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> %arg)
366  ret i64 %res
367}
368
; Signed max reduction over <4 x i32>.
; The checks expect two shuffle + i32x4.max_s steps followed by extraction
; of lane 0.
369define i32 @pairwise_smax_v4i32(<4 x i32> %arg) {
370; SIMD128-LABEL: pairwise_smax_v4i32:
371; SIMD128:         .functype pairwise_smax_v4i32 (v128) -> (i32)
372; SIMD128-NEXT:  # %bb.0:
373; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
374; SIMD128-NEXT:    i32x4.max_s $push5=, $0, $pop0
375; SIMD128-NEXT:    local.tee $push4=, $0=, $pop5
376; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
377; SIMD128-NEXT:    i32x4.max_s $push2=, $pop4, $pop1
378; SIMD128-NEXT:    i32x4.extract_lane $push3=, $pop2, 0
379; SIMD128-NEXT:    return $pop3
380  %res = tail call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %arg)
381  ret i32 %res
382}
383
; Signed max reduction over <8 x i16>.
; The checks expect three shuffle + i16x8.max_s steps; the i16 result is
; read with i16x8.extract_lane_u and the expected functype returns i32.
384define i16 @pairwise_smax_v8i16(<8 x i16> %arg) {
385; SIMD128-LABEL: pairwise_smax_v8i16:
386; SIMD128:         .functype pairwise_smax_v8i16 (v128) -> (i32)
387; SIMD128-NEXT:  # %bb.0:
388; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
389; SIMD128-NEXT:    i16x8.max_s $push8=, $0, $pop0
390; SIMD128-NEXT:    local.tee $push7=, $0=, $pop8
391; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
392; SIMD128-NEXT:    i16x8.max_s $push6=, $pop7, $pop1
393; SIMD128-NEXT:    local.tee $push5=, $0=, $pop6
394; SIMD128-NEXT:    i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
395; SIMD128-NEXT:    i16x8.max_s $push3=, $pop5, $pop2
396; SIMD128-NEXT:    i16x8.extract_lane_u $push4=, $pop3, 0
397; SIMD128-NEXT:    return $pop4
398  %res = tail call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %arg)
399  ret i16 %res
400}
401
; Signed max reduction over <16 x i8>.
; The checks expect four shuffle + i8x16.max_s steps; the i8 result is read
; with i8x16.extract_lane_u and the expected functype returns i32.
402define i8 @pairwise_smax_v16i8(<16 x i8> %arg) {
403; SIMD128-LABEL: pairwise_smax_v16i8:
404; SIMD128:         .functype pairwise_smax_v16i8 (v128) -> (i32)
405; SIMD128-NEXT:  # %bb.0:
406; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
407; SIMD128-NEXT:    i8x16.max_s $push11=, $0, $pop0
408; SIMD128-NEXT:    local.tee $push10=, $0=, $pop11
409; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
410; SIMD128-NEXT:    i8x16.max_s $push9=, $pop10, $pop1
411; SIMD128-NEXT:    local.tee $push8=, $0=, $pop9
412; SIMD128-NEXT:    i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
413; SIMD128-NEXT:    i8x16.max_s $push7=, $pop8, $pop2
414; SIMD128-NEXT:    local.tee $push6=, $0=, $pop7
415; SIMD128-NEXT:    i8x16.shuffle $push3=, $0, $0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
416; SIMD128-NEXT:    i8x16.max_s $push4=, $pop6, $pop3
417; SIMD128-NEXT:    i8x16.extract_lane_u $push5=, $pop4, 0
418; SIMD128-NEXT:    return $pop5
419  %res = tail call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %arg)
420  ret i8 %res
421}
422
; Signed min reduction over <2 x i64>.
; The checks contain no i64x2 min instruction: the half-swapped vector is
; compared with i64x2.lt_s and the mask drives v128.bitselect, after which
; lane 0 is extracted.
423define i64 @pairwise_smin_v2i64(<2 x i64> %arg) {
424; SIMD128-LABEL: pairwise_smin_v2i64:
425; SIMD128:         .functype pairwise_smin_v2i64 (v128) -> (i64)
426; SIMD128-NEXT:  # %bb.0:
427; SIMD128-NEXT:    i8x16.shuffle $push4=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7
428; SIMD128-NEXT:    local.tee $push3=, $1=, $pop4
429; SIMD128-NEXT:    i64x2.lt_s $push0=, $0, $1
430; SIMD128-NEXT:    v128.bitselect $push1=, $0, $pop3, $pop0
431; SIMD128-NEXT:    i64x2.extract_lane $push2=, $pop1, 0
432; SIMD128-NEXT:    return $pop2
433  %res = tail call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> %arg)
434  ret i64 %res
435}
436
; Signed min reduction over <4 x i32>.
; The checks expect two shuffle + i32x4.min_s steps followed by extraction
; of lane 0.
437define i32 @pairwise_smin_v4i32(<4 x i32> %arg) {
438; SIMD128-LABEL: pairwise_smin_v4i32:
439; SIMD128:         .functype pairwise_smin_v4i32 (v128) -> (i32)
440; SIMD128-NEXT:  # %bb.0:
441; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
442; SIMD128-NEXT:    i32x4.min_s $push5=, $0, $pop0
443; SIMD128-NEXT:    local.tee $push4=, $0=, $pop5
444; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
445; SIMD128-NEXT:    i32x4.min_s $push2=, $pop4, $pop1
446; SIMD128-NEXT:    i32x4.extract_lane $push3=, $pop2, 0
447; SIMD128-NEXT:    return $pop3
448  %res = tail call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %arg)
449  ret i32 %res
450}
451
; Signed min reduction over <8 x i16>.
; The checks expect three shuffle + i16x8.min_s steps; the i16 result is
; read with i16x8.extract_lane_u and the expected functype returns i32.
452define i16 @pairwise_smin_v8i16(<8 x i16> %arg) {
453; SIMD128-LABEL: pairwise_smin_v8i16:
454; SIMD128:         .functype pairwise_smin_v8i16 (v128) -> (i32)
455; SIMD128-NEXT:  # %bb.0:
456; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
457; SIMD128-NEXT:    i16x8.min_s $push8=, $0, $pop0
458; SIMD128-NEXT:    local.tee $push7=, $0=, $pop8
459; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
460; SIMD128-NEXT:    i16x8.min_s $push6=, $pop7, $pop1
461; SIMD128-NEXT:    local.tee $push5=, $0=, $pop6
462; SIMD128-NEXT:    i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
463; SIMD128-NEXT:    i16x8.min_s $push3=, $pop5, $pop2
464; SIMD128-NEXT:    i16x8.extract_lane_u $push4=, $pop3, 0
465; SIMD128-NEXT:    return $pop4
466  %res = tail call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %arg)
467  ret i16 %res
468}
469
; Signed min reduction over <16 x i8>.
; The checks expect four shuffle + i8x16.min_s steps; the i8 result is read
; with i8x16.extract_lane_u and the expected functype returns i32.
470define i8 @pairwise_smin_v16i8(<16 x i8> %arg) {
471; SIMD128-LABEL: pairwise_smin_v16i8:
472; SIMD128:         .functype pairwise_smin_v16i8 (v128) -> (i32)
473; SIMD128-NEXT:  # %bb.0:
474; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
475; SIMD128-NEXT:    i8x16.min_s $push11=, $0, $pop0
476; SIMD128-NEXT:    local.tee $push10=, $0=, $pop11
477; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
478; SIMD128-NEXT:    i8x16.min_s $push9=, $pop10, $pop1
479; SIMD128-NEXT:    local.tee $push8=, $0=, $pop9
480; SIMD128-NEXT:    i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
481; SIMD128-NEXT:    i8x16.min_s $push7=, $pop8, $pop2
482; SIMD128-NEXT:    local.tee $push6=, $0=, $pop7
483; SIMD128-NEXT:    i8x16.shuffle $push3=, $0, $0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
484; SIMD128-NEXT:    i8x16.min_s $push4=, $pop6, $pop3
485; SIMD128-NEXT:    i8x16.extract_lane_u $push5=, $pop4, 0
486; SIMD128-NEXT:    return $pop5
487  %res = tail call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %arg)
488  ret i8 %res
489}
490
; Unsigned max reduction over <2 x i64>.
; The checks contain no vector unsigned compare: lane 0 of the input and of
; the half-swapped vector are compared with scalar i64.gt_u, the result is
; turned into -1/0 by i64.select, inserted with i64x2.replace_lane, and used
; as the v128.bitselect mask before lane 0 is extracted.
491define i64 @pairwise_umax_v2i64(<2 x i64> %arg) {
492; SIMD128-LABEL: pairwise_umax_v2i64:
493; SIMD128:         .functype pairwise_umax_v2i64 (v128) -> (i64)
494; SIMD128-NEXT:  # %bb.0:
495; SIMD128-NEXT:    i8x16.shuffle $push10=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7
496; SIMD128-NEXT:    local.tee $push9=, $1=, $pop10
497; SIMD128-NEXT:    i64.const $push4=, -1
498; SIMD128-NEXT:    i64.const $push3=, 0
499; SIMD128-NEXT:    i64x2.extract_lane $push1=, $0, 0
500; SIMD128-NEXT:    i64x2.extract_lane $push0=, $1, 0
501; SIMD128-NEXT:    i64.gt_u $push2=, $pop1, $pop0
502; SIMD128-NEXT:    i64.select $push5=, $pop4, $pop3, $pop2
503; SIMD128-NEXT:    i64x2.replace_lane $push6=, $0, 0, $pop5
504; SIMD128-NEXT:    v128.bitselect $push7=, $0, $pop9, $pop6
505; SIMD128-NEXT:    i64x2.extract_lane $push8=, $pop7, 0
506; SIMD128-NEXT:    return $pop8
507  %res = tail call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> %arg)
508  ret i64 %res
509}
510
; Unsigned max reduction over <4 x i32>.
; The checks expect two shuffle + i32x4.max_u steps followed by extraction
; of lane 0.
511define i32 @pairwise_umax_v4i32(<4 x i32> %arg) {
512; SIMD128-LABEL: pairwise_umax_v4i32:
513; SIMD128:         .functype pairwise_umax_v4i32 (v128) -> (i32)
514; SIMD128-NEXT:  # %bb.0:
515; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
516; SIMD128-NEXT:    i32x4.max_u $push5=, $0, $pop0
517; SIMD128-NEXT:    local.tee $push4=, $0=, $pop5
518; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
519; SIMD128-NEXT:    i32x4.max_u $push2=, $pop4, $pop1
520; SIMD128-NEXT:    i32x4.extract_lane $push3=, $pop2, 0
521; SIMD128-NEXT:    return $pop3
522  %res = tail call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %arg)
523  ret i32 %res
524}
525
; Unsigned max reduction over <8 x i16>.
; The checks expect three shuffle + i16x8.max_u steps; the i16 result is
; read with i16x8.extract_lane_u and the expected functype returns i32.
526define i16 @pairwise_umax_v8i16(<8 x i16> %arg) {
527; SIMD128-LABEL: pairwise_umax_v8i16:
528; SIMD128:         .functype pairwise_umax_v8i16 (v128) -> (i32)
529; SIMD128-NEXT:  # %bb.0:
530; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
531; SIMD128-NEXT:    i16x8.max_u $push8=, $0, $pop0
532; SIMD128-NEXT:    local.tee $push7=, $0=, $pop8
533; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
534; SIMD128-NEXT:    i16x8.max_u $push6=, $pop7, $pop1
535; SIMD128-NEXT:    local.tee $push5=, $0=, $pop6
536; SIMD128-NEXT:    i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
537; SIMD128-NEXT:    i16x8.max_u $push3=, $pop5, $pop2
538; SIMD128-NEXT:    i16x8.extract_lane_u $push4=, $pop3, 0
539; SIMD128-NEXT:    return $pop4
540  %res = tail call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %arg)
541  ret i16 %res
542}
543
; Unsigned max reduction over <16 x i8>.
; The checks expect four shuffle + i8x16.max_u steps; the i8 result is read
; with i8x16.extract_lane_u and the expected functype returns i32.
544define i8 @pairwise_umax_v16i8(<16 x i8> %arg) {
545; SIMD128-LABEL: pairwise_umax_v16i8:
546; SIMD128:         .functype pairwise_umax_v16i8 (v128) -> (i32)
547; SIMD128-NEXT:  # %bb.0:
548; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
549; SIMD128-NEXT:    i8x16.max_u $push11=, $0, $pop0
550; SIMD128-NEXT:    local.tee $push10=, $0=, $pop11
551; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
552; SIMD128-NEXT:    i8x16.max_u $push9=, $pop10, $pop1
553; SIMD128-NEXT:    local.tee $push8=, $0=, $pop9
554; SIMD128-NEXT:    i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
555; SIMD128-NEXT:    i8x16.max_u $push7=, $pop8, $pop2
556; SIMD128-NEXT:    local.tee $push6=, $0=, $pop7
557; SIMD128-NEXT:    i8x16.shuffle $push3=, $0, $0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
558; SIMD128-NEXT:    i8x16.max_u $push4=, $pop6, $pop3
559; SIMD128-NEXT:    i8x16.extract_lane_u $push5=, $pop4, 0
560; SIMD128-NEXT:    return $pop5
561  %res = tail call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %arg)
562  ret i8 %res
563}
564
; Unsigned min reduction over <2 x i64>.
; The checks contain no vector unsigned compare: lane 0 of the input and of
; the half-swapped vector are compared with scalar i64.lt_u, the result is
; turned into -1/0 by i64.select, inserted with i64x2.replace_lane, and used
; as the v128.bitselect mask before lane 0 is extracted.
565define i64 @pairwise_umin_v2i64(<2 x i64> %arg) {
566; SIMD128-LABEL: pairwise_umin_v2i64:
567; SIMD128:         .functype pairwise_umin_v2i64 (v128) -> (i64)
568; SIMD128-NEXT:  # %bb.0:
569; SIMD128-NEXT:    i8x16.shuffle $push10=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7
570; SIMD128-NEXT:    local.tee $push9=, $1=, $pop10
571; SIMD128-NEXT:    i64.const $push4=, -1
572; SIMD128-NEXT:    i64.const $push3=, 0
573; SIMD128-NEXT:    i64x2.extract_lane $push1=, $0, 0
574; SIMD128-NEXT:    i64x2.extract_lane $push0=, $1, 0
575; SIMD128-NEXT:    i64.lt_u $push2=, $pop1, $pop0
576; SIMD128-NEXT:    i64.select $push5=, $pop4, $pop3, $pop2
577; SIMD128-NEXT:    i64x2.replace_lane $push6=, $0, 0, $pop5
578; SIMD128-NEXT:    v128.bitselect $push7=, $0, $pop9, $pop6
579; SIMD128-NEXT:    i64x2.extract_lane $push8=, $pop7, 0
580; SIMD128-NEXT:    return $pop8
581  %res = tail call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> %arg)
582  ret i64 %res
583}
584
; Unsigned min reduction over <4 x i32>.
; The checks expect two shuffle + i32x4.min_u steps followed by extraction
; of lane 0.
585define i32 @pairwise_umin_v4i32(<4 x i32> %arg) {
586; SIMD128-LABEL: pairwise_umin_v4i32:
587; SIMD128:         .functype pairwise_umin_v4i32 (v128) -> (i32)
588; SIMD128-NEXT:  # %bb.0:
589; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
590; SIMD128-NEXT:    i32x4.min_u $push5=, $0, $pop0
591; SIMD128-NEXT:    local.tee $push4=, $0=, $pop5
592; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
593; SIMD128-NEXT:    i32x4.min_u $push2=, $pop4, $pop1
594; SIMD128-NEXT:    i32x4.extract_lane $push3=, $pop2, 0
595; SIMD128-NEXT:    return $pop3
596  %res = tail call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %arg)
597  ret i32 %res
598}
599
; Unsigned min reduction over <8 x i16>.
; The checks expect three shuffle + i16x8.min_u steps; the i16 result is
; read with i16x8.extract_lane_u and the expected functype returns i32.
600define i16 @pairwise_umin_v8i16(<8 x i16> %arg) {
601; SIMD128-LABEL: pairwise_umin_v8i16:
602; SIMD128:         .functype pairwise_umin_v8i16 (v128) -> (i32)
603; SIMD128-NEXT:  # %bb.0:
604; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
605; SIMD128-NEXT:    i16x8.min_u $push8=, $0, $pop0
606; SIMD128-NEXT:    local.tee $push7=, $0=, $pop8
607; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
608; SIMD128-NEXT:    i16x8.min_u $push6=, $pop7, $pop1
609; SIMD128-NEXT:    local.tee $push5=, $0=, $pop6
610; SIMD128-NEXT:    i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
611; SIMD128-NEXT:    i16x8.min_u $push3=, $pop5, $pop2
612; SIMD128-NEXT:    i16x8.extract_lane_u $push4=, $pop3, 0
613; SIMD128-NEXT:    return $pop4
614  %res = tail call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %arg)
615  ret i16 %res
616}
617
; Unsigned min reduction over <16 x i8>.
; The checks expect four shuffle + i8x16.min_u steps; the i8 result is read
; with i8x16.extract_lane_u and the expected functype returns i32.
618define i8 @pairwise_umin_v16i8(<16 x i8> %arg) {
619; SIMD128-LABEL: pairwise_umin_v16i8:
620; SIMD128:         .functype pairwise_umin_v16i8 (v128) -> (i32)
621; SIMD128-NEXT:  # %bb.0:
622; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
623; SIMD128-NEXT:    i8x16.min_u $push11=, $0, $pop0
624; SIMD128-NEXT:    local.tee $push10=, $0=, $pop11
625; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
626; SIMD128-NEXT:    i8x16.min_u $push9=, $pop10, $pop1
627; SIMD128-NEXT:    local.tee $push8=, $0=, $pop9
628; SIMD128-NEXT:    i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
629; SIMD128-NEXT:    i8x16.min_u $push7=, $pop8, $pop2
630; SIMD128-NEXT:    local.tee $push6=, $0=, $pop7
631; SIMD128-NEXT:    i8x16.shuffle $push3=, $0, $0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
632; SIMD128-NEXT:    i8x16.min_u $push4=, $pop6, $pop3
633; SIMD128-NEXT:    i8x16.extract_lane_u $push5=, $pop4, 0
634; SIMD128-NEXT:    return $pop5
635  %res = tail call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %arg)
636  ret i8 %res
637}
638
; Floating-point add reduction of a <2 x double> vector via
; @llvm.vector.reduce.fadd.v2f64 with start value -0.0 and no fast-math
; flags on the call.
; The autogenerated check lines expect lanes 0 and 1 to be extracted and
; combined with a single scalar f64.add; no vector add is expected.
639define double @pairwise_add_v2f64(<2 x double> %arg) {
640; SIMD128-LABEL: pairwise_add_v2f64:
641; SIMD128:         .functype pairwise_add_v2f64 (v128) -> (f64)
642; SIMD128-NEXT:  # %bb.0:
643; SIMD128-NEXT:    f64x2.extract_lane $push1=, $0, 0
644; SIMD128-NEXT:    f64x2.extract_lane $push0=, $0, 1
645; SIMD128-NEXT:    f64.add $push2=, $pop1, $pop0
646; SIMD128-NEXT:    return $pop2
647  %res = tail call double @llvm.vector.reduce.fadd.v2f64(double -0.0, <2 x double> %arg)
648  ret double %res
649}
650
; Floating-point add reduction of a <2 x double> vector via
; @llvm.vector.reduce.fadd.v2f64 with start value -0.0; the call carries
; the `fast` flag.
; The autogenerated check lines expect one i8x16.shuffle + f64x2.add step
; on the vector, followed by an extract of lane 0.
651define double @pairwise_add_v2f64_fast(<2 x double> %arg) {
652; SIMD128-LABEL: pairwise_add_v2f64_fast:
653; SIMD128:         .functype pairwise_add_v2f64_fast (v128) -> (f64)
654; SIMD128-NEXT:  # %bb.0:
655; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7
656; SIMD128-NEXT:    f64x2.add $push1=, $0, $pop0
657; SIMD128-NEXT:    f64x2.extract_lane $push2=, $pop1, 0
658; SIMD128-NEXT:    return $pop2
659  %res = tail call fast double @llvm.vector.reduce.fadd.v2f64(double -0.0, <2 x double> %arg)
660  ret double %res
661}
662
; Floating-point add reduction of a <4 x float> vector via
; @llvm.vector.reduce.fadd.v4f32 with start value -0.0 and no fast-math
; flags on the call.
; The autogenerated check lines expect lanes 0..3 to be extracted and
; accumulated in lane order with three scalar f32.add instructions.
663define float @pairwise_add_v4f32(<4 x float> %arg) {
664; SIMD128-LABEL: pairwise_add_v4f32:
665; SIMD128:         .functype pairwise_add_v4f32 (v128) -> (f32)
666; SIMD128-NEXT:  # %bb.0:
667; SIMD128-NEXT:    f32x4.extract_lane $push1=, $0, 0
668; SIMD128-NEXT:    f32x4.extract_lane $push0=, $0, 1
669; SIMD128-NEXT:    f32.add $push2=, $pop1, $pop0
670; SIMD128-NEXT:    f32x4.extract_lane $push3=, $0, 2
671; SIMD128-NEXT:    f32.add $push4=, $pop2, $pop3
672; SIMD128-NEXT:    f32x4.extract_lane $push5=, $0, 3
673; SIMD128-NEXT:    f32.add $push6=, $pop4, $pop5
674; SIMD128-NEXT:    return $pop6
675  %res = tail call float @llvm.vector.reduce.fadd.v4f32(float -0.0, <4 x float> %arg)
676  ret float %res
677}
678
; Floating-point add reduction of a <4 x float> vector via
; @llvm.vector.reduce.fadd.v4f32 with start value -0.0; the call carries
; the `fast` flag.
; The autogenerated check lines expect two i8x16.shuffle + f32x4.add steps
; on the vector, followed by an extract of lane 0.
679define float @pairwise_add_v4f32_fast(<4 x float> %arg) {
680; SIMD128-LABEL: pairwise_add_v4f32_fast:
681; SIMD128:         .functype pairwise_add_v4f32_fast (v128) -> (f32)
682; SIMD128-NEXT:  # %bb.0:
683; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 0, 1, 2, 3
684; SIMD128-NEXT:    f32x4.add $push5=, $0, $pop0
685; SIMD128-NEXT:    local.tee $push4=, $0=, $pop5
686; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 8, 9, 10, 11, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
687; SIMD128-NEXT:    f32x4.add $push2=, $pop4, $pop1
688; SIMD128-NEXT:    f32x4.extract_lane $push3=, $pop2, 0
689; SIMD128-NEXT:    return $pop3
690  %res = tail call fast float @llvm.vector.reduce.fadd.v4f32(float -0.0, <4 x float> %arg)
691  ret float %res
692}
693
; Floating-point add reduction of a <4 x float> vector via
; @llvm.vector.reduce.fadd.v4f32 with start value -0.0; the call carries
; only the `reassoc` flag.
; The autogenerated check lines expect two i8x16.shuffle + f32x4.add steps
; on the vector, followed by an extract of lane 0.
694define float @pairwise_add_v4f32_reassoc(<4 x float> %arg) {
695; SIMD128-LABEL: pairwise_add_v4f32_reassoc:
696; SIMD128:         .functype pairwise_add_v4f32_reassoc (v128) -> (f32)
697; SIMD128-NEXT:  # %bb.0:
698; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 0, 1, 2, 3
699; SIMD128-NEXT:    f32x4.add $push5=, $0, $pop0
700; SIMD128-NEXT:    local.tee $push4=, $0=, $pop5
701; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 8, 9, 10, 11, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
702; SIMD128-NEXT:    f32x4.add $push2=, $pop4, $pop1
703; SIMD128-NEXT:    f32x4.extract_lane $push3=, $pop2, 0
704; SIMD128-NEXT:    return $pop3
705  %res = tail call reassoc float @llvm.vector.reduce.fadd.v4f32(float -0.0, <4 x float> %arg)
706  ret float %res
707}
708
; Floating-point multiply reduction of a <2 x double> vector via
; @llvm.vector.reduce.fmul.v2f64 with start value -0.0 and no fast-math
; flags on the call.
; The autogenerated check lines expect lane 0 to be multiplied by the
; constant -0.0 and then by lane 1, using scalar f64.mul.
; Review note: -0.0 is the start value the fadd tests in this file use;
; the multiplicative identity would be 1.0. Confirm whether -0.0 is
; intentional here before changing it (the checks must be regenerated).
709define double @pairwise_mul_v2f64(<2 x double> %arg) {
710; SIMD128-LABEL: pairwise_mul_v2f64:
711; SIMD128:         .functype pairwise_mul_v2f64 (v128) -> (f64)
712; SIMD128-NEXT:  # %bb.0:
713; SIMD128-NEXT:    f64x2.extract_lane $push0=, $0, 0
714; SIMD128-NEXT:    f64.const $push1=, -0x0p0
715; SIMD128-NEXT:    f64.mul $push2=, $pop0, $pop1
716; SIMD128-NEXT:    f64x2.extract_lane $push3=, $0, 1
717; SIMD128-NEXT:    f64.mul $push4=, $pop2, $pop3
718; SIMD128-NEXT:    return $pop4
719  %res = tail call double @llvm.vector.reduce.fmul.v2f64(double -0.0, <2 x double> %arg)
720  ret double %res
721}
722
; Floating-point multiply reduction of a <2 x double> vector via
; @llvm.vector.reduce.fmul.v2f64 with start value -0.0; the call carries
; the `fast` flag.
; The autogenerated check lines expect only a constant 0x0p0 to be
; returned; no multiply instruction appears in the expected output.
; Review note: as written this case therefore does not cover any vector
; multiply lowering. Presumably a start value of 1.0 was intended; confirm
; and regenerate the checks if so.
723define double @pairwise_mul_v2f64_fast(<2 x double> %arg) {
724; SIMD128-LABEL: pairwise_mul_v2f64_fast:
725; SIMD128:         .functype pairwise_mul_v2f64_fast (v128) -> (f64)
726; SIMD128-NEXT:  # %bb.0:
727; SIMD128-NEXT:    f64.const $push0=, 0x0p0
728; SIMD128-NEXT:    return $pop0
729  %res = tail call fast double @llvm.vector.reduce.fmul.v2f64(double -0.0, <2 x double> %arg)
730  ret double %res
731}
732
; Floating-point multiply reduction of a <4 x float> vector via
; @llvm.vector.reduce.fmul.v4f32 with start value -0.0 and no fast-math
; flags on the call.
; The autogenerated check lines expect lane 0 to be multiplied by the
; constant -0.0 and then by lanes 1, 2 and 3 in order, using scalar f32.mul.
; Review note: -0.0 is the start value the fadd tests in this file use;
; the multiplicative identity would be 1.0. Confirm whether -0.0 is
; intentional here before changing it (the checks must be regenerated).
733define float @pairwise_mul_v4f32(<4 x float> %arg) {
734; SIMD128-LABEL: pairwise_mul_v4f32:
735; SIMD128:         .functype pairwise_mul_v4f32 (v128) -> (f32)
736; SIMD128-NEXT:  # %bb.0:
737; SIMD128-NEXT:    f32x4.extract_lane $push0=, $0, 0
738; SIMD128-NEXT:    f32.const $push1=, -0x0p0
739; SIMD128-NEXT:    f32.mul $push2=, $pop0, $pop1
740; SIMD128-NEXT:    f32x4.extract_lane $push3=, $0, 1
741; SIMD128-NEXT:    f32.mul $push4=, $pop2, $pop3
742; SIMD128-NEXT:    f32x4.extract_lane $push5=, $0, 2
743; SIMD128-NEXT:    f32.mul $push6=, $pop4, $pop5
744; SIMD128-NEXT:    f32x4.extract_lane $push7=, $0, 3
745; SIMD128-NEXT:    f32.mul $push8=, $pop6, $pop7
746; SIMD128-NEXT:    return $pop8
747  %res = tail call float @llvm.vector.reduce.fmul.v4f32(float -0.0, <4 x float> %arg)
748  ret float %res
749}
750
; Floating-point multiply reduction of a <4 x float> vector via
; @llvm.vector.reduce.fmul.v4f32 with start value -0.0; the call carries
; the `fast` flag.
; The autogenerated check lines expect only a constant 0x0p0 to be
; returned; no multiply instruction appears in the expected output.
; Review note: as written this case therefore does not cover any vector
; multiply lowering. Presumably a start value of 1.0 was intended; confirm
; and regenerate the checks if so.
751define float @pairwise_mul_v4f32_fast(<4 x float> %arg) {
752; SIMD128-LABEL: pairwise_mul_v4f32_fast:
753; SIMD128:         .functype pairwise_mul_v4f32_fast (v128) -> (f32)
754; SIMD128-NEXT:  # %bb.0:
755; SIMD128-NEXT:    f32.const $push0=, 0x0p0
756; SIMD128-NEXT:    return $pop0
757  %res = tail call fast float @llvm.vector.reduce.fmul.v4f32(float -0.0, <4 x float> %arg)
758  ret float %res
759}
760
; Floating-point multiply reduction of a <4 x float> vector via
; @llvm.vector.reduce.fmul.v4f32 with start value -0.0; the call carries
; only the `reassoc` flag.
; The autogenerated check lines expect two i8x16.shuffle + f32x4.mul steps
; on the vector, an extract of lane 0, and then one scalar f32.mul of that
; lane by the constant -0.0 (the start value).
761define float @pairwise_mul_v4f32_reassoc(<4 x float> %arg) {
762; SIMD128-LABEL: pairwise_mul_v4f32_reassoc:
763; SIMD128:         .functype pairwise_mul_v4f32_reassoc (v128) -> (f32)
764; SIMD128-NEXT:  # %bb.0:
765; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
766; SIMD128-NEXT:    f32x4.mul $push7=, $0, $pop0
767; SIMD128-NEXT:    local.tee $push6=, $0=, $pop7
768; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
769; SIMD128-NEXT:    f32x4.mul $push2=, $pop6, $pop1
770; SIMD128-NEXT:    f32x4.extract_lane $push3=, $pop2, 0
771; SIMD128-NEXT:    f32.const $push4=, -0x0p0
772; SIMD128-NEXT:    f32.mul $push5=, $pop3, $pop4
773; SIMD128-NEXT:    return $pop5
774  %res = tail call reassoc float @llvm.vector.reduce.fmul.v4f32(float -0.0, <4 x float> %arg)
775  ret float %res
776}
777
; fmax reduction of a <2 x double> vector via
; @llvm.vector.reduce.fmax.v2f64, with no fast-math flags on the call.
; The autogenerated check lines expect lanes 0 and 1 to be extracted and
; passed to a call to `fmax`; no vector max instruction is expected.
778define double @pairwise_max_v2f64(<2 x double> %arg) {
779; SIMD128-LABEL: pairwise_max_v2f64:
780; SIMD128:         .functype pairwise_max_v2f64 (v128) -> (f64)
781; SIMD128-NEXT:  # %bb.0:
782; SIMD128-NEXT:    f64x2.extract_lane $push1=, $0, 0
783; SIMD128-NEXT:    f64x2.extract_lane $push0=, $0, 1
784; SIMD128-NEXT:    call $push2=, fmax, $pop1, $pop0
785; SIMD128-NEXT:    return $pop2
786  %res = tail call double @llvm.vector.reduce.fmax.v2f64(<2 x double> %arg)
787  ret double %res
788}
789
; fmax reduction of a <2 x double> vector via
; @llvm.vector.reduce.fmax.v2f64; the call carries the `fast` flag.
; The autogenerated check lines expect one i8x16.shuffle + f64x2.pmax step
; on the vector, followed by an extract of lane 0.
790define double @pairwise_max_v2f64_fast(<2 x double> %arg) {
791; SIMD128-LABEL: pairwise_max_v2f64_fast:
792; SIMD128:         .functype pairwise_max_v2f64_fast (v128) -> (f64)
793; SIMD128-NEXT:  # %bb.0:
794; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7
795; SIMD128-NEXT:    f64x2.pmax $push1=, $0, $pop0
796; SIMD128-NEXT:    f64x2.extract_lane $push2=, $pop1, 0
797; SIMD128-NEXT:    return $pop2
798  %res = tail call fast double @llvm.vector.reduce.fmax.v2f64(<2 x double> %arg)
799  ret double %res
800}
801
; fmax reduction of a <4 x float> vector via
; @llvm.vector.reduce.fmax.v4f32, with no fast-math flags on the call.
; The autogenerated check lines expect all four lanes to be extracted and
; folded in lane order through three calls to `fmaxf`.
802define float @pairwise_max_v4f32(<4 x float> %arg) {
803; SIMD128-LABEL: pairwise_max_v4f32:
804; SIMD128:         .functype pairwise_max_v4f32 (v128) -> (f32)
805; SIMD128-NEXT:  # %bb.0:
806; SIMD128-NEXT:    f32x4.extract_lane $push3=, $0, 0
807; SIMD128-NEXT:    f32x4.extract_lane $push2=, $0, 1
808; SIMD128-NEXT:    call $push4=, fmaxf, $pop3, $pop2
809; SIMD128-NEXT:    f32x4.extract_lane $push1=, $0, 2
810; SIMD128-NEXT:    call $push5=, fmaxf, $pop4, $pop1
811; SIMD128-NEXT:    f32x4.extract_lane $push0=, $0, 3
812; SIMD128-NEXT:    call $push6=, fmaxf, $pop5, $pop0
813; SIMD128-NEXT:    return $pop6
814  %res = tail call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %arg)
815  ret float %res
816}
817
; fmax reduction of a <4 x float> vector via
; @llvm.vector.reduce.fmax.v4f32; the call carries the `fast` flag.
; The autogenerated check lines expect two i8x16.shuffle + f32x4.pmax steps
; on the vector, followed by an extract of lane 0.
818define float @pairwise_max_v4f32_fast(<4 x float> %arg) {
819; SIMD128-LABEL: pairwise_max_v4f32_fast:
820; SIMD128:         .functype pairwise_max_v4f32_fast (v128) -> (f32)
821; SIMD128-NEXT:  # %bb.0:
822; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
823; SIMD128-NEXT:    f32x4.pmax $push5=, $0, $pop0
824; SIMD128-NEXT:    local.tee $push4=, $0=, $pop5
825; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
826; SIMD128-NEXT:    f32x4.pmax $push2=, $pop4, $pop1
827; SIMD128-NEXT:    f32x4.extract_lane $push3=, $pop2, 0
828; SIMD128-NEXT:    return $pop3
829  %res = tail call fast float @llvm.vector.reduce.fmax.v4f32(<4 x float> %arg)
830  ret float %res
831}
832
; fmax reduction of a <4 x float> vector via
; @llvm.vector.reduce.fmax.v4f32; the call carries only the `reassoc` flag.
; The autogenerated check lines expect all four lanes to be extracted and
; folded in lane order through three calls to `fmaxf`; no vector max
; instruction is expected with this flag alone.
833define float @pairwise_max_v4f32_reassoc(<4 x float> %arg) {
834; SIMD128-LABEL: pairwise_max_v4f32_reassoc:
835; SIMD128:         .functype pairwise_max_v4f32_reassoc (v128) -> (f32)
836; SIMD128-NEXT:  # %bb.0:
837; SIMD128-NEXT:    f32x4.extract_lane $push3=, $0, 0
838; SIMD128-NEXT:    f32x4.extract_lane $push2=, $0, 1
839; SIMD128-NEXT:    call $push4=, fmaxf, $pop3, $pop2
840; SIMD128-NEXT:    f32x4.extract_lane $push1=, $0, 2
841; SIMD128-NEXT:    call $push5=, fmaxf, $pop4, $pop1
842; SIMD128-NEXT:    f32x4.extract_lane $push0=, $0, 3
843; SIMD128-NEXT:    call $push6=, fmaxf, $pop5, $pop0
844; SIMD128-NEXT:    return $pop6
845  %res = tail call reassoc float @llvm.vector.reduce.fmax.v4f32(<4 x float> %arg)
846  ret float %res
847}
848
; fmin reduction of a <2 x double> vector via
; @llvm.vector.reduce.fmin.v2f64, with no fast-math flags on the call.
; The autogenerated check lines expect lanes 0 and 1 to be extracted and
; passed to a call to `fmin`; no vector min instruction is expected.
849define double @pairwise_min_v2f64(<2 x double> %arg) {
850; SIMD128-LABEL: pairwise_min_v2f64:
851; SIMD128:         .functype pairwise_min_v2f64 (v128) -> (f64)
852; SIMD128-NEXT:  # %bb.0:
853; SIMD128-NEXT:    f64x2.extract_lane $push1=, $0, 0
854; SIMD128-NEXT:    f64x2.extract_lane $push0=, $0, 1
855; SIMD128-NEXT:    call $push2=, fmin, $pop1, $pop0
856; SIMD128-NEXT:    return $pop2
857  %res = tail call double @llvm.vector.reduce.fmin.v2f64(<2 x double> %arg)
858  ret double %res
859}
860
; fmin reduction of a <2 x double> vector via
; @llvm.vector.reduce.fmin.v2f64; the call carries the `fast` flag.
; The autogenerated check lines expect one i8x16.shuffle + f64x2.pmin step
; on the vector, followed by an extract of lane 0.
861define double @pairwise_min_v2f64_fast(<2 x double> %arg) {
862; SIMD128-LABEL: pairwise_min_v2f64_fast:
863; SIMD128:         .functype pairwise_min_v2f64_fast (v128) -> (f64)
864; SIMD128-NEXT:  # %bb.0:
865; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7
866; SIMD128-NEXT:    f64x2.pmin $push1=, $0, $pop0
867; SIMD128-NEXT:    f64x2.extract_lane $push2=, $pop1, 0
868; SIMD128-NEXT:    return $pop2
869  %res = tail call fast double @llvm.vector.reduce.fmin.v2f64(<2 x double> %arg)
870  ret double %res
871}
872
; fmin reduction of a <4 x float> vector via
; @llvm.vector.reduce.fmin.v4f32, with no fast-math flags on the call.
; The autogenerated check lines expect all four lanes to be extracted and
; folded in lane order through three calls to `fminf`.
873define float @pairwise_min_v4f32(<4 x float> %arg) {
874; SIMD128-LABEL: pairwise_min_v4f32:
875; SIMD128:         .functype pairwise_min_v4f32 (v128) -> (f32)
876; SIMD128-NEXT:  # %bb.0:
877; SIMD128-NEXT:    f32x4.extract_lane $push3=, $0, 0
878; SIMD128-NEXT:    f32x4.extract_lane $push2=, $0, 1
879; SIMD128-NEXT:    call $push4=, fminf, $pop3, $pop2
880; SIMD128-NEXT:    f32x4.extract_lane $push1=, $0, 2
881; SIMD128-NEXT:    call $push5=, fminf, $pop4, $pop1
882; SIMD128-NEXT:    f32x4.extract_lane $push0=, $0, 3
883; SIMD128-NEXT:    call $push6=, fminf, $pop5, $pop0
884; SIMD128-NEXT:    return $pop6
885  %res = tail call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %arg)
886  ret float %res
887}
888
; fmin reduction of a <4 x float> vector via
; @llvm.vector.reduce.fmin.v4f32; the call carries the `fast` flag.
; The autogenerated check lines expect two i8x16.shuffle + f32x4.pmin steps
; on the vector, followed by an extract of lane 0.
889define float @pairwise_min_v4f32_fast(<4 x float> %arg) {
890; SIMD128-LABEL: pairwise_min_v4f32_fast:
891; SIMD128:         .functype pairwise_min_v4f32_fast (v128) -> (f32)
892; SIMD128-NEXT:  # %bb.0:
893; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
894; SIMD128-NEXT:    f32x4.pmin $push5=, $0, $pop0
895; SIMD128-NEXT:    local.tee $push4=, $0=, $pop5
896; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
897; SIMD128-NEXT:    f32x4.pmin $push2=, $pop4, $pop1
898; SIMD128-NEXT:    f32x4.extract_lane $push3=, $pop2, 0
899; SIMD128-NEXT:    return $pop3
900  %res = tail call fast float @llvm.vector.reduce.fmin.v4f32(<4 x float> %arg)
901  ret float %res
902}
903
; fmin reduction of a <4 x float> vector via
; @llvm.vector.reduce.fmin.v4f32; the call carries only the `reassoc` flag.
; The autogenerated check lines expect all four lanes to be extracted and
; folded in lane order through three calls to `fminf`; no vector min
; instruction is expected with this flag alone.
904define float @pairwise_min_v4f32_reassoc(<4 x float> %arg) {
905; SIMD128-LABEL: pairwise_min_v4f32_reassoc:
906; SIMD128:         .functype pairwise_min_v4f32_reassoc (v128) -> (f32)
907; SIMD128-NEXT:  # %bb.0:
908; SIMD128-NEXT:    f32x4.extract_lane $push3=, $0, 0
909; SIMD128-NEXT:    f32x4.extract_lane $push2=, $0, 1
910; SIMD128-NEXT:    call $push4=, fminf, $pop3, $pop2
911; SIMD128-NEXT:    f32x4.extract_lane $push1=, $0, 2
912; SIMD128-NEXT:    call $push5=, fminf, $pop4, $pop1
913; SIMD128-NEXT:    f32x4.extract_lane $push0=, $0, 3
914; SIMD128-NEXT:    call $push6=, fminf, $pop5, $pop0
915; SIMD128-NEXT:    return $pop6
916  %res = tail call reassoc float @llvm.vector.reduce.fmin.v4f32(<4 x float> %arg)
917  ret float %res
918}
919
; fmaximum reduction of a <2 x double> vector via
; @llvm.vector.reduce.fmaximum.v2f64, with no fast-math flags on the call.
; The autogenerated check lines expect lanes 0 and 1 to be extracted and
; combined with a single scalar f64.max.
920define double @pairwise_maximum_v2f64(<2 x double> %arg) {
921; SIMD128-LABEL: pairwise_maximum_v2f64:
922; SIMD128:         .functype pairwise_maximum_v2f64 (v128) -> (f64)
923; SIMD128-NEXT:  # %bb.0:
924; SIMD128-NEXT:    f64x2.extract_lane $push1=, $0, 0
925; SIMD128-NEXT:    f64x2.extract_lane $push0=, $0, 1
926; SIMD128-NEXT:    f64.max $push2=, $pop1, $pop0
927; SIMD128-NEXT:    return $pop2
928  %res = tail call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> %arg)
929  ret double %res
930}
931
; fmaximum reduction of a <2 x double> vector via
; @llvm.vector.reduce.fmaximum.v2f64; the call carries the `fast` flag.
; The autogenerated check lines expect lanes 0 and 1 to be extracted and
; combined with a single scalar f64.max; no vector instruction is expected
; even with the flag present.
932define double @pairwise_maximum_v2f64_fast(<2 x double> %arg) {
933; SIMD128-LABEL: pairwise_maximum_v2f64_fast:
934; SIMD128:         .functype pairwise_maximum_v2f64_fast (v128) -> (f64)
935; SIMD128-NEXT:  # %bb.0:
936; SIMD128-NEXT:    f64x2.extract_lane $push1=, $0, 0
937; SIMD128-NEXT:    f64x2.extract_lane $push0=, $0, 1
938; SIMD128-NEXT:    f64.max $push2=, $pop1, $pop0
939; SIMD128-NEXT:    return $pop2
940  %res = tail call fast double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> %arg)
941  ret double %res
942}
943
; fmaximum reduction of a <4 x float> vector via
; @llvm.vector.reduce.fmaximum.v4f32, with no fast-math flags on the call.
; The autogenerated check lines expect lanes 0..3 to be extracted and
; folded in lane order with three scalar f32.max instructions.
944define float @pairwise_maximum_v4f32(<4 x float> %arg) {
945; SIMD128-LABEL: pairwise_maximum_v4f32:
946; SIMD128:         .functype pairwise_maximum_v4f32 (v128) -> (f32)
947; SIMD128-NEXT:  # %bb.0:
948; SIMD128-NEXT:    f32x4.extract_lane $push1=, $0, 0
949; SIMD128-NEXT:    f32x4.extract_lane $push0=, $0, 1
950; SIMD128-NEXT:    f32.max $push2=, $pop1, $pop0
951; SIMD128-NEXT:    f32x4.extract_lane $push3=, $0, 2
952; SIMD128-NEXT:    f32.max $push4=, $pop2, $pop3
953; SIMD128-NEXT:    f32x4.extract_lane $push5=, $0, 3
954; SIMD128-NEXT:    f32.max $push6=, $pop4, $pop5
955; SIMD128-NEXT:    return $pop6
956  %res = tail call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> %arg)
957  ret float %res
958}
959
; fmaximum reduction of a <4 x float> vector via
; @llvm.vector.reduce.fmaximum.v4f32; the call carries the `fast` flag.
; The autogenerated check lines expect lanes 0..3 to be extracted and
; folded in lane order with three scalar f32.max instructions; no vector
; instruction is expected even with the flag present.
960define float @pairwise_maximum_v4f32_fast(<4 x float> %arg) {
961; SIMD128-LABEL: pairwise_maximum_v4f32_fast:
962; SIMD128:         .functype pairwise_maximum_v4f32_fast (v128) -> (f32)
963; SIMD128-NEXT:  # %bb.0:
964; SIMD128-NEXT:    f32x4.extract_lane $push1=, $0, 0
965; SIMD128-NEXT:    f32x4.extract_lane $push0=, $0, 1
966; SIMD128-NEXT:    f32.max $push2=, $pop1, $pop0
967; SIMD128-NEXT:    f32x4.extract_lane $push3=, $0, 2
968; SIMD128-NEXT:    f32.max $push4=, $pop2, $pop3
969; SIMD128-NEXT:    f32x4.extract_lane $push5=, $0, 3
970; SIMD128-NEXT:    f32.max $push6=, $pop4, $pop5
971; SIMD128-NEXT:    return $pop6
972  %res = tail call fast float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> %arg)
973  ret float %res
974}
975
; fmaximum reduction of a <4 x float> vector via
; @llvm.vector.reduce.fmaximum.v4f32; the call carries only the `reassoc`
; flag.
; The autogenerated check lines expect lanes 0..3 to be extracted and
; folded in lane order with three scalar f32.max instructions.
976define float @pairwise_maximum_v4f32_reassoc(<4 x float> %arg) {
977; SIMD128-LABEL: pairwise_maximum_v4f32_reassoc:
978; SIMD128:         .functype pairwise_maximum_v4f32_reassoc (v128) -> (f32)
979; SIMD128-NEXT:  # %bb.0:
980; SIMD128-NEXT:    f32x4.extract_lane $push1=, $0, 0
981; SIMD128-NEXT:    f32x4.extract_lane $push0=, $0, 1
982; SIMD128-NEXT:    f32.max $push2=, $pop1, $pop0
983; SIMD128-NEXT:    f32x4.extract_lane $push3=, $0, 2
984; SIMD128-NEXT:    f32.max $push4=, $pop2, $pop3
985; SIMD128-NEXT:    f32x4.extract_lane $push5=, $0, 3
986; SIMD128-NEXT:    f32.max $push6=, $pop4, $pop5
987; SIMD128-NEXT:    return $pop6
988  %res = tail call reassoc float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> %arg)
989  ret float %res
990}
991
; fminimum reduction of a <2 x double> vector via
; @llvm.vector.reduce.fminimum.v2f64, with no fast-math flags on the call.
; The autogenerated check lines expect lanes 0 and 1 to be extracted and
; combined with a single scalar f64.min.
992define double @pairwise_minimum_v2f64(<2 x double> %arg) {
993; SIMD128-LABEL: pairwise_minimum_v2f64:
994; SIMD128:         .functype pairwise_minimum_v2f64 (v128) -> (f64)
995; SIMD128-NEXT:  # %bb.0:
996; SIMD128-NEXT:    f64x2.extract_lane $push1=, $0, 0
997; SIMD128-NEXT:    f64x2.extract_lane $push0=, $0, 1
998; SIMD128-NEXT:    f64.min $push2=, $pop1, $pop0
999; SIMD128-NEXT:    return $pop2
1000  %res = tail call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> %arg)
1001  ret double %res
1002}
1003
; fminimum reduction of a <2 x double> vector via
; @llvm.vector.reduce.fminimum.v2f64; the call carries the `fast` flag.
; The autogenerated check lines expect lanes 0 and 1 to be extracted and
; combined with a single scalar f64.min; no vector instruction is expected
; even with the flag present.
1004define double @pairwise_minimum_v2f64_fast(<2 x double> %arg) {
1005; SIMD128-LABEL: pairwise_minimum_v2f64_fast:
1006; SIMD128:         .functype pairwise_minimum_v2f64_fast (v128) -> (f64)
1007; SIMD128-NEXT:  # %bb.0:
1008; SIMD128-NEXT:    f64x2.extract_lane $push1=, $0, 0
1009; SIMD128-NEXT:    f64x2.extract_lane $push0=, $0, 1
1010; SIMD128-NEXT:    f64.min $push2=, $pop1, $pop0
1011; SIMD128-NEXT:    return $pop2
1012  %res = tail call fast double @llvm.vector.reduce.fminimum.v2f64(<2 x double> %arg)
1013  ret double %res
1014}
1015
; fminimum reduction of a <4 x float> vector via
; @llvm.vector.reduce.fminimum.v4f32, with no fast-math flags on the call.
; The autogenerated check lines expect lanes 0..3 to be extracted and
; folded in lane order with three scalar f32.min instructions.
1016define float @pairwise_minimum_v4f32(<4 x float> %arg) {
1017; SIMD128-LABEL: pairwise_minimum_v4f32:
1018; SIMD128:         .functype pairwise_minimum_v4f32 (v128) -> (f32)
1019; SIMD128-NEXT:  # %bb.0:
1020; SIMD128-NEXT:    f32x4.extract_lane $push1=, $0, 0
1021; SIMD128-NEXT:    f32x4.extract_lane $push0=, $0, 1
1022; SIMD128-NEXT:    f32.min $push2=, $pop1, $pop0
1023; SIMD128-NEXT:    f32x4.extract_lane $push3=, $0, 2
1024; SIMD128-NEXT:    f32.min $push4=, $pop2, $pop3
1025; SIMD128-NEXT:    f32x4.extract_lane $push5=, $0, 3
1026; SIMD128-NEXT:    f32.min $push6=, $pop4, $pop5
1027; SIMD128-NEXT:    return $pop6
1028  %res = tail call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> %arg)
1029  ret float %res
1030}
1031
; fminimum reduction of a <4 x float> vector via
; @llvm.vector.reduce.fminimum.v4f32; the call carries the `fast` flag.
; The autogenerated check lines expect lanes 0..3 to be extracted and
; folded in lane order with three scalar f32.min instructions; no vector
; instruction is expected even with the flag present.
1032define float @pairwise_minimum_v4f32_fast(<4 x float> %arg) {
1033; SIMD128-LABEL: pairwise_minimum_v4f32_fast:
1034; SIMD128:         .functype pairwise_minimum_v4f32_fast (v128) -> (f32)
1035; SIMD128-NEXT:  # %bb.0:
1036; SIMD128-NEXT:    f32x4.extract_lane $push1=, $0, 0
1037; SIMD128-NEXT:    f32x4.extract_lane $push0=, $0, 1
1038; SIMD128-NEXT:    f32.min $push2=, $pop1, $pop0
1039; SIMD128-NEXT:    f32x4.extract_lane $push3=, $0, 2
1040; SIMD128-NEXT:    f32.min $push4=, $pop2, $pop3
1041; SIMD128-NEXT:    f32x4.extract_lane $push5=, $0, 3
1042; SIMD128-NEXT:    f32.min $push6=, $pop4, $pop5
1043; SIMD128-NEXT:    return $pop6
1044  %res = tail call fast float @llvm.vector.reduce.fminimum.v4f32(<4 x float> %arg)
1045  ret float %res
1046}
1047
; fminimum reduction of a <4 x float> vector via
; @llvm.vector.reduce.fminimum.v4f32; the call carries only the `reassoc`
; flag.
; The autogenerated check lines expect lanes 0..3 to be extracted and
; folded in lane order with three scalar f32.min instructions.
1048define float @pairwise_minimum_v4f32_reassoc(<4 x float> %arg) {
1049; SIMD128-LABEL: pairwise_minimum_v4f32_reassoc:
1050; SIMD128:         .functype pairwise_minimum_v4f32_reassoc (v128) -> (f32)
1051; SIMD128-NEXT:  # %bb.0:
1052; SIMD128-NEXT:    f32x4.extract_lane $push1=, $0, 0
1053; SIMD128-NEXT:    f32x4.extract_lane $push0=, $0, 1
1054; SIMD128-NEXT:    f32.min $push2=, $pop1, $pop0
1055; SIMD128-NEXT:    f32x4.extract_lane $push3=, $0, 2
1056; SIMD128-NEXT:    f32.min $push4=, $pop2, $pop3
1057; SIMD128-NEXT:    f32x4.extract_lane $push5=, $0, 3
1058; SIMD128-NEXT:    f32.min $push6=, $pop4, $pop5
1059; SIMD128-NEXT:    return $pop6
1060  %res = tail call reassoc float @llvm.vector.reduce.fminimum.v4f32(<4 x float> %arg)
1061  ret float %res
1062}
1063