; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s -mtriple=wasm32 -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s --check-prefix=SIMD128

; Codegen tests for the llvm.vector.reduce.* intrinsics on 128-bit vectors with
; the simd128 feature enabled. The check lines are autogenerated; regenerate
; them with the script named in the NOTE line instead of editing them by hand.

; Integer add reductions. The expected code halves the vector with a shuffle,
; combines the halves with a vector add, and finally extracts lane 0.

define i64 @pairwise_add_v2i64(<2 x i64> %arg) {
; SIMD128-LABEL: pairwise_add_v2i64:
; SIMD128:         .functype pairwise_add_v2i64 (v128) -> (i64)
; SIMD128-NEXT:  # %bb.0:
; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7
; SIMD128-NEXT:    i64x2.add $push1=, $0, $pop0
; SIMD128-NEXT:    i64x2.extract_lane $push2=, $pop1, 0
; SIMD128-NEXT:    return $pop2
  ; The intrinsic suffix matches the <2 x i64> operand type.
  %res = tail call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %arg)
  ret i64 %res
}

define i32 @pairwise_add_v4i32(<4 x i32> %arg) {
; SIMD128-LABEL: pairwise_add_v4i32:
; SIMD128:         .functype pairwise_add_v4i32 (v128) -> (i32)
; SIMD128-NEXT:  # %bb.0:
; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
; SIMD128-NEXT:    i32x4.add $push5=, $0, $pop0
; SIMD128-NEXT:    local.tee $push4=, $0=, $pop5
; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
; SIMD128-NEXT:    i32x4.add $push2=, $pop4, $pop1
; SIMD128-NEXT:    i32x4.extract_lane $push3=, $pop2, 0
; SIMD128-NEXT:    return $pop3
  %res = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %arg)
  ret i32 %res
}

define i16 @pairwise_add_v8i16(<8 x i16> %arg) {
; SIMD128-LABEL: pairwise_add_v8i16:
; SIMD128:         .functype pairwise_add_v8i16 (v128) -> (i32)
; SIMD128-NEXT:  # %bb.0:
; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
; SIMD128-NEXT:    i16x8.add $push8=, $0, $pop0
; SIMD128-NEXT:    local.tee $push7=, $0=, $pop8
; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
; SIMD128-NEXT:    i16x8.add $push6=, $pop7, $pop1
; SIMD128-NEXT:    local.tee $push5=, $0=, $pop6
; SIMD128-NEXT:    i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
; SIMD128-NEXT:    i16x8.add $push3=, $pop5, $pop2
; SIMD128-NEXT:    i16x8.extract_lane_u $push4=, $pop3, 0
; SIMD128-NEXT:    return $pop4
  %res = tail call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %arg)
  ret i16 %res
}

define i8 @pairwise_add_v16i8(<16 x i8> %arg) {
; SIMD128-LABEL: pairwise_add_v16i8:
; SIMD128:         .functype pairwise_add_v16i8 (v128) -> (i32)
; SIMD128-NEXT:  # %bb.0:
; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
; SIMD128-NEXT:    i8x16.add $push11=, $0, $pop0
; SIMD128-NEXT:    local.tee $push10=, $0=, $pop11
; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; SIMD128-NEXT:    i8x16.add $push9=, $pop10, $pop1
; SIMD128-NEXT:    local.tee $push8=, $0=, $pop9
; SIMD128-NEXT:    i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; SIMD128-NEXT:    i8x16.add $push7=, $pop8, $pop2
; SIMD128-NEXT:    local.tee $push6=, $0=, $pop7
; SIMD128-NEXT:    i8x16.shuffle $push3=, $0, $0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; SIMD128-NEXT:    i8x16.add $push4=, $pop6, $pop3
; SIMD128-NEXT:    i8x16.extract_lane_u $push5=, $pop4, 0
; SIMD128-NEXT:    return $pop5
  %res = tail call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %arg)
  ret i8 %res
}

; Integer mul reductions. The v16i8 case is expected to fall back to scalar
; i32.mul on extracted lanes after the first shuffle (see its check lines).

define i64 @pairwise_mul_v2i64(<2 x i64> %arg) {
; SIMD128-LABEL: pairwise_mul_v2i64:
; SIMD128:         .functype pairwise_mul_v2i64 (v128) -> (i64)
; SIMD128-NEXT:  # %bb.0:
; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7
; SIMD128-NEXT:    i64x2.mul $push1=, $0, $pop0
; SIMD128-NEXT:    i64x2.extract_lane $push2=, $pop1, 0
; SIMD128-NEXT:    return $pop2
  %res = tail call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> %arg)
  ret i64 %res
}

define i32 @pairwise_mul_v4i32(<4 x i32> %arg) {
; SIMD128-LABEL: pairwise_mul_v4i32:
; SIMD128:         .functype pairwise_mul_v4i32 (v128) -> (i32)
; SIMD128-NEXT:  # %bb.0:
; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
; SIMD128-NEXT:    i32x4.mul $push5=, $0, $pop0
; SIMD128-NEXT:    local.tee $push4=, $0=, $pop5
; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
; SIMD128-NEXT:    i32x4.mul $push2=, $pop4, $pop1
; SIMD128-NEXT:    i32x4.extract_lane $push3=, $pop2, 0
; SIMD128-NEXT:    return $pop3
  %res = tail call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %arg)
  ret i32 %res
}

define i16 @pairwise_mul_v8i16(<8 x i16> %arg) {
; SIMD128-LABEL: pairwise_mul_v8i16:
; SIMD128:         .functype pairwise_mul_v8i16 (v128) -> (i32)
; SIMD128-NEXT:  # %bb.0:
; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
; SIMD128-NEXT:    i16x8.mul $push8=, $0, $pop0
; SIMD128-NEXT:    local.tee $push7=, $0=, $pop8
; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
; SIMD128-NEXT:    i16x8.mul $push6=, $pop7, $pop1
; SIMD128-NEXT:    local.tee $push5=, $0=, $pop6
; SIMD128-NEXT:    i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
; SIMD128-NEXT:    i16x8.mul $push3=, $pop5, $pop2
; SIMD128-NEXT:    i16x8.extract_lane_u $push4=, $pop3, 0
; SIMD128-NEXT:    return $pop4
  %res = tail call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> %arg)
  ret i16 %res
}

define i8 @pairwise_mul_v16i8(<16 x i8> %arg) {
; SIMD128-LABEL: pairwise_mul_v16i8:
; SIMD128:         .functype pairwise_mul_v16i8 (v128) -> (i32)
; SIMD128-NEXT:  # %bb.0:
; SIMD128-NEXT:    i8x16.extract_lane_u $push26=, $0, 0
; SIMD128-NEXT:    i8x16.shuffle $push32=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
; SIMD128-NEXT:    local.tee $push31=, $1=, $pop32
; SIMD128-NEXT:    i8x16.extract_lane_u $push25=, $pop31, 0
; SIMD128-NEXT:    i32.mul $push27=, $pop26, $pop25
; SIMD128-NEXT:    i8x16.extract_lane_u $push23=, $0, 4
; SIMD128-NEXT:    i8x16.extract_lane_u $push22=, $1, 4
; SIMD128-NEXT:    i32.mul $push24=, $pop23, $pop22
; SIMD128-NEXT:    i32.mul $push28=, $pop27, $pop24
; SIMD128-NEXT:    i8x16.extract_lane_u $push19=, $0, 2
; SIMD128-NEXT:    i8x16.extract_lane_u $push18=, $1, 2
; SIMD128-NEXT:    i32.mul $push20=, $pop19, $pop18
; SIMD128-NEXT:    i8x16.extract_lane_u $push16=, $0, 6
; SIMD128-NEXT:    i8x16.extract_lane_u $push15=, $1, 6
; SIMD128-NEXT:    i32.mul $push17=, $pop16, $pop15
; SIMD128-NEXT:    i32.mul $push21=, $pop20, $pop17
; SIMD128-NEXT:    i32.mul $push29=, $pop28, $pop21
; SIMD128-NEXT:    i8x16.extract_lane_u $push11=, $0, 1
; SIMD128-NEXT:    i8x16.extract_lane_u $push10=, $1, 1
; SIMD128-NEXT:    i32.mul $push12=, $pop11, $pop10
; SIMD128-NEXT:    i8x16.extract_lane_u $push8=, $0, 5
; SIMD128-NEXT:    i8x16.extract_lane_u $push7=, $1, 5
; SIMD128-NEXT:    i32.mul $push9=, $pop8, $pop7
; SIMD128-NEXT:    i32.mul $push13=, $pop12, $pop9
; SIMD128-NEXT:    i8x16.extract_lane_u $push4=, $0, 3
; SIMD128-NEXT:    i8x16.extract_lane_u $push3=, $1, 3
; SIMD128-NEXT:    i32.mul $push5=, $pop4, $pop3
; SIMD128-NEXT:    i8x16.extract_lane_u $push1=, $0, 7
; SIMD128-NEXT:    i8x16.extract_lane_u $push0=, $1, 7
; SIMD128-NEXT:    i32.mul $push2=, $pop1, $pop0
; SIMD128-NEXT:    i32.mul $push6=, $pop5, $pop2
; SIMD128-NEXT:    i32.mul $push14=, $pop13, $pop6
; SIMD128-NEXT:    i32.mul $push30=, $pop29, $pop14
; SIMD128-NEXT:    return $pop30
  %res = tail call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> %arg)
  ret i8 %res
}

; Bitwise and reductions. These use the type-agnostic v128.and for every
; element width; only the final lane extract differs.

define i64 @pairwise_and_v2i64(<2 x i64> %arg) {
; SIMD128-LABEL: pairwise_and_v2i64:
; SIMD128:         .functype pairwise_and_v2i64 (v128) -> (i64)
; SIMD128-NEXT:  # %bb.0:
; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7
; SIMD128-NEXT:    v128.and $push1=, $0, $pop0
; SIMD128-NEXT:    i64x2.extract_lane $push2=, $pop1, 0
; SIMD128-NEXT:    return $pop2
  %res = tail call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> %arg)
  ret i64 %res
}

define i32 @pairwise_and_v4i32(<4 x i32> %arg) {
; SIMD128-LABEL: pairwise_and_v4i32:
; SIMD128:         .functype pairwise_and_v4i32 (v128) -> (i32)
; SIMD128-NEXT:  # %bb.0:
; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
; SIMD128-NEXT:    v128.and $push5=, $0, $pop0
; SIMD128-NEXT:    local.tee $push4=, $0=, $pop5
; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
; SIMD128-NEXT:    v128.and $push2=, $pop4, $pop1
; SIMD128-NEXT:    i32x4.extract_lane $push3=, $pop2, 0
; SIMD128-NEXT:    return $pop3
  %res = tail call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %arg)
  ret i32 %res
}

define i16 @pairwise_and_v8i16(<8 x i16> %arg) {
; SIMD128-LABEL: pairwise_and_v8i16:
; SIMD128:         .functype pairwise_and_v8i16 (v128) -> (i32)
; SIMD128-NEXT:  # %bb.0:
; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
; SIMD128-NEXT:    v128.and $push8=, $0, $pop0
; SIMD128-NEXT:    local.tee $push7=, $0=, $pop8
; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
; SIMD128-NEXT:    v128.and $push6=, $pop7, $pop1
; SIMD128-NEXT:    local.tee $push5=, $0=, $pop6
; SIMD128-NEXT:    i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
; SIMD128-NEXT:    v128.and $push3=, $pop5, $pop2
; SIMD128-NEXT:    i16x8.extract_lane_u $push4=, $pop3, 0
; SIMD128-NEXT:    return $pop4
  %res = tail call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> %arg)
  ret i16 %res
}

define i8 @pairwise_and_v16i8(<16 x i8> %arg) {
; SIMD128-LABEL: pairwise_and_v16i8:
; SIMD128:         .functype pairwise_and_v16i8 (v128) -> (i32)
; SIMD128-NEXT:  # %bb.0:
; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
; SIMD128-NEXT:    v128.and $push11=, $0, $pop0
; SIMD128-NEXT:    local.tee $push10=, $0=, $pop11
; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; SIMD128-NEXT:    v128.and $push9=, $pop10, $pop1
; SIMD128-NEXT:    local.tee $push8=, $0=, $pop9
; SIMD128-NEXT:    i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; SIMD128-NEXT:    v128.and $push7=, $pop8, $pop2
; SIMD128-NEXT:    local.tee $push6=, $0=, $pop7
; SIMD128-NEXT:    i8x16.shuffle $push3=, $0, $0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; SIMD128-NEXT:    v128.and $push4=, $pop6, $pop3
; SIMD128-NEXT:    i8x16.extract_lane_u $push5=, $pop4, 0
; SIMD128-NEXT:    return $pop5
  %res = tail call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> %arg)
  ret i8 %res
}

; Bitwise or reductions (v128.or).

define i64 @pairwise_or_v2i64(<2 x i64> %arg) {
; SIMD128-LABEL: pairwise_or_v2i64:
; SIMD128:         .functype pairwise_or_v2i64 (v128) -> (i64)
; SIMD128-NEXT:  # %bb.0:
; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7
; SIMD128-NEXT:    v128.or $push1=, $0, $pop0
; SIMD128-NEXT:    i64x2.extract_lane $push2=, $pop1, 0
; SIMD128-NEXT:    return $pop2
  %res = tail call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %arg)
  ret i64 %res
}

define i32 @pairwise_or_v4i32(<4 x i32> %arg) {
; SIMD128-LABEL: pairwise_or_v4i32:
; SIMD128:         .functype pairwise_or_v4i32 (v128) -> (i32)
; SIMD128-NEXT:  # %bb.0:
; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
; SIMD128-NEXT:    v128.or $push5=, $0, $pop0
; SIMD128-NEXT:    local.tee $push4=, $0=, $pop5
; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
; SIMD128-NEXT:    v128.or $push2=, $pop4, $pop1
; SIMD128-NEXT:    i32x4.extract_lane $push3=, $pop2, 0
; SIMD128-NEXT:    return $pop3
  %res = tail call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %arg)
  ret i32 %res
}

define i16 @pairwise_or_v8i16(<8 x i16> %arg) {
; SIMD128-LABEL: pairwise_or_v8i16:
; SIMD128:         .functype pairwise_or_v8i16 (v128) -> (i32)
; SIMD128-NEXT:  # %bb.0:
; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
; SIMD128-NEXT:    v128.or $push8=, $0, $pop0
; SIMD128-NEXT:    local.tee $push7=, $0=, $pop8
; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
; SIMD128-NEXT:    v128.or $push6=, $pop7, $pop1
; SIMD128-NEXT:    local.tee $push5=, $0=, $pop6
; SIMD128-NEXT:    i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
; SIMD128-NEXT:    v128.or $push3=, $pop5, $pop2
; SIMD128-NEXT:    i16x8.extract_lane_u $push4=, $pop3, 0
; SIMD128-NEXT:    return $pop4
  %res = tail call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> %arg)
  ret i16 %res
}

define i8 @pairwise_or_v16i8(<16 x i8> %arg) {
; SIMD128-LABEL: pairwise_or_v16i8:
; SIMD128:         .functype pairwise_or_v16i8 (v128) -> (i32)
; SIMD128-NEXT:  # %bb.0:
; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
; SIMD128-NEXT:    v128.or $push11=, $0, $pop0
; SIMD128-NEXT:    local.tee $push10=, $0=, $pop11
; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; SIMD128-NEXT:    v128.or $push9=, $pop10, $pop1
; SIMD128-NEXT:    local.tee $push8=, $0=, $pop9
; SIMD128-NEXT:    i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; SIMD128-NEXT:    v128.or $push7=, $pop8, $pop2
; SIMD128-NEXT:    local.tee $push6=, $0=, $pop7
; SIMD128-NEXT:    i8x16.shuffle $push3=, $0, $0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; SIMD128-NEXT:    v128.or $push4=, $pop6, $pop3
; SIMD128-NEXT:    i8x16.extract_lane_u $push5=, $pop4, 0
; SIMD128-NEXT:    return $pop5
  %res = tail call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> %arg)
  ret i8 %res
}

; Bitwise xor reductions (v128.xor).

define i64 @pairwise_xor_v2i64(<2 x i64> %arg) {
; SIMD128-LABEL: pairwise_xor_v2i64:
; SIMD128:         .functype pairwise_xor_v2i64 (v128) -> (i64)
; SIMD128-NEXT:  # %bb.0:
; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7
; SIMD128-NEXT:    v128.xor $push1=, $0, $pop0
; SIMD128-NEXT:    i64x2.extract_lane $push2=, $pop1, 0
; SIMD128-NEXT:    return $pop2
  %res = tail call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> %arg)
  ret i64 %res
}

define i32 @pairwise_xor_v4i32(<4 x i32> %arg) {
; SIMD128-LABEL: pairwise_xor_v4i32:
; SIMD128:         .functype pairwise_xor_v4i32 (v128) -> (i32)
; SIMD128-NEXT:  # %bb.0:
; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
; SIMD128-NEXT:    v128.xor $push5=, $0, $pop0
; SIMD128-NEXT:    local.tee $push4=, $0=, $pop5
; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
; SIMD128-NEXT:    v128.xor $push2=, $pop4, $pop1
; SIMD128-NEXT:    i32x4.extract_lane $push3=, $pop2, 0
; SIMD128-NEXT:    return $pop3
  %res = tail call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %arg)
  ret i32 %res
}

define i16 @pairwise_xor_v8i16(<8 x i16> %arg) {
; SIMD128-LABEL: pairwise_xor_v8i16:
; SIMD128:         .functype pairwise_xor_v8i16 (v128) -> (i32)
; SIMD128-NEXT:  # %bb.0:
; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
; SIMD128-NEXT:    v128.xor $push8=, $0, $pop0
; SIMD128-NEXT:    local.tee $push7=, $0=, $pop8
; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
; SIMD128-NEXT:    v128.xor $push6=, $pop7, $pop1
; SIMD128-NEXT:    local.tee $push5=, $0=, $pop6
; SIMD128-NEXT:    i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
; SIMD128-NEXT:    v128.xor $push3=, $pop5, $pop2
; SIMD128-NEXT:    i16x8.extract_lane_u $push4=, $pop3, 0
; SIMD128-NEXT:    return $pop4
  %res = tail call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> %arg)
  ret i16 %res
}

define i8 @pairwise_xor_v16i8(<16 x i8> %arg) {
; SIMD128-LABEL: pairwise_xor_v16i8:
; SIMD128:         .functype pairwise_xor_v16i8 (v128) -> (i32)
; SIMD128-NEXT:  # %bb.0:
; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
; SIMD128-NEXT:    v128.xor $push11=, $0, $pop0
; SIMD128-NEXT:    local.tee $push10=, $0=, $pop11
; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; SIMD128-NEXT:    v128.xor $push9=, $pop10, $pop1
; SIMD128-NEXT:    local.tee $push8=, $0=, $pop9
; SIMD128-NEXT:    i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; SIMD128-NEXT:    v128.xor $push7=, $pop8, $pop2
; SIMD128-NEXT:    local.tee $push6=, $0=, $pop7
; SIMD128-NEXT:    i8x16.shuffle $push3=, $0, $0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; SIMD128-NEXT:    v128.xor $push4=, $pop6, $pop3
; SIMD128-NEXT:    i8x16.extract_lane_u $push5=, $pop4, 0
; SIMD128-NEXT:    return $pop5
  %res = tail call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> %arg)
  ret i8 %res
}

; Signed max reductions. The v2i64 case is expected to use a compare plus
; v128.bitselect instead of a max instruction.

define i64 @pairwise_smax_v2i64(<2 x i64> %arg) {
; SIMD128-LABEL: pairwise_smax_v2i64:
; SIMD128:         .functype pairwise_smax_v2i64 (v128) -> (i64)
; SIMD128-NEXT:  # %bb.0:
; SIMD128-NEXT:    i8x16.shuffle $push4=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7
; SIMD128-NEXT:    local.tee $push3=, $1=, $pop4
; SIMD128-NEXT:    i64x2.gt_s $push0=, $0, $1
; SIMD128-NEXT:    v128.bitselect $push1=, $0, $pop3, $pop0
; SIMD128-NEXT:    i64x2.extract_lane $push2=, $pop1, 0
; SIMD128-NEXT:    return $pop2
  %res = tail call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> %arg)
  ret i64 %res
}

define i32 @pairwise_smax_v4i32(<4 x i32> %arg) {
; SIMD128-LABEL: pairwise_smax_v4i32:
; SIMD128:         .functype pairwise_smax_v4i32 (v128) -> (i32)
; SIMD128-NEXT:  # %bb.0:
; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
; SIMD128-NEXT:    i32x4.max_s $push5=, $0, $pop0
; SIMD128-NEXT:    local.tee $push4=, $0=, $pop5
; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
; SIMD128-NEXT:    i32x4.max_s $push2=, $pop4, $pop1
; SIMD128-NEXT:    i32x4.extract_lane $push3=, $pop2, 0
; SIMD128-NEXT:    return $pop3
  %res = tail call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %arg)
  ret i32 %res
}

define i16 @pairwise_smax_v8i16(<8 x i16> %arg) {
; SIMD128-LABEL: pairwise_smax_v8i16:
; SIMD128:         .functype pairwise_smax_v8i16 (v128) -> (i32)
; SIMD128-NEXT:  # %bb.0:
; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
; SIMD128-NEXT:    i16x8.max_s $push8=, $0, $pop0
; SIMD128-NEXT:    local.tee $push7=, $0=, $pop8
; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
; SIMD128-NEXT:    i16x8.max_s $push6=, $pop7, $pop1
; SIMD128-NEXT:    local.tee $push5=, $0=, $pop6
; SIMD128-NEXT:    i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
; SIMD128-NEXT:    i16x8.max_s $push3=, $pop5, $pop2
; SIMD128-NEXT:    i16x8.extract_lane_u $push4=, $pop3, 0
; SIMD128-NEXT:    return $pop4
  %res = tail call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %arg)
  ret i16 %res
}

define i8 @pairwise_smax_v16i8(<16 x i8> %arg) {
; SIMD128-LABEL: pairwise_smax_v16i8:
; SIMD128:         .functype pairwise_smax_v16i8 (v128) -> (i32)
; SIMD128-NEXT:  # %bb.0:
; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
; SIMD128-NEXT:    i8x16.max_s $push11=, $0, $pop0
; SIMD128-NEXT:    local.tee $push10=, $0=, $pop11
; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; SIMD128-NEXT:    i8x16.max_s $push9=, $pop10, $pop1
; SIMD128-NEXT:    local.tee $push8=, $0=, $pop9
; SIMD128-NEXT:    i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; SIMD128-NEXT:    i8x16.max_s $push7=, $pop8, $pop2
; SIMD128-NEXT:    local.tee $push6=, $0=, $pop7
; SIMD128-NEXT:    i8x16.shuffle $push3=, $0, $0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; SIMD128-NEXT:    i8x16.max_s $push4=, $pop6, $pop3
; SIMD128-NEXT:    i8x16.extract_lane_u $push5=, $pop4, 0
; SIMD128-NEXT:    return $pop5
  %res = tail call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %arg)
  ret i8 %res
}

; Signed min reductions. The v2i64 case mirrors the signed max one with
; i64x2.lt_s as the compare.

define i64 @pairwise_smin_v2i64(<2 x i64> %arg) {
; SIMD128-LABEL: pairwise_smin_v2i64:
; SIMD128:         .functype pairwise_smin_v2i64 (v128) -> (i64)
; SIMD128-NEXT:  # %bb.0:
; SIMD128-NEXT:    i8x16.shuffle $push4=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7
; SIMD128-NEXT:    local.tee $push3=, $1=, $pop4
; SIMD128-NEXT:    i64x2.lt_s $push0=, $0, $1
; SIMD128-NEXT:    v128.bitselect $push1=, $0, $pop3, $pop0
; SIMD128-NEXT:    i64x2.extract_lane $push2=, $pop1, 0
; SIMD128-NEXT:    return $pop2
  %res = tail call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> %arg)
  ret i64 %res
}

define i32 @pairwise_smin_v4i32(<4 x i32> %arg) {
; SIMD128-LABEL: pairwise_smin_v4i32:
; SIMD128:         .functype pairwise_smin_v4i32 (v128) -> (i32)
; SIMD128-NEXT:  # %bb.0:
; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
; SIMD128-NEXT:    i32x4.min_s $push5=, $0, $pop0
; SIMD128-NEXT:    local.tee $push4=, $0=, $pop5
; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
; SIMD128-NEXT:    i32x4.min_s $push2=, $pop4, $pop1
; SIMD128-NEXT:    i32x4.extract_lane $push3=, $pop2, 0
; SIMD128-NEXT:    return $pop3
  %res = tail call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %arg)
  ret i32 %res
}

define i16 @pairwise_smin_v8i16(<8 x i16> %arg) {
; SIMD128-LABEL: pairwise_smin_v8i16:
; SIMD128:         .functype pairwise_smin_v8i16 (v128) -> (i32)
; SIMD128-NEXT:  # %bb.0:
; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
; SIMD128-NEXT:    i16x8.min_s $push8=, $0, $pop0
; SIMD128-NEXT:    local.tee $push7=, $0=, $pop8
; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
; SIMD128-NEXT:    i16x8.min_s $push6=, $pop7, $pop1
; SIMD128-NEXT:    local.tee $push5=, $0=, $pop6
; SIMD128-NEXT:    i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
; SIMD128-NEXT:    i16x8.min_s $push3=, $pop5, $pop2
; SIMD128-NEXT:    i16x8.extract_lane_u $push4=, $pop3, 0
; SIMD128-NEXT:    return $pop4
  %res = tail call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %arg)
  ret i16 %res
}

define i8 @pairwise_smin_v16i8(<16 x i8> %arg) {
; SIMD128-LABEL: pairwise_smin_v16i8:
; SIMD128:         .functype pairwise_smin_v16i8 (v128) -> (i32)
; SIMD128-NEXT:  # %bb.0:
; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
; SIMD128-NEXT:    i8x16.min_s $push11=, $0, $pop0
; SIMD128-NEXT:    local.tee $push10=, $0=, $pop11
; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; SIMD128-NEXT:    i8x16.min_s $push9=, $pop10, $pop1
; SIMD128-NEXT:    local.tee $push8=, $0=, $pop9
; SIMD128-NEXT:    i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; SIMD128-NEXT:    i8x16.min_s $push7=, $pop8, $pop2
; SIMD128-NEXT:    local.tee $push6=, $0=, $pop7
; SIMD128-NEXT:    i8x16.shuffle $push3=, $0, $0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; SIMD128-NEXT:    i8x16.min_s $push4=, $pop6, $pop3
; SIMD128-NEXT:    i8x16.extract_lane_u $push5=, $pop4, 0
; SIMD128-NEXT:    return $pop5
  %res = tail call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %arg)
  ret i8 %res
}

; Unsigned max reductions. The v2i64 case is expected to do the compare on
; scalars (i64.gt_u) and rebuild a mask for v128.bitselect.

define i64 @pairwise_umax_v2i64(<2 x i64> %arg) {
; SIMD128-LABEL: pairwise_umax_v2i64:
; SIMD128:         .functype pairwise_umax_v2i64 (v128) -> (i64)
; SIMD128-NEXT:  # %bb.0:
; SIMD128-NEXT:    i8x16.shuffle $push10=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7
; SIMD128-NEXT:    local.tee $push9=, $1=, $pop10
; SIMD128-NEXT:    i64.const $push4=, -1
; SIMD128-NEXT:    i64.const $push3=, 0
; SIMD128-NEXT:    i64x2.extract_lane $push1=, $0, 0
; SIMD128-NEXT:    i64x2.extract_lane $push0=, $1, 0
; SIMD128-NEXT:    i64.gt_u $push2=, $pop1, $pop0
; SIMD128-NEXT:    i64.select $push5=, $pop4, $pop3, $pop2
; SIMD128-NEXT:    i64x2.replace_lane $push6=, $0, 0, $pop5
; SIMD128-NEXT:    v128.bitselect $push7=, $0, $pop9, $pop6
; SIMD128-NEXT:    i64x2.extract_lane $push8=, $pop7, 0
; SIMD128-NEXT:    return $pop8
  %res = tail call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> %arg)
  ret i64 %res
}

define i32 @pairwise_umax_v4i32(<4 x i32> %arg) {
; SIMD128-LABEL: pairwise_umax_v4i32:
; SIMD128:         .functype pairwise_umax_v4i32 (v128) -> (i32)
; SIMD128-NEXT:  # %bb.0:
; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
; SIMD128-NEXT:    i32x4.max_u $push5=, $0, $pop0
; SIMD128-NEXT:    local.tee $push4=, $0=, $pop5
; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
; SIMD128-NEXT:    i32x4.max_u $push2=, $pop4, $pop1
; SIMD128-NEXT:    i32x4.extract_lane $push3=, $pop2, 0
; SIMD128-NEXT:    return $pop3
  %res = tail call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %arg)
  ret i32 %res
}

define i16 @pairwise_umax_v8i16(<8 x i16> %arg) {
; SIMD128-LABEL: pairwise_umax_v8i16:
; SIMD128:         .functype pairwise_umax_v8i16 (v128) -> (i32)
; SIMD128-NEXT:  # %bb.0:
; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
; SIMD128-NEXT:    i16x8.max_u $push8=, $0, $pop0
; SIMD128-NEXT:    local.tee $push7=, $0=, $pop8
; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
; SIMD128-NEXT:    i16x8.max_u $push6=, $pop7, $pop1
; SIMD128-NEXT:    local.tee $push5=, $0=, $pop6
; SIMD128-NEXT:    i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
; SIMD128-NEXT:    i16x8.max_u $push3=, $pop5, $pop2
; SIMD128-NEXT:    i16x8.extract_lane_u $push4=, $pop3, 0
; SIMD128-NEXT:    return $pop4
  %res = tail call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %arg)
  ret i16 %res
}

define i8 @pairwise_umax_v16i8(<16 x i8> %arg) {
; SIMD128-LABEL: pairwise_umax_v16i8:
; SIMD128:         .functype pairwise_umax_v16i8 (v128) -> (i32)
; SIMD128-NEXT:  # %bb.0:
; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
; SIMD128-NEXT:    i8x16.max_u $push11=, $0, $pop0
; SIMD128-NEXT:    local.tee $push10=, $0=, $pop11
; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; SIMD128-NEXT:    i8x16.max_u $push9=, $pop10, $pop1
; SIMD128-NEXT:    local.tee $push8=, $0=, $pop9
; SIMD128-NEXT:    i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; SIMD128-NEXT:    i8x16.max_u $push7=, $pop8, $pop2
; SIMD128-NEXT:    local.tee $push6=, $0=, $pop7
; SIMD128-NEXT:    i8x16.shuffle $push3=, $0, $0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; SIMD128-NEXT:    i8x16.max_u $push4=, $pop6, $pop3
; SIMD128-NEXT:    i8x16.extract_lane_u $push5=, $pop4, 0
; SIMD128-NEXT:    return $pop5
  %res = tail call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %arg)
  ret i8 %res
}

; Unsigned min reductions. The v2i64 case mirrors the unsigned max one with
; i64.lt_u as the scalar compare.

define i64 @pairwise_umin_v2i64(<2 x i64> %arg) {
; SIMD128-LABEL: pairwise_umin_v2i64:
; SIMD128:         .functype pairwise_umin_v2i64 (v128) -> (i64)
; SIMD128-NEXT:  # %bb.0:
; SIMD128-NEXT:    i8x16.shuffle $push10=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7
; SIMD128-NEXT:    local.tee $push9=, $1=, $pop10
; SIMD128-NEXT:    i64.const $push4=, -1
; SIMD128-NEXT:    i64.const $push3=, 0
; SIMD128-NEXT:    i64x2.extract_lane $push1=, $0, 0
; SIMD128-NEXT:    i64x2.extract_lane $push0=, $1, 0
; SIMD128-NEXT:    i64.lt_u $push2=, $pop1, $pop0
; SIMD128-NEXT:    i64.select $push5=, $pop4, $pop3, $pop2
; SIMD128-NEXT:    i64x2.replace_lane $push6=, $0, 0, $pop5
; SIMD128-NEXT:    v128.bitselect $push7=, $0, $pop9, $pop6
; SIMD128-NEXT:    i64x2.extract_lane $push8=, $pop7, 0
; SIMD128-NEXT:    return $pop8
  %res = tail call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> %arg)
  ret i64 %res
}

define i32 @pairwise_umin_v4i32(<4 x i32> %arg) {
; SIMD128-LABEL: pairwise_umin_v4i32:
; SIMD128:         .functype pairwise_umin_v4i32 (v128) -> (i32)
; SIMD128-NEXT:  # %bb.0:
; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
; SIMD128-NEXT:    i32x4.min_u $push5=, $0, $pop0
; SIMD128-NEXT:    local.tee $push4=, $0=, $pop5
; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
; SIMD128-NEXT:    i32x4.min_u $push2=, $pop4, $pop1
; SIMD128-NEXT:    i32x4.extract_lane $push3=, $pop2, 0
; SIMD128-NEXT:    return $pop3
  %res = tail call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %arg)
  ret i32 %res
}

define i16 @pairwise_umin_v8i16(<8 x i16> %arg) {
; SIMD128-LABEL: pairwise_umin_v8i16:
; SIMD128:         .functype pairwise_umin_v8i16 (v128) -> (i32)
; SIMD128-NEXT:  # %bb.0:
; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
; SIMD128-NEXT:    i16x8.min_u $push8=, $0, $pop0
; SIMD128-NEXT:    local.tee $push7=, $0=, $pop8
; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
; SIMD128-NEXT:    i16x8.min_u $push6=, $pop7, $pop1
; SIMD128-NEXT:    local.tee $push5=, $0=, $pop6
; SIMD128-NEXT:    i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
; SIMD128-NEXT:    i16x8.min_u $push3=, $pop5, $pop2
; SIMD128-NEXT:    i16x8.extract_lane_u $push4=, $pop3, 0
; SIMD128-NEXT:    return $pop4
  %res = tail call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %arg)
  ret i16 %res
}

define i8 @pairwise_umin_v16i8(<16 x i8> %arg) {
; SIMD128-LABEL: pairwise_umin_v16i8:
; SIMD128:         .functype pairwise_umin_v16i8 (v128) -> (i32)
; SIMD128-NEXT:  # %bb.0:
; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
; SIMD128-NEXT:    i8x16.min_u $push11=, $0, $pop0
; SIMD128-NEXT:    local.tee $push10=, $0=, $pop11
; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; SIMD128-NEXT:    i8x16.min_u $push9=, $pop10, $pop1
; SIMD128-NEXT:    local.tee $push8=, $0=, $pop9
; SIMD128-NEXT:    i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; SIMD128-NEXT:    i8x16.min_u $push7=, $pop8, $pop2
; SIMD128-NEXT:    local.tee $push6=, $0=, $pop7
; SIMD128-NEXT:    i8x16.shuffle $push3=, $0, $0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; SIMD128-NEXT:    i8x16.min_u $push4=, $pop6, $pop3
; SIMD128-NEXT:    i8x16.extract_lane_u $push5=, $pop4, 0
; SIMD128-NEXT:    return $pop5
  %res = tail call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %arg)
  ret i8 %res
}

; Floating-point add reductions. Without fast-math flags the expected code is
; a sequence of scalar adds on extracted lanes; with `fast` or `reassoc` it is
; the shuffle + vector add form.

define double @pairwise_add_v2f64(<2 x double> %arg) {
; SIMD128-LABEL: pairwise_add_v2f64:
; SIMD128:         .functype pairwise_add_v2f64 (v128) -> (f64)
; SIMD128-NEXT:  # %bb.0:
; SIMD128-NEXT:    f64x2.extract_lane $push1=, $0, 0
; SIMD128-NEXT:    f64x2.extract_lane $push0=, $0, 1
; SIMD128-NEXT:    f64.add $push2=, $pop1, $pop0
; SIMD128-NEXT:    return $pop2
  %res = tail call double @llvm.vector.reduce.fadd.v2f64(double -0.0, <2 x double> %arg)
  ret double %res
}

define double @pairwise_add_v2f64_fast(<2 x double> %arg) {
; SIMD128-LABEL: pairwise_add_v2f64_fast:
; SIMD128:         .functype pairwise_add_v2f64_fast (v128) -> (f64)
; SIMD128-NEXT:  # %bb.0:
; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7
; SIMD128-NEXT:    f64x2.add $push1=, $0, $pop0
; SIMD128-NEXT:    f64x2.extract_lane $push2=, $pop1, 0
; SIMD128-NEXT:    return $pop2
  %res = tail call fast double @llvm.vector.reduce.fadd.v2f64(double -0.0, <2 x double> %arg)
  ret double %res
}

define float @pairwise_add_v4f32(<4 x float> %arg) {
; SIMD128-LABEL: pairwise_add_v4f32:
; SIMD128:         .functype pairwise_add_v4f32 (v128) -> (f32)
; SIMD128-NEXT:  # %bb.0:
; SIMD128-NEXT:    f32x4.extract_lane $push1=, $0, 0
; SIMD128-NEXT:    f32x4.extract_lane $push0=, $0, 1
; SIMD128-NEXT:    f32.add $push2=, $pop1, $pop0
; SIMD128-NEXT:    f32x4.extract_lane $push3=, $0, 2
; SIMD128-NEXT:    f32.add $push4=, $pop2, $pop3
; SIMD128-NEXT:    f32x4.extract_lane $push5=, $0, 3
; SIMD128-NEXT:    f32.add $push6=, $pop4, $pop5
; SIMD128-NEXT:    return $pop6
  %res = tail call float @llvm.vector.reduce.fadd.v4f32(float -0.0, <4 x float> %arg)
  ret float %res
}

define float @pairwise_add_v4f32_fast(<4 x float> %arg) {
; SIMD128-LABEL: pairwise_add_v4f32_fast:
; SIMD128:         .functype pairwise_add_v4f32_fast (v128) -> (f32)
; SIMD128-NEXT:  # %bb.0:
; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 0, 1, 2, 3
; SIMD128-NEXT:    f32x4.add $push5=, $0, $pop0
; SIMD128-NEXT:    local.tee $push4=, $0=, $pop5
; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 8, 9, 10, 11, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
; SIMD128-NEXT:    f32x4.add $push2=, $pop4, $pop1
; SIMD128-NEXT:    f32x4.extract_lane $push3=, $pop2, 0
; SIMD128-NEXT:    return $pop3
  %res = tail call fast float @llvm.vector.reduce.fadd.v4f32(float -0.0, <4 x float> %arg)
  ret float %res
}

define float @pairwise_add_v4f32_reassoc(<4 x float> %arg) {
; SIMD128-LABEL: pairwise_add_v4f32_reassoc:
; SIMD128:         .functype pairwise_add_v4f32_reassoc (v128) -> (f32)
; SIMD128-NEXT:  # %bb.0:
; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 0, 1, 2, 3
; SIMD128-NEXT:    f32x4.add $push5=, $0, $pop0
; SIMD128-NEXT:    local.tee $push4=, $0=, $pop5
; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 8, 9, 10, 11, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
; SIMD128-NEXT:    f32x4.add $push2=, $pop4, $pop1
; SIMD128-NEXT:    f32x4.extract_lane $push3=, $pop2, 0
; SIMD128-NEXT:    return $pop3
  %res = tail call reassoc float @llvm.vector.reduce.fadd.v4f32(float -0.0, <4 x float> %arg)
  ret float %res
}

; Floating-point mul reductions. Every call passes -0.0 as the start value, so
; the expected code multiplies by a -0x0p0 constant; the `fast` variants are
; expected to fold to a constant zero (see their check lines).

define double @pairwise_mul_v2f64(<2 x double> %arg) {
; SIMD128-LABEL: pairwise_mul_v2f64:
; SIMD128:         .functype pairwise_mul_v2f64 (v128) -> (f64)
; SIMD128-NEXT:  # %bb.0:
; SIMD128-NEXT:    f64x2.extract_lane $push0=, $0, 0
; SIMD128-NEXT:    f64.const $push1=, -0x0p0
; SIMD128-NEXT:    f64.mul $push2=, $pop0, $pop1
; SIMD128-NEXT:    f64x2.extract_lane $push3=, $0, 1
; SIMD128-NEXT:    f64.mul $push4=, $pop2, $pop3
; SIMD128-NEXT:    return $pop4
  %res = tail call double @llvm.vector.reduce.fmul.v2f64(double -0.0, <2 x double> %arg)
  ret double %res
}

define double @pairwise_mul_v2f64_fast(<2 x double> %arg) {
; SIMD128-LABEL: pairwise_mul_v2f64_fast:
; SIMD128:         .functype pairwise_mul_v2f64_fast (v128) -> (f64)
; SIMD128-NEXT:  # %bb.0:
; SIMD128-NEXT:    f64.const $push0=, 0x0p0
; SIMD128-NEXT:    return $pop0
  %res = tail call fast double @llvm.vector.reduce.fmul.v2f64(double -0.0, <2 x double> %arg)
  ret double %res
}

define float @pairwise_mul_v4f32(<4 x float> %arg) {
; SIMD128-LABEL: pairwise_mul_v4f32:
; SIMD128:         .functype pairwise_mul_v4f32 (v128) -> (f32)
; SIMD128-NEXT:  # %bb.0:
; SIMD128-NEXT:    f32x4.extract_lane $push0=, $0, 0
; SIMD128-NEXT:    f32.const $push1=, -0x0p0
; SIMD128-NEXT:    f32.mul $push2=, $pop0, $pop1
; SIMD128-NEXT:    f32x4.extract_lane $push3=, $0, 1
; SIMD128-NEXT:    f32.mul $push4=, $pop2, $pop3
; SIMD128-NEXT:    f32x4.extract_lane $push5=, $0, 2
; SIMD128-NEXT:    f32.mul $push6=, $pop4, $pop5
; SIMD128-NEXT:    f32x4.extract_lane $push7=, $0, 3
; SIMD128-NEXT:    f32.mul $push8=, $pop6, $pop7
; SIMD128-NEXT:    return $pop8
  %res = tail call float @llvm.vector.reduce.fmul.v4f32(float -0.0, <4 x float> %arg)
  ret float %res
}

define float @pairwise_mul_v4f32_fast(<4 x float> %arg) {
; SIMD128-LABEL: pairwise_mul_v4f32_fast:
; SIMD128:         .functype pairwise_mul_v4f32_fast (v128) -> (f32)
; SIMD128-NEXT:  # %bb.0:
; SIMD128-NEXT:    f32.const $push0=, 0x0p0
; SIMD128-NEXT:    return $pop0
  %res = tail call fast float @llvm.vector.reduce.fmul.v4f32(float -0.0, <4 x float> %arg)
  ret float %res
}

define float @pairwise_mul_v4f32_reassoc(<4 x float> %arg) {
; SIMD128-LABEL: pairwise_mul_v4f32_reassoc:
; SIMD128:         .functype pairwise_mul_v4f32_reassoc (v128) -> (f32)
; SIMD128-NEXT:  # %bb.0:
; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
; SIMD128-NEXT:    f32x4.mul $push7=, $0, $pop0
; SIMD128-NEXT:    local.tee $push6=, $0=, $pop7
; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
; SIMD128-NEXT:    f32x4.mul $push2=, $pop6, $pop1
; SIMD128-NEXT:    f32x4.extract_lane $push3=, $pop2, 0
; SIMD128-NEXT:    f32.const $push4=, -0x0p0
; SIMD128-NEXT:    f32.mul $push5=, $pop3, $pop4
; SIMD128-NEXT:    return $pop5
  %res = tail call reassoc float @llvm.vector.reduce.fmul.v4f32(float -0.0, <4 x float> %arg)
  ret float %res
}

define double
@pairwise_max_v2f64(<2 x double> %arg) { 779; SIMD128-LABEL: pairwise_max_v2f64: 780; SIMD128: .functype pairwise_max_v2f64 (v128) -> (f64) 781; SIMD128-NEXT: # %bb.0: 782; SIMD128-NEXT: f64x2.extract_lane $push1=, $0, 0 783; SIMD128-NEXT: f64x2.extract_lane $push0=, $0, 1 784; SIMD128-NEXT: call $push2=, fmax, $pop1, $pop0 785; SIMD128-NEXT: return $pop2 786 %res = tail call double @llvm.vector.reduce.fmax.v2f64(<2 x double> %arg) 787 ret double%res 788} 789 790define double @pairwise_max_v2f64_fast(<2 x double> %arg) { 791; SIMD128-LABEL: pairwise_max_v2f64_fast: 792; SIMD128: .functype pairwise_max_v2f64_fast (v128) -> (f64) 793; SIMD128-NEXT: # %bb.0: 794; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7 795; SIMD128-NEXT: f64x2.pmax $push1=, $0, $pop0 796; SIMD128-NEXT: f64x2.extract_lane $push2=, $pop1, 0 797; SIMD128-NEXT: return $pop2 798 %res = tail call fast double @llvm.vector.reduce.fmax.v2f64(<2 x double> %arg) 799 ret double%res 800} 801 802define float @pairwise_max_v4f32(<4 x float> %arg) { 803; SIMD128-LABEL: pairwise_max_v4f32: 804; SIMD128: .functype pairwise_max_v4f32 (v128) -> (f32) 805; SIMD128-NEXT: # %bb.0: 806; SIMD128-NEXT: f32x4.extract_lane $push3=, $0, 0 807; SIMD128-NEXT: f32x4.extract_lane $push2=, $0, 1 808; SIMD128-NEXT: call $push4=, fmaxf, $pop3, $pop2 809; SIMD128-NEXT: f32x4.extract_lane $push1=, $0, 2 810; SIMD128-NEXT: call $push5=, fmaxf, $pop4, $pop1 811; SIMD128-NEXT: f32x4.extract_lane $push0=, $0, 3 812; SIMD128-NEXT: call $push6=, fmaxf, $pop5, $pop0 813; SIMD128-NEXT: return $pop6 814 %res = tail call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %arg) 815 ret float %res 816} 817 818define float @pairwise_max_v4f32_fast(<4 x float> %arg) { 819; SIMD128-LABEL: pairwise_max_v4f32_fast: 820; SIMD128: .functype pairwise_max_v4f32_fast (v128) -> (f32) 821; SIMD128-NEXT: # %bb.0: 822; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 
1, 2, 3 823; SIMD128-NEXT: f32x4.pmax $push5=, $0, $pop0 824; SIMD128-NEXT: local.tee $push4=, $0=, $pop5 825; SIMD128-NEXT: i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 826; SIMD128-NEXT: f32x4.pmax $push2=, $pop4, $pop1 827; SIMD128-NEXT: f32x4.extract_lane $push3=, $pop2, 0 828; SIMD128-NEXT: return $pop3 829 %res = tail call fast float @llvm.vector.reduce.fmax.v4f32(<4 x float> %arg) 830 ret float %res 831} 832 833define float @pairwise_max_v4f32_reassoc(<4 x float> %arg) { 834; SIMD128-LABEL: pairwise_max_v4f32_reassoc: 835; SIMD128: .functype pairwise_max_v4f32_reassoc (v128) -> (f32) 836; SIMD128-NEXT: # %bb.0: 837; SIMD128-NEXT: f32x4.extract_lane $push3=, $0, 0 838; SIMD128-NEXT: f32x4.extract_lane $push2=, $0, 1 839; SIMD128-NEXT: call $push4=, fmaxf, $pop3, $pop2 840; SIMD128-NEXT: f32x4.extract_lane $push1=, $0, 2 841; SIMD128-NEXT: call $push5=, fmaxf, $pop4, $pop1 842; SIMD128-NEXT: f32x4.extract_lane $push0=, $0, 3 843; SIMD128-NEXT: call $push6=, fmaxf, $pop5, $pop0 844; SIMD128-NEXT: return $pop6 845 %res = tail call reassoc float @llvm.vector.reduce.fmax.v4f32(<4 x float> %arg) 846 ret float %res 847} 848 849define double @pairwise_min_v2f64(<2 x double> %arg) { 850; SIMD128-LABEL: pairwise_min_v2f64: 851; SIMD128: .functype pairwise_min_v2f64 (v128) -> (f64) 852; SIMD128-NEXT: # %bb.0: 853; SIMD128-NEXT: f64x2.extract_lane $push1=, $0, 0 854; SIMD128-NEXT: f64x2.extract_lane $push0=, $0, 1 855; SIMD128-NEXT: call $push2=, fmin, $pop1, $pop0 856; SIMD128-NEXT: return $pop2 857 %res = tail call double @llvm.vector.reduce.fmin.v2f64(<2 x double> %arg) 858 ret double%res 859} 860 861define double @pairwise_min_v2f64_fast(<2 x double> %arg) { 862; SIMD128-LABEL: pairwise_min_v2f64_fast: 863; SIMD128: .functype pairwise_min_v2f64_fast (v128) -> (f64) 864; SIMD128-NEXT: # %bb.0: 865; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7 866; SIMD128-NEXT: f64x2.pmin $push1=, 
$0, $pop0 867; SIMD128-NEXT: f64x2.extract_lane $push2=, $pop1, 0 868; SIMD128-NEXT: return $pop2 869 %res = tail call fast double @llvm.vector.reduce.fmin.v2f64(<2 x double> %arg) 870 ret double%res 871} 872 873define float @pairwise_min_v4f32(<4 x float> %arg) { 874; SIMD128-LABEL: pairwise_min_v4f32: 875; SIMD128: .functype pairwise_min_v4f32 (v128) -> (f32) 876; SIMD128-NEXT: # %bb.0: 877; SIMD128-NEXT: f32x4.extract_lane $push3=, $0, 0 878; SIMD128-NEXT: f32x4.extract_lane $push2=, $0, 1 879; SIMD128-NEXT: call $push4=, fminf, $pop3, $pop2 880; SIMD128-NEXT: f32x4.extract_lane $push1=, $0, 2 881; SIMD128-NEXT: call $push5=, fminf, $pop4, $pop1 882; SIMD128-NEXT: f32x4.extract_lane $push0=, $0, 3 883; SIMD128-NEXT: call $push6=, fminf, $pop5, $pop0 884; SIMD128-NEXT: return $pop6 885 %res = tail call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %arg) 886 ret float %res 887} 888 889define float @pairwise_min_v4f32_fast(<4 x float> %arg) { 890; SIMD128-LABEL: pairwise_min_v4f32_fast: 891; SIMD128: .functype pairwise_min_v4f32_fast (v128) -> (f32) 892; SIMD128-NEXT: # %bb.0: 893; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3 894; SIMD128-NEXT: f32x4.pmin $push5=, $0, $pop0 895; SIMD128-NEXT: local.tee $push4=, $0=, $pop5 896; SIMD128-NEXT: i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 897; SIMD128-NEXT: f32x4.pmin $push2=, $pop4, $pop1 898; SIMD128-NEXT: f32x4.extract_lane $push3=, $pop2, 0 899; SIMD128-NEXT: return $pop3 900 %res = tail call fast float @llvm.vector.reduce.fmin.v4f32(<4 x float> %arg) 901 ret float %res 902} 903 904define float @pairwise_min_v4f32_reassoc(<4 x float> %arg) { 905; SIMD128-LABEL: pairwise_min_v4f32_reassoc: 906; SIMD128: .functype pairwise_min_v4f32_reassoc (v128) -> (f32) 907; SIMD128-NEXT: # %bb.0: 908; SIMD128-NEXT: f32x4.extract_lane $push3=, $0, 0 909; SIMD128-NEXT: f32x4.extract_lane $push2=, $0, 1 910; SIMD128-NEXT: call $push4=, fminf, 
$pop3, $pop2 911; SIMD128-NEXT: f32x4.extract_lane $push1=, $0, 2 912; SIMD128-NEXT: call $push5=, fminf, $pop4, $pop1 913; SIMD128-NEXT: f32x4.extract_lane $push0=, $0, 3 914; SIMD128-NEXT: call $push6=, fminf, $pop5, $pop0 915; SIMD128-NEXT: return $pop6 916 %res = tail call reassoc float @llvm.vector.reduce.fmin.v4f32(<4 x float> %arg) 917 ret float %res 918} 919 920define double @pairwise_maximum_v2f64(<2 x double> %arg) { 921; SIMD128-LABEL: pairwise_maximum_v2f64: 922; SIMD128: .functype pairwise_maximum_v2f64 (v128) -> (f64) 923; SIMD128-NEXT: # %bb.0: 924; SIMD128-NEXT: f64x2.extract_lane $push1=, $0, 0 925; SIMD128-NEXT: f64x2.extract_lane $push0=, $0, 1 926; SIMD128-NEXT: f64.max $push2=, $pop1, $pop0 927; SIMD128-NEXT: return $pop2 928 %res = tail call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> %arg) 929 ret double%res 930} 931 932define double @pairwise_maximum_v2f64_fast(<2 x double> %arg) { 933; SIMD128-LABEL: pairwise_maximum_v2f64_fast: 934; SIMD128: .functype pairwise_maximum_v2f64_fast (v128) -> (f64) 935; SIMD128-NEXT: # %bb.0: 936; SIMD128-NEXT: f64x2.extract_lane $push1=, $0, 0 937; SIMD128-NEXT: f64x2.extract_lane $push0=, $0, 1 938; SIMD128-NEXT: f64.max $push2=, $pop1, $pop0 939; SIMD128-NEXT: return $pop2 940 %res = tail call fast double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> %arg) 941 ret double%res 942} 943 944define float @pairwise_maximum_v4f32(<4 x float> %arg) { 945; SIMD128-LABEL: pairwise_maximum_v4f32: 946; SIMD128: .functype pairwise_maximum_v4f32 (v128) -> (f32) 947; SIMD128-NEXT: # %bb.0: 948; SIMD128-NEXT: f32x4.extract_lane $push1=, $0, 0 949; SIMD128-NEXT: f32x4.extract_lane $push0=, $0, 1 950; SIMD128-NEXT: f32.max $push2=, $pop1, $pop0 951; SIMD128-NEXT: f32x4.extract_lane $push3=, $0, 2 952; SIMD128-NEXT: f32.max $push4=, $pop2, $pop3 953; SIMD128-NEXT: f32x4.extract_lane $push5=, $0, 3 954; SIMD128-NEXT: f32.max $push6=, $pop4, $pop5 955; SIMD128-NEXT: return $pop6 956 %res = tail call float 
@llvm.vector.reduce.fmaximum.v4f32(<4 x float> %arg) 957 ret float %res 958} 959 960define float @pairwise_maximum_v4f32_fast(<4 x float> %arg) { 961; SIMD128-LABEL: pairwise_maximum_v4f32_fast: 962; SIMD128: .functype pairwise_maximum_v4f32_fast (v128) -> (f32) 963; SIMD128-NEXT: # %bb.0: 964; SIMD128-NEXT: f32x4.extract_lane $push1=, $0, 0 965; SIMD128-NEXT: f32x4.extract_lane $push0=, $0, 1 966; SIMD128-NEXT: f32.max $push2=, $pop1, $pop0 967; SIMD128-NEXT: f32x4.extract_lane $push3=, $0, 2 968; SIMD128-NEXT: f32.max $push4=, $pop2, $pop3 969; SIMD128-NEXT: f32x4.extract_lane $push5=, $0, 3 970; SIMD128-NEXT: f32.max $push6=, $pop4, $pop5 971; SIMD128-NEXT: return $pop6 972 %res = tail call fast float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> %arg) 973 ret float %res 974} 975 976define float @pairwise_maximum_v4f32_reassoc(<4 x float> %arg) { 977; SIMD128-LABEL: pairwise_maximum_v4f32_reassoc: 978; SIMD128: .functype pairwise_maximum_v4f32_reassoc (v128) -> (f32) 979; SIMD128-NEXT: # %bb.0: 980; SIMD128-NEXT: f32x4.extract_lane $push1=, $0, 0 981; SIMD128-NEXT: f32x4.extract_lane $push0=, $0, 1 982; SIMD128-NEXT: f32.max $push2=, $pop1, $pop0 983; SIMD128-NEXT: f32x4.extract_lane $push3=, $0, 2 984; SIMD128-NEXT: f32.max $push4=, $pop2, $pop3 985; SIMD128-NEXT: f32x4.extract_lane $push5=, $0, 3 986; SIMD128-NEXT: f32.max $push6=, $pop4, $pop5 987; SIMD128-NEXT: return $pop6 988 %res = tail call reassoc float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> %arg) 989 ret float %res 990} 991 992define double @pairwise_minimum_v2f64(<2 x double> %arg) { 993; SIMD128-LABEL: pairwise_minimum_v2f64: 994; SIMD128: .functype pairwise_minimum_v2f64 (v128) -> (f64) 995; SIMD128-NEXT: # %bb.0: 996; SIMD128-NEXT: f64x2.extract_lane $push1=, $0, 0 997; SIMD128-NEXT: f64x2.extract_lane $push0=, $0, 1 998; SIMD128-NEXT: f64.min $push2=, $pop1, $pop0 999; SIMD128-NEXT: return $pop2 1000 %res = tail call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> %arg) 1001 ret 
double%res 1002} 1003 1004define double @pairwise_minimum_v2f64_fast(<2 x double> %arg) { 1005; SIMD128-LABEL: pairwise_minimum_v2f64_fast: 1006; SIMD128: .functype pairwise_minimum_v2f64_fast (v128) -> (f64) 1007; SIMD128-NEXT: # %bb.0: 1008; SIMD128-NEXT: f64x2.extract_lane $push1=, $0, 0 1009; SIMD128-NEXT: f64x2.extract_lane $push0=, $0, 1 1010; SIMD128-NEXT: f64.min $push2=, $pop1, $pop0 1011; SIMD128-NEXT: return $pop2 1012 %res = tail call fast double @llvm.vector.reduce.fminimum.v2f64(<2 x double> %arg) 1013 ret double%res 1014} 1015 1016define float @pairwise_minimum_v4f32(<4 x float> %arg) { 1017; SIMD128-LABEL: pairwise_minimum_v4f32: 1018; SIMD128: .functype pairwise_minimum_v4f32 (v128) -> (f32) 1019; SIMD128-NEXT: # %bb.0: 1020; SIMD128-NEXT: f32x4.extract_lane $push1=, $0, 0 1021; SIMD128-NEXT: f32x4.extract_lane $push0=, $0, 1 1022; SIMD128-NEXT: f32.min $push2=, $pop1, $pop0 1023; SIMD128-NEXT: f32x4.extract_lane $push3=, $0, 2 1024; SIMD128-NEXT: f32.min $push4=, $pop2, $pop3 1025; SIMD128-NEXT: f32x4.extract_lane $push5=, $0, 3 1026; SIMD128-NEXT: f32.min $push6=, $pop4, $pop5 1027; SIMD128-NEXT: return $pop6 1028 %res = tail call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> %arg) 1029 ret float %res 1030} 1031 1032define float @pairwise_minimum_v4f32_fast(<4 x float> %arg) { 1033; SIMD128-LABEL: pairwise_minimum_v4f32_fast: 1034; SIMD128: .functype pairwise_minimum_v4f32_fast (v128) -> (f32) 1035; SIMD128-NEXT: # %bb.0: 1036; SIMD128-NEXT: f32x4.extract_lane $push1=, $0, 0 1037; SIMD128-NEXT: f32x4.extract_lane $push0=, $0, 1 1038; SIMD128-NEXT: f32.min $push2=, $pop1, $pop0 1039; SIMD128-NEXT: f32x4.extract_lane $push3=, $0, 2 1040; SIMD128-NEXT: f32.min $push4=, $pop2, $pop3 1041; SIMD128-NEXT: f32x4.extract_lane $push5=, $0, 3 1042; SIMD128-NEXT: f32.min $push6=, $pop4, $pop5 1043; SIMD128-NEXT: return $pop6 1044 %res = tail call fast float @llvm.vector.reduce.fminimum.v4f32(<4 x float> %arg) 1045 ret float %res 1046} 1047 1048define 
float @pairwise_minimum_v4f32_reassoc(<4 x float> %arg) { 1049; SIMD128-LABEL: pairwise_minimum_v4f32_reassoc: 1050; SIMD128: .functype pairwise_minimum_v4f32_reassoc (v128) -> (f32) 1051; SIMD128-NEXT: # %bb.0: 1052; SIMD128-NEXT: f32x4.extract_lane $push1=, $0, 0 1053; SIMD128-NEXT: f32x4.extract_lane $push0=, $0, 1 1054; SIMD128-NEXT: f32.min $push2=, $pop1, $pop0 1055; SIMD128-NEXT: f32x4.extract_lane $push3=, $0, 2 1056; SIMD128-NEXT: f32.min $push4=, $pop2, $pop3 1057; SIMD128-NEXT: f32x4.extract_lane $push5=, $0, 3 1058; SIMD128-NEXT: f32.min $push6=, $pop4, $pop5 1059; SIMD128-NEXT: return $pop6 1060 %res = tail call reassoc float @llvm.vector.reduce.fminimum.v4f32(<4 x float> %arg) 1061 ret float %res 1062} 1063