; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=vector-combine -S -mtriple=riscv32 -mattr=+v | FileCheck %s
; RUN: opt < %s -passes=vector-combine -S -mtriple=riscv64 -mattr=+v | FileCheck %s

;
; Fold reduce(cast(X)) -> cast(reduce(X)) if more cost efficient
;

define i32 @reduce_add_trunc_v8i64_to_v8i32(<8 x i64> %a0) {
; CHECK-LABEL: @reduce_add_trunc_v8i64_to_v8i32(
; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[A0:%.*]])
; CHECK-NEXT:    [[RED:%.*]] = trunc i64 [[TMP1]] to i32
; CHECK-NEXT:    ret i32 [[RED]]
;
  %tr = trunc <8 x i64> %a0 to <8 x i32>
  %red = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %tr)
  ret i32 %red
}

define i16 @reduce_add_trunc_v8i64_to_v8i16(<8 x i64> %a0) {
; CHECK-LABEL: @reduce_add_trunc_v8i64_to_v8i16(
; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[A0:%.*]])
; CHECK-NEXT:    [[RED:%.*]] = trunc i64 [[TMP1]] to i16
; CHECK-NEXT:    ret i16 [[RED]]
;
  %tr = trunc <8 x i64> %a0 to <8 x i16>
  %red = tail call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %tr)
  ret i16 %red
}

define i8 @reduce_add_trunc_v8i64_to_v8i8(<8 x i64> %a0) {
; CHECK-LABEL: @reduce_add_trunc_v8i64_to_v8i8(
; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[A0:%.*]])
; CHECK-NEXT:    [[RED:%.*]] = trunc i64 [[TMP1]] to i8
; CHECK-NEXT:    ret i8 [[RED]]
;
  %tr = trunc <8 x i64> %a0 to <8 x i8>
  %red = tail call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %tr)
  ret i8 %red
}

define i8 @reduce_or_trunc_v8i32_i8(<8 x i32> %a0) {
; CHECK-LABEL: @reduce_or_trunc_v8i32_i8(
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> [[A0:%.*]])
; CHECK-NEXT:    [[RED:%.*]] = trunc i32 [[TMP1]] to i8
; CHECK-NEXT:    ret i8 [[RED]]
;
  %tr = trunc <8 x i32> %a0 to <8 x i8>
  %red = tail call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> %tr)
  ret i8 %red
}

define i8 @reduce_xor_trunc_v16i64_i8(<16 x i64> %a0) {
; CHECK-LABEL: @reduce_xor_trunc_v16i64_i8(
; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vector.reduce.xor.v16i64(<16 x i64> [[A0:%.*]])
; CHECK-NEXT:    [[RED:%.*]] = trunc i64 [[TMP1]] to i8
; CHECK-NEXT:    ret i8 [[RED]]
;
  %tr = trunc <16 x i64> %a0 to <16 x i8>
  %red = tail call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> %tr)
  ret i8 %red
}

define i16 @reduce_mul_trunc_v8i64_i16(<8 x i64> %a0) {
; CHECK-LABEL: @reduce_mul_trunc_v8i64_i16(
; CHECK-NEXT:    [[TR:%.*]] = trunc <8 x i64> [[A0:%.*]] to <8 x i16>
; CHECK-NEXT:    [[RED:%.*]] = tail call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> [[TR]])
; CHECK-NEXT:    ret i16 [[RED]]
;
  %tr = trunc <8 x i64> %a0 to <8 x i16>
  %red = tail call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> %tr)
  ret i16 %red
}

define i32 @reduce_or_sext_v8i8_to_v8i32(<8 x i8> %a0) {
; CHECK-LABEL: @reduce_or_sext_v8i8_to_v8i32(
; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> [[A0:%.*]])
; CHECK-NEXT:    [[RED:%.*]] = sext i8 [[TMP1]] to i32
; CHECK-NEXT:    ret i32 [[RED]]
;
  %tr = sext <8 x i8> %a0 to <8 x i32>
  %red = tail call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %tr)
  ret i32 %red
}

define i32 @reduce_or_sext_v8i16_to_v8i32(<8 x i16> %a0) {
; CHECK-LABEL: @reduce_or_sext_v8i16_to_v8i32(
; CHECK-NEXT:    [[TMP1:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[A0:%.*]])
; CHECK-NEXT:    [[RED:%.*]] = sext i16 [[TMP1]] to i32
; CHECK-NEXT:    ret i32 [[RED]]
;
  %tr = sext <8 x i16> %a0 to <8 x i32>
  %red = tail call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %tr)
  ret i32 %red
}

define i32 @reduce_or_zext_v8i8_to_v8i32(<8 x i8> %a0) {
; CHECK-LABEL: @reduce_or_zext_v8i8_to_v8i32(
; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> [[A0:%.*]])
; CHECK-NEXT:    [[RED:%.*]] = zext i8 [[TMP1]] to i32
; CHECK-NEXT:    ret i32 [[RED]]
;
  %tr = zext <8 x i8> %a0 to <8 x i32>
  %red = tail call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %tr)
  ret i32 %red
}

define i32 @reduce_or_zext_v8i16_to_v8i32(<8 x i16> %a0) {
; CHECK-LABEL: @reduce_or_zext_v8i16_to_v8i32(
; CHECK-NEXT:    [[TMP1:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[A0:%.*]])
; CHECK-NEXT:    [[RED:%.*]] = zext i16 [[TMP1]] to i32
; CHECK-NEXT:    ret i32 [[RED]]
;
  %tr = zext <8 x i16> %a0 to <8 x i32>
  %red = tail call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %tr)
  ret i32 %red
}

; Negative case - narrowing the reduce (to i8) is illegal, as the add may overflow i8.
; TODO: We could narrow to i16 instead.
define i32 @reduce_add_trunc_v8i8_to_v8i32(<8 x i8> %a0) {
; CHECK-LABEL: @reduce_add_trunc_v8i8_to_v8i32(
; CHECK-NEXT:    [[TR:%.*]] = zext <8 x i8> [[A0:%.*]] to <8 x i32>
; CHECK-NEXT:    [[RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TR]])
; CHECK-NEXT:    ret i32 [[RED]]
;
  %tr = zext <8 x i8> %a0 to <8 x i32>
  %red = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %tr)
  ret i32 %red
}


declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8>)
declare i32 @llvm.vector.reduce.or.v8i32(<8 x i32>)
declare i8 @llvm.vector.reduce.or.v8i8(<8 x i8>)
declare i8 @llvm.vector.reduce.xor.v16i8(<16 x i8>)
declare i16 @llvm.vector.reduce.and.v16i16(<16 x i16>)
declare i16 @llvm.vector.reduce.mul.v8i16(<8 x i16>)