; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -S --passes=vector-combine -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,X64
; RUN: opt < %s -S --passes=vector-combine -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,X64
; RUN: opt < %s -S --passes=vector-combine -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,X64
; RUN: opt < %s -S --passes=vector-combine -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,AVX512

;
; Fold reduce(trunc(X)) -> trunc(reduce(X)) if more cost efficient
;

; Cheap AVX512 v8i64 -> v8i32 truncation
define i32 @reduce_add_trunc_v8i64_i32(<8 x i64> %a0) {
; X64-LABEL: @reduce_add_trunc_v8i64_i32(
; X64-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[A0:%.*]])
; X64-NEXT:    [[RED:%.*]] = trunc i64 [[TMP1]] to i32
; X64-NEXT:    ret i32 [[RED]]
;
; AVX512-LABEL: @reduce_add_trunc_v8i64_i32(
; AVX512-NEXT:    [[TR:%.*]] = trunc <8 x i64> [[A0:%.*]] to <8 x i32>
; AVX512-NEXT:    [[RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TR]])
; AVX512-NEXT:    ret i32 [[RED]]
;
  %tr = trunc <8 x i64> %a0 to <8 x i32>
  %red = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %tr)
  ret i32 %red
}
declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)

; No legal vXi8 multiplication so vXi16 is always cheaper
define i8 @reduce_mul_trunc_v16i16_i8(<16 x i16> %a0) {
; CHECK-LABEL: @reduce_mul_trunc_v16i16_i8(
; CHECK-NEXT:    [[TMP1:%.*]] = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> [[A0:%.*]])
; CHECK-NEXT:    [[RED:%.*]] = trunc i16 [[TMP1]] to i8
; CHECK-NEXT:    ret i8 [[RED]]
;
  %tr = trunc <16 x i16> %a0 to <16 x i8>
  %red = tail call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> %tr)
  ret i8 %red
}
declare i8 @llvm.vector.reduce.mul.v16i8(<16 x i8>)

define i8 @reduce_or_trunc_v8i32_i8(<8 x i32> %a0) {
; CHECK-LABEL: @reduce_or_trunc_v8i32_i8(
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> [[A0:%.*]])
; CHECK-NEXT:    [[RED:%.*]] = trunc i32 [[TMP1]] to i8
; CHECK-NEXT:    ret i8 [[RED]]
;
  %tr = trunc <8 x i32> %a0 to <8 x i8>
  %red = tail call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> %tr)
  ret i8 %red
}
declare i8 @llvm.vector.reduce.or.v8i8(<8 x i8>)

define i8 @reduce_xor_trunc_v16i64_i8(<16 x i64> %a0) {
; CHECK-LABEL: @reduce_xor_trunc_v16i64_i8(
; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vector.reduce.xor.v16i64(<16 x i64> [[A0:%.*]])
; CHECK-NEXT:    [[RED:%.*]] = trunc i64 [[TMP1]] to i8
; CHECK-NEXT:    ret i8 [[RED]]
;
  %tr = trunc <16 x i64> %a0 to <16 x i8>
  %red = tail call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> %tr)
  ret i8 %red
}
declare i8 @llvm.vector.reduce.xor.v16i8(<16 x i8>)

; Truncation source has other uses - OK to truncate reduction
define i16 @reduce_and_trunc_v16i64_i16(<16 x i64> %a0) {
; CHECK-LABEL: @reduce_and_trunc_v16i64_i16(
; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vector.reduce.and.v16i64(<16 x i64> [[A0:%.*]])
; CHECK-NEXT:    [[RED:%.*]] = trunc i64 [[TMP1]] to i16
; CHECK-NEXT:    call void @use_v16i64(<16 x i64> [[A0]])
; CHECK-NEXT:    ret i16 [[RED]]
;
  %tr = trunc <16 x i64> %a0 to <16 x i16>
  %red = tail call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> %tr)
  call void @use_v16i64(<16 x i64> %a0)
  ret i16 %red
}
declare i16 @llvm.vector.reduce.and.v16i16(<16 x i16>)

; Negative Test: vXi16 multiply is much cheaper than vXi64
define i16 @reduce_mul_trunc_v8i64_i16(<8 x i64> %a0) {
; CHECK-LABEL: @reduce_mul_trunc_v8i64_i16(
; CHECK-NEXT:    [[TR:%.*]] = trunc <8 x i64> [[A0:%.*]] to <8 x i16>
; CHECK-NEXT:    [[RED:%.*]] = tail call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> [[TR]])
; CHECK-NEXT:    ret i16 [[RED]]
;
  %tr = trunc <8 x i64> %a0 to <8 x i16>
  %red = tail call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> %tr)
  ret i16 %red
}
declare i16 @llvm.vector.reduce.mul.v8i16(<8 x i16>)

; Negative Test: min/max reductions can't use pre-truncated types.
define i8 @reduce_smin_trunc_v16i16_i8(<16 x i16> %a0) {
; CHECK-LABEL: @reduce_smin_trunc_v16i16_i8(
; CHECK-NEXT:    [[TR:%.*]] = trunc <16 x i16> [[A0:%.*]] to <16 x i8>
; CHECK-NEXT:    [[RED:%.*]] = tail call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> [[TR]])
; CHECK-NEXT:    ret i8 [[RED]]
;
  %tr = trunc <16 x i16> %a0 to <16 x i8>
  %red = tail call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %tr)
  ret i8 %red
}
declare i8 @llvm.vector.reduce.smin.v16i8(<16 x i8>)

; Negative Test: Truncation has other uses.
define i16 @reduce_and_trunc_v16i64_i16_multiuse(<16 x i64> %a0) {
; CHECK-LABEL: @reduce_and_trunc_v16i64_i16_multiuse(
; CHECK-NEXT:    [[TR:%.*]] = trunc <16 x i64> [[A0:%.*]] to <16 x i16>
; CHECK-NEXT:    [[RED:%.*]] = tail call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> [[TR]])
; CHECK-NEXT:    call void @use_v16i16(<16 x i16> [[TR]])
; CHECK-NEXT:    ret i16 [[RED]]
;
  %tr = trunc <16 x i64> %a0 to <16 x i16>
  %red = tail call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> %tr)
  call void @use_v16i16(<16 x i16> %tr)
  ret i16 %red
}

declare void @use_v16i64(<16 x i64>)
declare void @use_v16i16(<16 x i16>)