xref: /llvm-project/llvm/test/Transforms/VectorCombine/X86/reduction-of-truncations.ll (revision 5ff44dbaea27d442c89278871dc2f75942d54716)
1769c22f2SSimon Pilgrim; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2769c22f2SSimon Pilgrim; RUN: opt < %s -S --passes=vector-combine -mtriple=x86_64-- -mcpu=x86-64    | FileCheck %s --check-prefixes=CHECK,X64
3769c22f2SSimon Pilgrim; RUN: opt < %s -S --passes=vector-combine -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,X64
4769c22f2SSimon Pilgrim; RUN: opt < %s -S --passes=vector-combine -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,X64
5769c22f2SSimon Pilgrim; RUN: opt < %s -S --passes=vector-combine -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,AVX512
6769c22f2SSimon Pilgrim
7769c22f2SSimon Pilgrim;
8769c22f2SSimon Pilgrim; Fold reduce(trunc(X)) -> trunc(reduce(X)) if more cost efficient
9769c22f2SSimon Pilgrim;
10769c22f2SSimon Pilgrim
; Cheap AVX512 v8i64 -> v8i32 truncation: on x86-64-v4 (AVX512) the vector
; truncation is cheap, so the reduce(trunc(X)) form is kept; on the other
; targets the fold to trunc(reduce(X)) is cheaper and is expected (X64 prefix).
define i32 @reduce_add_trunc_v8i64_i32(<8 x i64> %a0)  {
; X64-LABEL: @reduce_add_trunc_v8i64_i32(
; X64-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[A0:%.*]])
; X64-NEXT:    [[RED:%.*]] = trunc i64 [[TMP1]] to i32
; X64-NEXT:    ret i32 [[RED]]
;
; AVX512-LABEL: @reduce_add_trunc_v8i64_i32(
; AVX512-NEXT:    [[TR:%.*]] = trunc <8 x i64> [[A0:%.*]] to <8 x i32>
; AVX512-NEXT:    [[RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TR]])
; AVX512-NEXT:    ret i32 [[RED]]
;
  %tr = trunc <8 x i64> %a0 to <8 x i32>
  %red = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %tr)
  ret i32 %red
}
declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
28769c22f2SSimon Pilgrim
; No legal vXi8 multiplication so vXi16 is always cheaper - the fold to
; trunc(reduce.mul(X)) is expected on every RUN line (common CHECK prefix).
define i8 @reduce_mul_trunc_v16i16_i8(<16 x i16> %a0)  {
; CHECK-LABEL: @reduce_mul_trunc_v16i16_i8(
; CHECK-NEXT:    [[TMP1:%.*]] = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> [[A0:%.*]])
; CHECK-NEXT:    [[RED:%.*]] = trunc i16 [[TMP1]] to i8
; CHECK-NEXT:    ret i8 [[RED]]
;
  %tr = trunc <16 x i16> %a0 to <16 x i8>
  %red = tail call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> %tr)
  ret i8 %red
}
declare i8 @llvm.vector.reduce.mul.v16i8(<16 x i8>)
41769c22f2SSimon Pilgrim
; Bitwise reductions are always at least as cheap on the wider type, so the
; fold to trunc(reduce.or(X)) is expected on all targets.
define i8 @reduce_or_trunc_v8i32_i8(<8 x i32> %a0)  {
; CHECK-LABEL: @reduce_or_trunc_v8i32_i8(
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> [[A0:%.*]])
; CHECK-NEXT:    [[RED:%.*]] = trunc i32 [[TMP1]] to i8
; CHECK-NEXT:    ret i8 [[RED]]
;
  %tr = trunc <8 x i32> %a0 to <8 x i8>
  ; Use the correctly mangled intrinsic name for an <8 x i8> operand (.v8i8,
  ; not .v8i32) so the call matches the declaration below instead of relying
  ; on the parser's intrinsic auto-remangling.
  %red = tail call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> %tr)
  ret i8 %red
}
declare i8 @llvm.vector.reduce.or.v8i8(<8 x i8>)
53769c22f2SSimon Pilgrim
; Bitwise xor reduction - fold to trunc(reduce.xor(X)) expected on all targets.
define i8 @reduce_xor_trunc_v16i64_i8(<16 x i64> %a0)  {
; CHECK-LABEL: @reduce_xor_trunc_v16i64_i8(
; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vector.reduce.xor.v16i64(<16 x i64> [[A0:%.*]])
; CHECK-NEXT:    [[RED:%.*]] = trunc i64 [[TMP1]] to i8
; CHECK-NEXT:    ret i8 [[RED]]
;
  %tr = trunc <16 x i64> %a0 to <16 x i8>
  %red = tail call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> %tr)
  ret i8 %red
}
declare i8 @llvm.vector.reduce.xor.v16i8(<16 x i8>)
65769c22f2SSimon Pilgrim
; Truncation source has other uses - OK to truncate reduction: only the trunc
; itself must be single-use for the fold; extra uses of the un-truncated
; source %a0 do not block it.
define i16 @reduce_and_trunc_v16i64_i16(<16 x i64> %a0)  {
; CHECK-LABEL: @reduce_and_trunc_v16i64_i16(
; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vector.reduce.and.v16i64(<16 x i64> [[A0:%.*]])
; CHECK-NEXT:    [[RED:%.*]] = trunc i64 [[TMP1]] to i16
; CHECK-NEXT:    call void @use_v16i64(<16 x i64> [[A0]])
; CHECK-NEXT:    ret i16 [[RED]]
;
  %tr = trunc <16 x i64> %a0 to <16 x i16>
  %red = tail call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> %tr)
  call void @use_v16i64(<16 x i64> %a0)
  ret i16 %red
}
declare i16 @llvm.vector.reduce.and.v16i16(<16 x i16>)
80769c22f2SSimon Pilgrim
; Negative Test: vXi16 multiply is much cheaper than vXi64, so widening the
; reduction would be a pessimization - the IR must be left unchanged.
define i16 @reduce_mul_trunc_v8i64_i16(<8 x i64> %a0)  {
; CHECK-LABEL: @reduce_mul_trunc_v8i64_i16(
; CHECK-NEXT:    [[TR:%.*]] = trunc <8 x i64> [[A0:%.*]] to <8 x i16>
; CHECK-NEXT:    [[RED:%.*]] = tail call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> [[TR]])
; CHECK-NEXT:    ret i16 [[RED]]
;
  %tr = trunc <8 x i64> %a0 to <8 x i16>
  %red = tail call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> %tr)
  ret i16 %red
}
declare i16 @llvm.vector.reduce.mul.v8i16(<8 x i16>)
93769c22f2SSimon Pilgrim
; Negative Test: min/max reductions can't use pre-truncated types - truncation
; does not commute with smin, so the fold must not fire and the IR is kept.
define i8 @reduce_smin_trunc_v16i16_i8(<16 x i16> %a0)  {
; CHECK-LABEL: @reduce_smin_trunc_v16i16_i8(
; CHECK-NEXT:    [[TR:%.*]] = trunc <16 x i16> [[A0:%.*]] to <16 x i8>
; CHECK-NEXT:    [[RED:%.*]] = tail call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> [[TR]])
; CHECK-NEXT:    ret i8 [[RED]]
;
  %tr = trunc <16 x i16> %a0 to <16 x i8>
  %red = tail call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %tr)
  ret i8 %red
}
declare i8 @llvm.vector.reduce.smin.v16i8(<16 x i8>)
106769c22f2SSimon Pilgrim
; Negative Test: Truncation has other uses - %tr itself is consumed by
; @use_v16i16 as well as the reduction, so removing the vector trunc is not
; possible and the fold must not fire.
define i16 @reduce_and_trunc_v16i64_i16_multiuse(<16 x i64> %a0)  {
; CHECK-LABEL: @reduce_and_trunc_v16i64_i16_multiuse(
; CHECK-NEXT:    [[TR:%.*]] = trunc <16 x i64> [[A0:%.*]] to <16 x i16>
; CHECK-NEXT:    [[RED:%.*]] = tail call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> [[TR]])
; CHECK-NEXT:    call void @use_v16i16(<16 x i16> [[TR]])
; CHECK-NEXT:    ret i16 [[RED]]
;
  %tr = trunc <16 x i64> %a0 to <16 x i16>
  %red = tail call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> %tr)
  call void @use_v16i16(<16 x i16> %tr)
  ret i16 %red
}
120769c22f2SSimon Pilgrim
121769c22f2SSimon Pilgrimdeclare void @use_v16i64(<16 x i64>)
122769c22f2SSimon Pilgrimdeclare void @use_v16i16(<16 x i16>)
123769c22f2SSimon Pilgrim
124