; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=vector-combine -S -mtriple=riscv32 -mattr=+v | FileCheck %s
; RUN: opt < %s -passes=vector-combine -S -mtriple=riscv64 -mattr=+v | FileCheck %s

;
; Fold reduce(cast(X)) -> cast(reduce(X)) if more cost efficient
;

define i32 @reduce_add_trunc_v8i64_to_v8i32(<8 x i64> %a0)  {
; CHECK-LABEL: @reduce_add_trunc_v8i64_to_v8i32(
; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[A0:%.*]])
; CHECK-NEXT:    [[RED:%.*]] = trunc i64 [[TMP1]] to i32
; CHECK-NEXT:    ret i32 [[RED]]
;
  %tr = trunc <8 x i64> %a0 to <8 x i32>
  %red = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %tr)
  ret i32 %red
}

define i16 @reduce_add_trunc_v8i64_to_v8i16(<8 x i64> %a0)  {
; CHECK-LABEL: @reduce_add_trunc_v8i64_to_v8i16(
; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[A0:%.*]])
; CHECK-NEXT:    [[RED:%.*]] = trunc i64 [[TMP1]] to i16
; CHECK-NEXT:    ret i16 [[RED]]
;
  %tr = trunc <8 x i64> %a0 to <8 x i16>
  %red = tail call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %tr)
  ret i16 %red
}

define i8 @reduce_add_trunc_v8i64_to_v8i8(<8 x i64> %a0)  {
; CHECK-LABEL: @reduce_add_trunc_v8i64_to_v8i8(
; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[A0:%.*]])
; CHECK-NEXT:    [[RED:%.*]] = trunc i64 [[TMP1]] to i8
; CHECK-NEXT:    ret i8 [[RED]]
;
  %tr = trunc <8 x i64> %a0 to <8 x i8>
  %red = tail call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %tr)
  ret i8 %red
}

define i8 @reduce_or_trunc_v8i32_i8(<8 x i32> %a0)  {
; CHECK-LABEL: @reduce_or_trunc_v8i32_i8(
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> [[A0:%.*]])
; CHECK-NEXT:    [[RED:%.*]] = trunc i32 [[TMP1]] to i8
; CHECK-NEXT:    ret i8 [[RED]]
;
  %tr = trunc <8 x i32> %a0 to <8 x i8>
  %red = tail call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> %tr)
  ret i8 %red
}

define i8 @reduce_xor_trunc_v16i64_i8(<16 x i64> %a0)  {
; CHECK-LABEL: @reduce_xor_trunc_v16i64_i8(
; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vector.reduce.xor.v16i64(<16 x i64> [[A0:%.*]])
; CHECK-NEXT:    [[RED:%.*]] = trunc i64 [[TMP1]] to i8
; CHECK-NEXT:    ret i8 [[RED]]
;
  %tr = trunc <16 x i64> %a0 to <16 x i8>
  %red = tail call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> %tr)
  ret i8 %red
}

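; Negative case - the mul reduction stays in the narrow type; widening the
; reduce to i64 does not appear cost effective here.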
define i16 @reduce_mul_trunc_v8i64_i16(<8 x i64> %a0)  {
; CHECK-LABEL: @reduce_mul_trunc_v8i64_i16(
; CHECK-NEXT:    [[TR:%.*]] = trunc <8 x i64> [[A0:%.*]] to <8 x i16>
; CHECK-NEXT:    [[RED:%.*]] = tail call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> [[TR]])
; CHECK-NEXT:    ret i16 [[RED]]
;
  %tr = trunc <8 x i64> %a0 to <8 x i16>
  %red = tail call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> %tr)
  ret i16 %red
}

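; The fold also applies to sext/zext of bitwise (or) reductions, since the
; extension commutes with the bitwise operation.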
define i32 @reduce_or_sext_v8i8_to_v8i32(<8 x i8> %a0)  {
; CHECK-LABEL: @reduce_or_sext_v8i8_to_v8i32(
; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> [[A0:%.*]])
; CHECK-NEXT:    [[RED:%.*]] = sext i8 [[TMP1]] to i32
; CHECK-NEXT:    ret i32 [[RED]]
;
  %tr = sext <8 x i8> %a0 to <8 x i32>
  %red = tail call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %tr)
  ret i32 %red
}

define i32 @reduce_or_sext_v8i16_to_v8i32(<8 x i16> %a0)  {
; CHECK-LABEL: @reduce_or_sext_v8i16_to_v8i32(
; CHECK-NEXT:    [[TMP1:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[A0:%.*]])
; CHECK-NEXT:    [[RED:%.*]] = sext i16 [[TMP1]] to i32
; CHECK-NEXT:    ret i32 [[RED]]
;
  %tr = sext <8 x i16> %a0 to <8 x i32>
  %red = tail call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %tr)
  ret i32 %red
}

define i32 @reduce_or_zext_v8i8_to_v8i32(<8 x i8> %a0)  {
; CHECK-LABEL: @reduce_or_zext_v8i8_to_v8i32(
; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> [[A0:%.*]])
; CHECK-NEXT:    [[RED:%.*]] = zext i8 [[TMP1]] to i32
; CHECK-NEXT:    ret i32 [[RED]]
;
  %tr = zext <8 x i8> %a0 to <8 x i32>
  %red = tail call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %tr)
  ret i32 %red
}

define i32 @reduce_or_zext_v8i16_to_v8i32(<8 x i16> %a0)  {
; CHECK-LABEL: @reduce_or_zext_v8i16_to_v8i32(
; CHECK-NEXT:    [[TMP1:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[A0:%.*]])
; CHECK-NEXT:    [[RED:%.*]] = zext i16 [[TMP1]] to i32
; CHECK-NEXT:    ret i32 [[RED]]
;
  %tr = zext <8 x i16> %a0 to <8 x i32>
  %red = tail call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %tr)
  ret i32 %red
}

; Negative case - narrowing the reduce (to i8) is illegal.
; TODO: We could narrow to i16 instead.
define i32 @reduce_add_zext_v8i8_to_v8i32(<8 x i8> %a0)  {
; CHECK-LABEL: @reduce_add_zext_v8i8_to_v8i32(
; CHECK-NEXT:    [[TR:%.*]] = zext <8 x i8> [[A0:%.*]] to <8 x i32>
; CHECK-NEXT:    [[RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TR]])
; CHECK-NEXT:    ret i32 [[RED]]
;
  %tr = zext <8 x i8> %a0 to <8 x i32>
  %red = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %tr)
  ret i32 %red
}


declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8>)
declare i32 @llvm.vector.reduce.or.v8i32(<8 x i32>)
declare i8 @llvm.vector.reduce.or.v8i8(<8 x i8>)
declare i8 @llvm.vector.reduce.xor.v16i8(<16 x i8>)
declare i16 @llvm.vector.reduce.and.v16i16(<16 x i16>)
declare i16 @llvm.vector.reduce.mul.v8i16(<8 x i16>)