; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64

; Negative test to ensure we don't try to generate a vector reduce when
; vector instructions are not available.
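;
; For reference, the extract-and-op chains below are the scalarized form of
; what the llvm.vector.reduce.* intrinsics express directly. A minimal sketch
; of the intrinsic form (hypothetical function name, not one of the checked
; functions in this file):
;
;   define i32 @reduce_sum_4xi32_intrinsic(<4 x i32> %v) {
;     %r = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v)
;     ret i32 %r
;   }
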
define i32 @reduce_sum_4xi32(<4 x i32> %v) {
; RV32-LABEL: reduce_sum_4xi32:
; RV32:       # %bb.0:
; RV32-NEXT:    lw a1, 0(a0)
; RV32-NEXT:    lw a2, 4(a0)
; RV32-NEXT:    lw a3, 8(a0)
; RV32-NEXT:    lw a0, 12(a0)
; RV32-NEXT:    add a1, a1, a2
; RV32-NEXT:    add a0, a3, a0
; RV32-NEXT:    add a0, a1, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: reduce_sum_4xi32:
; RV64:       # %bb.0:
; RV64-NEXT:    lw a1, 0(a0)
; RV64-NEXT:    lw a2, 8(a0)
; RV64-NEXT:    lw a3, 16(a0)
; RV64-NEXT:    lw a0, 24(a0)
; RV64-NEXT:    add a1, a1, a2
; RV64-NEXT:    add a0, a3, a0
; RV64-NEXT:    addw a0, a1, a0
; RV64-NEXT:    ret
  %e0 = extractelement <4 x i32> %v, i32 0
  %e1 = extractelement <4 x i32> %v, i32 1
  %e2 = extractelement <4 x i32> %v, i32 2
  %e3 = extractelement <4 x i32> %v, i32 3
  %add0 = add i32 %e0, %e1
  %add1 = add i32 %add0, %e2
  %add2 = add i32 %add1, %e3
  ret i32 %add2
}

define i32 @reduce_xor_4xi32(<4 x i32> %v) {
; RV32-LABEL: reduce_xor_4xi32:
; RV32:       # %bb.0:
; RV32-NEXT:    lw a1, 0(a0)
; RV32-NEXT:    lw a2, 4(a0)
; RV32-NEXT:    lw a3, 8(a0)
; RV32-NEXT:    lw a0, 12(a0)
; RV32-NEXT:    xor a1, a1, a2
; RV32-NEXT:    xor a0, a3, a0
; RV32-NEXT:    xor a0, a1, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: reduce_xor_4xi32:
; RV64:       # %bb.0:
; RV64-NEXT:    ld a1, 0(a0)
; RV64-NEXT:    ld a2, 8(a0)
; RV64-NEXT:    ld a3, 16(a0)
; RV64-NEXT:    ld a0, 24(a0)
; RV64-NEXT:    xor a1, a1, a2
; RV64-NEXT:    xor a0, a3, a0
; RV64-NEXT:    xor a0, a1, a0
; RV64-NEXT:    ret
  %e0 = extractelement <4 x i32> %v, i32 0
  %e1 = extractelement <4 x i32> %v, i32 1
  %e2 = extractelement <4 x i32> %v, i32 2
  %e3 = extractelement <4 x i32> %v, i32 3
  %xor0 = xor i32 %e0, %e1
  %xor1 = xor i32 %xor0, %e2
  %xor2 = xor i32 %xor1, %e3
  ret i32 %xor2
}

define i32 @reduce_or_4xi32(<4 x i32> %v) {
; RV32-LABEL: reduce_or_4xi32:
; RV32:       # %bb.0:
; RV32-NEXT:    lw a1, 0(a0)
; RV32-NEXT:    lw a2, 4(a0)
; RV32-NEXT:    lw a3, 8(a0)
; RV32-NEXT:    lw a0, 12(a0)
; RV32-NEXT:    or a1, a1, a2
; RV32-NEXT:    or a0, a3, a0
; RV32-NEXT:    or a0, a1, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: reduce_or_4xi32:
; RV64:       # %bb.0:
; RV64-NEXT:    ld a1, 0(a0)
; RV64-NEXT:    ld a2, 8(a0)
; RV64-NEXT:    ld a3, 16(a0)
; RV64-NEXT:    ld a0, 24(a0)
; RV64-NEXT:    or a1, a1, a2
; RV64-NEXT:    or a0, a3, a0
; RV64-NEXT:    or a0, a1, a0
; RV64-NEXT:    ret
  %e0 = extractelement <4 x i32> %v, i32 0
  %e1 = extractelement <4 x i32> %v, i32 1
  %e2 = extractelement <4 x i32> %v, i32 2
  %e3 = extractelement <4 x i32> %v, i32 3
  %or0 = or i32 %e0, %e1
  %or1 = or i32 %or0, %e2
  %or2 = or i32 %or1, %e3
  ret i32 %or2
}