; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64

; Negative test to ensure we don't try to generate a vector reduce when
; vector instructions are not available.

; Each function below extracts the four lanes of a <4 x i32> argument and
; folds them with a left-to-right chain of one scalar binary operator:
; ((e0 op e1) op e2) op e3. Neither llc invocation above passes -mattr, so
; the expected assembly for both targets contains only scalar loads through
; a0 and scalar ALU instructions.
;
; In the riscv64 expectations the lanes are read at an 8-byte stride
; (offsets 0, 8, 16, 24); the riscv32 expectations use a 4-byte stride.

; Integer add chain. The riscv64 expectations use lw loads and finish with
; addw, whereas the xor/or functions further down use ld loads.
define i32 @reduce_sum_4xi32(<4 x i32> %v) {
; RV32-LABEL: reduce_sum_4xi32:
; RV32:       # %bb.0:
; RV32-NEXT:    lw a1, 0(a0)
; RV32-NEXT:    lw a2, 4(a0)
; RV32-NEXT:    lw a3, 8(a0)
; RV32-NEXT:    lw a0, 12(a0)
; RV32-NEXT:    add a1, a1, a2
; RV32-NEXT:    add a0, a3, a0
; RV32-NEXT:    add a0, a1, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: reduce_sum_4xi32:
; RV64:       # %bb.0:
; RV64-NEXT:    lw a1, 0(a0)
; RV64-NEXT:    lw a2, 8(a0)
; RV64-NEXT:    lw a3, 16(a0)
; RV64-NEXT:    lw a0, 24(a0)
; RV64-NEXT:    add a1, a1, a2
; RV64-NEXT:    add a0, a3, a0
; RV64-NEXT:    addw a0, a1, a0
; RV64-NEXT:    ret
  %e0 = extractelement <4 x i32> %v, i32 0
  %e1 = extractelement <4 x i32> %v, i32 1
  %e2 = extractelement <4 x i32> %v, i32 2
  %e3 = extractelement <4 x i32> %v, i32 3
  %add0 = add i32 %e0, %e1
  %add1 = add i32 %add0, %e2
  %add2 = add i32 %add1, %e3
  ret i32 %add2
}

; Bitwise xor chain over the four lanes; same IR shape as the add case.
define i32 @reduce_xor_4xi32(<4 x i32> %v) {
; RV32-LABEL: reduce_xor_4xi32:
; RV32:       # %bb.0:
; RV32-NEXT:    lw a1, 0(a0)
; RV32-NEXT:    lw a2, 4(a0)
; RV32-NEXT:    lw a3, 8(a0)
; RV32-NEXT:    lw a0, 12(a0)
; RV32-NEXT:    xor a1, a1, a2
; RV32-NEXT:    xor a0, a3, a0
; RV32-NEXT:    xor a0, a1, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: reduce_xor_4xi32:
; RV64:       # %bb.0:
; RV64-NEXT:    ld a1, 0(a0)
; RV64-NEXT:    ld a2, 8(a0)
; RV64-NEXT:    ld a3, 16(a0)
; RV64-NEXT:    ld a0, 24(a0)
; RV64-NEXT:    xor a1, a1, a2
; RV64-NEXT:    xor a0, a3, a0
; RV64-NEXT:    xor a0, a1, a0
; RV64-NEXT:    ret
  %e0 = extractelement <4 x i32> %v, i32 0
  %e1 = extractelement <4 x i32> %v, i32 1
  %e2 = extractelement <4 x i32> %v, i32 2
  %e3 = extractelement <4 x i32> %v, i32 3
  %xor0 = xor i32 %e0, %e1
  %xor1 = xor i32 %xor0, %e2
  %xor2 = xor i32 %xor1, %e3
  ret i32 %xor2
}

; Bitwise or chain over the four lanes; same IR shape as the add case.
define i32 @reduce_or_4xi32(<4 x i32> %v) {
; RV32-LABEL: reduce_or_4xi32:
; RV32:       # %bb.0:
; RV32-NEXT:    lw a1, 0(a0)
; RV32-NEXT:    lw a2, 4(a0)
; RV32-NEXT:    lw a3, 8(a0)
; RV32-NEXT:    lw a0, 12(a0)
; RV32-NEXT:    or a1, a1, a2
; RV32-NEXT:    or a0, a3, a0
; RV32-NEXT:    or a0, a1, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: reduce_or_4xi32:
; RV64:       # %bb.0:
; RV64-NEXT:    ld a1, 0(a0)
; RV64-NEXT:    ld a2, 8(a0)
; RV64-NEXT:    ld a3, 16(a0)
; RV64-NEXT:    ld a0, 24(a0)
; RV64-NEXT:    or a1, a1, a2
; RV64-NEXT:    or a0, a3, a0
; RV64-NEXT:    or a0, a1, a0
; RV64-NEXT:    ret
  %e0 = extractelement <4 x i32> %v, i32 0
  %e1 = extractelement <4 x i32> %v, i32 1
  %e2 = extractelement <4 x i32> %v, i32 2
  %e3 = extractelement <4 x i32> %v, i32 3
  %or0 = or i32 %e0, %e1
  %or1 = or i32 %or0, %e2
  %or2 = or i32 %or1, %e3
  ret i32 %or2
}