; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zve32f,+zvl128b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+d,+zve32f,+zvl128b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

; Test that limiting ELEN through zve32 scalarizes elements larger than 32 bits
; and disables some fractional LMULs.

; This should use LMUL=1.
define void @add_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: add_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = add <4 x i32> %a, %b
  store <4 x i32> %c, ptr %x
  ret void
}

; i64 vectors should be scalarized
define void @add_v2i64(ptr %x, ptr %y) {
; RV32-LABEL: add_v2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    lw a2, 0(a1)
; RV32-NEXT:    lw a3, 4(a1)
; RV32-NEXT:    lw a4, 0(a0)
; RV32-NEXT:    lw a5, 4(a0)
; RV32-NEXT:    lw a6, 8(a0)
; RV32-NEXT:    lw a7, 12(a0)
; RV32-NEXT:    lw t0, 12(a1)
; RV32-NEXT:    lw a1, 8(a1)
; RV32-NEXT:    add a3, a5, a3
; RV32-NEXT:    add a2, a4, a2
; RV32-NEXT:    add a7, a7, t0
; RV32-NEXT:    add a1, a6, a1
; RV32-NEXT:    sltu a4, a2, a4
; RV32-NEXT:    sltu a5, a1, a6
; RV32-NEXT:    add a3, a3, a4
; RV32-NEXT:    add a5, a7, a5
; RV32-NEXT:    sw a2, 0(a0)
; RV32-NEXT:    sw a3, 4(a0)
; RV32-NEXT:    sw a1, 8(a0)
; RV32-NEXT:    sw a5, 12(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: add_v2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    ld a2, 0(a0)
; RV64-NEXT:    ld a3, 8(a0)
; RV64-NEXT:    ld a4, 0(a1)
; RV64-NEXT:    ld a1, 8(a1)
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    add a1, a3, a1
; RV64-NEXT:    sd a2, 0(a0)
; RV64-NEXT:    sd a1, 8(a0)
; RV64-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %c = add <2 x i64> %a, %b
  store <2 x i64> %c, ptr %x
  ret void
}

; This should use LMUL=1 because there are no fractional i32 LMULs with ELEN=32
; (see the note on the fractional LMUL rule after the integer tests).
define void @add_v2i32(ptr %x, ptr %y) {
; CHECK-LABEL: add_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load <2 x i32>, ptr %y
  %c = add <2 x i32> %a, %b
  store <2 x i32> %c, ptr %x
  ret void
}

; i64 vectors should be scalarized
define void @add_v1i64(ptr %x, ptr %y) {
; RV32-LABEL: add_v1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    lw a2, 0(a0)
; RV32-NEXT:    lw a3, 4(a0)
; RV32-NEXT:    lw a4, 4(a1)
; RV32-NEXT:    lw a1, 0(a1)
; RV32-NEXT:    add a3, a3, a4
; RV32-NEXT:    add a1, a2, a1
; RV32-NEXT:    sltu a2, a1, a2
; RV32-NEXT:    add a2, a3, a2
; RV32-NEXT:    sw a1, 0(a0)
; RV32-NEXT:    sw a2, 4(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: add_v1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    ld a2, 0(a0)
; RV64-NEXT:    ld a1, 0(a1)
; RV64-NEXT:    add a1, a2, a1
; RV64-NEXT:    sd a1, 0(a0)
; RV64-NEXT:    ret
  %a = load <1 x i64>, ptr %x
  %b = load <1 x i64>, ptr %y
  %c = add <1 x i64> %a, %b
  store <1 x i64> %c, ptr %x
  ret void
}
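
; Note on LMUL selection (an added explanation, not autogenerated): RVV only
; guarantees that a fractional LMUL supports SEW <= LMUL * ELEN, and zve32f
; caps ELEN at 32. Worked through for the element types in this file:
;   e32 at mf2: SEW = 32 > (1/2) * ELEN = 16  -> unavailable, so m1 is used
;   e32 at m1:  SEW = 32 <= 1 * ELEN = 32     -> legal
;   e64/f64:    SEW = 64 > ELEN = 32          -> no legal LMUL at any setting,
;               so i64 and double vectors are scalarized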

; This should use LMUL=1.
define void @fadd_v4f32(ptr %x, ptr %y) {
; CHECK-LABEL: fadd_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vfadd.vv v8, v8, v9
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = load <4 x float>, ptr %y
  %c = fadd <4 x float> %a, %b
  store <4 x float> %c, ptr %x
  ret void
}

; double vectors should be scalarized
define void @fadd_v2f64(ptr %x, ptr %y) {
; CHECK-LABEL: fadd_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fld fa5, 0(a0)
; CHECK-NEXT:    fld fa4, 8(a0)
; CHECK-NEXT:    fld fa3, 0(a1)
; CHECK-NEXT:    fld fa2, 8(a1)
; CHECK-NEXT:    fadd.d fa5, fa5, fa3
; CHECK-NEXT:    fadd.d fa4, fa4, fa2
; CHECK-NEXT:    fsd fa5, 0(a0)
; CHECK-NEXT:    fsd fa4, 8(a0)
; CHECK-NEXT:    ret
  %a = load <2 x double>, ptr %x
  %b = load <2 x double>, ptr %y
  %c = fadd <2 x double> %a, %b
  store <2 x double> %c, ptr %x
  ret void
}

; This should use LMUL=1 because there are no fractional float LMULs with ELEN=32
define void @fadd_v2f32(ptr %x, ptr %y) {
; CHECK-LABEL: fadd_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vfadd.vv v8, v8, v9
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x float>, ptr %x
  %b = load <2 x float>, ptr %y
  %c = fadd <2 x float> %a, %b
  store <2 x float> %c, ptr %x
  ret void
}

; double vectors should be scalarized
define void @fadd_v1f64(ptr %x, ptr %y) {
; CHECK-LABEL: fadd_v1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fld fa5, 0(a0)
; CHECK-NEXT:    fld fa4, 0(a1)
; CHECK-NEXT:    fadd.d fa5, fa5, fa4
; CHECK-NEXT:    fsd fa5, 0(a0)
; CHECK-NEXT:    ret
  %a = load <1 x double>, ptr %x
  %b = load <1 x double>, ptr %y
  %c = fadd <1 x double> %a, %b
  store <1 x double> %c, ptr %x
  ret void
}
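
; For reference, the scalarized RV32 i64 adds above (add_v2i64 and add_v1i64)
; build each 64-bit add from 32-bit halves; a sketch of the pattern (an added
; note, not autogenerated):
;   lo    = x_lo + y_lo
;   carry = (lo <u x_lo)   ; sltu detects unsigned wrap of the low half
;   hi    = x_hi + y_hi + carry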