; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v,+zbb -riscv-v-vector-bits-min=128 -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s

define i64 @reduce_add(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_add:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vredsum.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %v)
  %res = add i64 %rdx, %x
  ret i64 %res
}

define i64 @reduce_add2(<4 x i64> %v) {
; CHECK-LABEL: reduce_add2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v10, 8
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredsum.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %v)
  %res = add i64 %rdx, 8
  ret i64 %res
}

define i64 @reduce_and(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_and:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vredand.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %v)
  %res = and i64 %rdx, %x
  ret i64 %res
}

define i64 @reduce_and2(<4 x i64> %v) {
; CHECK-LABEL: reduce_and2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v10, 8
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredand.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %v)
  %res = and i64 %rdx, 8
  ret i64 %res
}

define i64 @reduce_or(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_or:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vredor.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %v)
  %res = or i64 %rdx, %x
  ret i64 %res
}

define i64 @reduce_or2(<4 x i64> %v) {
; CHECK-LABEL: reduce_or2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v10, 8
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredor.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %v)
  %res = or i64 %rdx, 8
  ret i64 %res
}

define i64 @reduce_xor(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_xor:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vredxor.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> %v)
  %res = xor i64 %rdx, %x
  ret i64 %res
}

define i64 @reduce_xor2(<4 x i64> %v) {
; CHECK-LABEL: reduce_xor2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v10, zero
; CHECK-NEXT:    vredxor.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    andi a0, a0, 8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> %v)
  %res = and i64 %rdx, 8
  ret i64 %res
}

define i64 @reduce_umax(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_umax:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vredmaxu.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.umax.i64(i64 %rdx, i64 %x)
  ret i64 %res
}

define i64 @reduce_umax2(<4 x i64> %v) {
; CHECK-LABEL: reduce_umax2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v10, 8
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredmaxu.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.umax.i64(i64 %rdx, i64 8)
  ret i64 %res
}

define i64 @reduce_umin(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_umin:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vredminu.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.umin.i64(i64 %rdx, i64 %x)
  ret i64 %res
}

define i64 @reduce_umin2(<4 x i64> %v) {
; CHECK-LABEL: reduce_umin2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v10, 8
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredminu.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.umin.i64(i64 %rdx, i64 8)
  ret i64 %res
}

define i64 @reduce_smax(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_smax:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vredmax.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.smax.i64(i64 %rdx, i64 %x)
  ret i64 %res
}

define i64 @reduce_smax2(<4 x i64> %v) {
; CHECK-LABEL: reduce_smax2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v10, 8
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredmax.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.smax.i64(i64 %rdx, i64 8)
  ret i64 %res
}

define i64 @reduce_smin(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_smin:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vredmin.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.smin.i64(i64 %rdx, i64 %x)
  ret i64 %res
}

define i64 @reduce_smin2(<4 x i64> %v) {
; CHECK-LABEL: reduce_smin2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v10, 8
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredmin.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.smin.i64(i64 %rdx, i64 8)
  ret i64 %res
}

define float @reduce_fadd(float %x, <4 x float> %v) {
; CHECK-LABEL: reduce_fadd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call fast float @llvm.vector.reduce.fadd.v4f32(float %x, <4 x float> %v)
  ret float %rdx
}

define float @reduce_fadd2(float %x, <4 x float> %v) {
; CHECK-LABEL: reduce_fadd2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> %v)
  %res = fadd fast float %rdx, %x
  ret float %res
}

define float @reduce_fadd3(float %x, <4 x float> %v, ptr %rdxptr) {
; CHECK-LABEL: reduce_fadd3:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vfredusum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    fadd.s fa0, fa5, fa0
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
entry:
  %rdx = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.0, <4 x float> %v)
  %res = fadd fast float %rdx, %x
  store float %rdx, ptr %rdxptr
  ret float %res
}

define float @reduce_fmax(float %x, <4 x float> %v) {
; CHECK-LABEL: reduce_fmax:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredmax.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %v)
  %res = call float @llvm.maxnum.f32(float %x, float %rdx)
  ret float %res
}

define float @reduce_fmin(float %x, <4 x float> %v) {
; CHECK-LABEL: reduce_fmin:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredmin.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %v)
  %res = call float @llvm.minnum.f32(float %x, float %rdx)
  ret float %res
}

; Function Attrs: nofree nosync nounwind readnone willreturn
declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.and.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.or.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.xor.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.umax.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.umin.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.smax.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.smin.v4i64(<4 x i64>)
declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>)
declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
declare i64 @llvm.umax.i64(i64, i64)
declare i64 @llvm.umin.i64(i64, i64)
declare i64 @llvm.smax.i64(i64, i64)
declare i64 @llvm.smin.i64(i64, i64)
declare float @llvm.maxnum.f32(float, float)
declare float @llvm.minnum.f32(float, float)