; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v,+zbb -riscv-v-vector-bits-min=128 -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s

define i64 @reduce_add(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_add:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; CHECK-NEXT:    vredsum.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %v)
  %res = add i64 %rdx, %x
  ret i64 %res
}

define i64 @reduce_add2(<4 x i64> %v) {
; CHECK-LABEL: reduce_add2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v10, 8
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; CHECK-NEXT:    vredsum.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %v)
  %res = add i64 %rdx, 8
  ret i64 %res
}

define i64 @reduce_and(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_and:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; CHECK-NEXT:    vredand.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %v)
  %res = and i64 %rdx, %x
  ret i64 %res
}

define i64 @reduce_and2(<4 x i64> %v) {
; CHECK-LABEL: reduce_and2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v10, 8
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; CHECK-NEXT:    vredand.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %v)
  %res = and i64 %rdx, 8
  ret i64 %res
}

define i64 @reduce_or(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_or:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; CHECK-NEXT:    vredor.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %v)
  %res = or i64 %rdx, %x
  ret i64 %res
}

define i64 @reduce_or2(<4 x i64> %v) {
; CHECK-LABEL: reduce_or2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v10, 8
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; CHECK-NEXT:    vredor.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %v)
  %res = or i64 %rdx, 8
  ret i64 %res
}

define i64 @reduce_xor(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_xor:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; CHECK-NEXT:    vredxor.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> %v)
  %res = xor i64 %rdx, %x
  ret i64 %res
}

define i64 @reduce_xor2(<4 x i64> %v) {
; CHECK-LABEL: reduce_xor2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
; CHECK-NEXT:    vmv.s.x v10, zero
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; CHECK-NEXT:    vredxor.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    andi a0, a0, 8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> %v)
  %res = and i64 %rdx, 8
  ret i64 %res
}

define i64 @reduce_umax(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_umax:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; CHECK-NEXT:    vredmaxu.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.umax.i64(i64 %rdx, i64 %x)
  ret i64 %res
}

define i64 @reduce_umax2(<4 x i64> %v) {
; CHECK-LABEL: reduce_umax2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v10, 8
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; CHECK-NEXT:    vredmaxu.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.umax.i64(i64 %rdx, i64 8)
  ret i64 %res
}

define i64 @reduce_umin(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_umin:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; CHECK-NEXT:    vredminu.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.umin.i64(i64 %rdx, i64 %x)
  ret i64 %res
}

define i64 @reduce_umin2(<4 x i64> %v) {
; CHECK-LABEL: reduce_umin2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v10, 8
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; CHECK-NEXT:    vredminu.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.umin.i64(i64 %rdx, i64 8)
  ret i64 %res
}

define i64 @reduce_smax(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_smax:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; CHECK-NEXT:    vredmax.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.smax.i64(i64 %rdx, i64 %x)
  ret i64 %res
}

define i64 @reduce_smax2(<4 x i64> %v) {
; CHECK-LABEL: reduce_smax2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v10, 8
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; CHECK-NEXT:    vredmax.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.smax.i64(i64 %rdx, i64 8)
  ret i64 %res
}

define i64 @reduce_smin(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_smin:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; CHECK-NEXT:    vredmin.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.smin.i64(i64 %rdx, i64 %x)
  ret i64 %res
}

define i64 @reduce_smin2(<4 x i64> %v) {
; CHECK-LABEL: reduce_smin2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v10, 8
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; CHECK-NEXT:    vredmin.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.smin.i64(i64 %rdx, i64 8)
  ret i64 %res
}

define float @reduce_fadd(float %x, <4 x float> %v) {
; CHECK-LABEL: reduce_fadd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, mu
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vfredusum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call fast float @llvm.vector.reduce.fadd.v4f32(float %x, <4 x float> %v)
  ret float %rdx
}

define float @reduce_fadd2(float %x, <4 x float> %v) {
; CHECK-LABEL: reduce_fadd2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, mu
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vfredusum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> %v)
  %res = fadd fast float %rdx, %x
  ret float %res
}

define float @reduce_fmax(float %x, <4 x float> %v) {
; CHECK-LABEL: reduce_fmax:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, mu
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vfredmax.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %v)
  %res = call float @llvm.maxnum.f32(float %x, float %rdx)
  ret float %res
}

define float @reduce_fmin(float %x, <4 x float> %v) {
; CHECK-LABEL: reduce_fmin:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, mu
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vfredmin.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %v)
  %res = call float @llvm.minnum.f32(float %x, float %rdx)
  ret float %res
}

; Function Attrs: nofree nosync nounwind readnone willreturn
declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.and.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.or.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.xor.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.umax.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.umin.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.smax.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.smin.v4i64(<4 x i64>)
declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>)
declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
declare i64 @llvm.umax.i64(i64, i64)
declare i64 @llvm.umin.i64(i64, i64)
declare i64 @llvm.smax.i64(i64, i64)
declare i64 @llvm.smin.i64(i64, i64)
declare float @llvm.maxnum.f32(float, float)
declare float @llvm.minnum.f32(float, float)