; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
; RUN: | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
; RUN: | FileCheck %s --check-prefixes=CHECK,RV64

declare i8 @llvm.vp.reduce.add.v2i8(i8, <2 x i8>, <2 x i1>, i32)

define signext i8 @vpreduce_add_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_add_v2i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: ret
  %r = call i8 @llvm.vp.reduce.add.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl)
  ret i8 %r
}

declare i8 @llvm.vp.reduce.umax.v2i8(i8, <2 x i8>, <2 x i1>, i32)

define signext i8 @vpreduce_umax_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_umax_v2i8:
; CHECK: # %bb.0:
; CHECK-NEXT: andi a0, a0, 255
; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
; CHECK-NEXT: vredmaxu.vs v9, v8, v9, v0.t
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: ret
  %r = call i8 @llvm.vp.reduce.umax.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl)
  ret i8 %r
}

declare i8 @llvm.vp.reduce.smax.v2i8(i8, <2 x i8>, <2 x i1>, i32)

define signext i8 @vpreduce_smax_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_smax_v2i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: ret
  %r = call i8 @llvm.vp.reduce.smax.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl)
  ret i8 %r
}

declare i8 @llvm.vp.reduce.umin.v2i8(i8, <2 x i8>, <2 x i1>, i32)

define signext i8 @vpreduce_umin_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_umin_v2i8:
; CHECK: # %bb.0:
; CHECK-NEXT: andi a0, a0, 255
; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
; CHECK-NEXT: vredminu.vs v9, v8, v9, v0.t
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: ret
  %r = call i8 @llvm.vp.reduce.umin.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl)
  ret i8 %r
}

declare i8 @llvm.vp.reduce.smin.v2i8(i8, <2 x i8>, <2 x i1>, i32)

define signext i8 @vpreduce_smin_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_smin_v2i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: ret
  %r = call i8 @llvm.vp.reduce.smin.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl)
  ret i8 %r
}

declare i8 @llvm.vp.reduce.and.v2i8(i8, <2 x i8>, <2 x i1>, i32)

define signext i8 @vpreduce_and_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_and_v2i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: ret
  %r = call i8 @llvm.vp.reduce.and.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl)
  ret i8 %r
}

declare i8 @llvm.vp.reduce.or.v2i8(i8, <2 x i8>, <2 x i1>, i32)

define signext i8 @vpreduce_or_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_or_v2i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: ret
  %r = call i8 @llvm.vp.reduce.or.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl)
  ret i8 %r
}

declare i8 @llvm.vp.reduce.xor.v2i8(i8, <2 x i8>, <2 x i1>, i32)

define signext i8 @vpreduce_xor_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_xor_v2i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: ret
  %r = call i8 @llvm.vp.reduce.xor.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl)
  ret i8 %r
}

declare i8 @llvm.vp.reduce.umin.v3i8(i8, <3 x i8>, <3 x i1>, i32)

define signext i8 @vpreduce_umin_v3i8(i8 signext %s, <3 x i8> %v, <3 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_umin_v3i8:
; CHECK: # %bb.0:
; CHECK-NEXT: andi a0, a0, 255
; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
; CHECK-NEXT: vredminu.vs v9, v8, v9, v0.t
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: ret
  %r = call i8 @llvm.vp.reduce.umin.v3i8(i8 %s, <3 x i8> %v, <3 x i1> %m, i32 %evl)
  ret i8 %r
}

declare i8 @llvm.vp.reduce.add.v4i8(i8, <4 x i8>, <4 x i1>, i32)

define signext i8 @vpreduce_add_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_add_v4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: ret
  %r = call i8 @llvm.vp.reduce.add.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl)
  ret i8 %r
}

declare i8 @llvm.vp.reduce.umax.v4i8(i8, <4 x i8>, <4 x i1>, i32)

define signext i8 @vpreduce_umax_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_umax_v4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: andi a0, a0, 255
; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
; CHECK-NEXT: vredmaxu.vs v9, v8, v9, v0.t
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: ret
  %r = call i8 @llvm.vp.reduce.umax.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl)
  ret i8 %r
}

declare i8 @llvm.vp.reduce.smax.v4i8(i8, <4 x i8>, <4 x i1>, i32)

define signext i8 @vpreduce_smax_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_smax_v4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: ret
  %r = call i8 @llvm.vp.reduce.smax.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl)
  ret i8 %r
}

declare i8 @llvm.vp.reduce.umin.v4i8(i8, <4 x i8>, <4 x i1>, i32)

define signext i8 @vpreduce_umin_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_umin_v4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: andi a0, a0, 255
; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
; CHECK-NEXT: vredminu.vs v9, v8, v9, v0.t
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: ret
  %r = call i8 @llvm.vp.reduce.umin.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl)
  ret i8 %r
}

declare i8 @llvm.vp.reduce.smin.v4i8(i8, <4 x i8>, <4 x i1>, i32)

define signext i8 @vpreduce_smin_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_smin_v4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: ret
  %r = call i8 @llvm.vp.reduce.smin.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl)
  ret i8 %r
}

declare i8 @llvm.vp.reduce.and.v4i8(i8, <4 x i8>, <4 x i1>, i32)

define signext i8 @vpreduce_and_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_and_v4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: ret
  %r = call i8 @llvm.vp.reduce.and.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl)
  ret i8 %r
}

declare i8 @llvm.vp.reduce.or.v4i8(i8, <4 x i8>, <4 x i1>, i32)

define signext i8 @vpreduce_or_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_or_v4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: ret
  %r = call i8 @llvm.vp.reduce.or.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl)
  ret i8 %r
}

declare i8 @llvm.vp.reduce.xor.v4i8(i8, <4 x i8>, <4 x i1>, i32)

define signext i8 @vpreduce_xor_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_xor_v4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: ret
  %r = call i8 @llvm.vp.reduce.xor.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl)
  ret i8 %r
}

declare i16 @llvm.vp.reduce.add.v2i16(i16, <2 x i16>, <2 x i1>, i32)

define signext i16 @vpreduce_add_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_add_v2i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: ret
  %r = call i16 @llvm.vp.reduce.add.v2i16(i16 %s, <2 x i16> %v, <2 x i1> %m, i32 %evl)
  ret i16 %r
}

declare i16 @llvm.vp.reduce.umax.v2i16(i16, <2 x i16>, <2 x i1>, i32)

define signext i16 @vpreduce_umax_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpreduce_umax_v2i16:
; RV32: # %bb.0:
; RV32-NEXT: slli a0, a0, 16
; RV32-NEXT: srli a0, a0, 16
; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV32-NEXT: vmv.s.x v9, a0
; RV32-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
; RV32-NEXT: vredmaxu.vs v9, v8, v9, v0.t
; RV32-NEXT: vmv.x.s a0, v9
; RV32-NEXT: ret
;
; RV64-LABEL: vpreduce_umax_v2i16:
; RV64: # %bb.0:
; RV64-NEXT: slli a0, a0, 48
; RV64-NEXT: srli a0, a0, 48
; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, a0
; RV64-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
; RV64-NEXT: vredmaxu.vs v9, v8, v9, v0.t
; RV64-NEXT: vmv.x.s a0, v9
; RV64-NEXT: ret
  %r = call i16 @llvm.vp.reduce.umax.v2i16(i16 %s, <2 x i16> %v, <2 x i1> %m, i32 %evl)
  ret i16 %r
}

declare i16 @llvm.vp.reduce.smax.v2i16(i16, <2 x i16>, <2 x i1>, i32)

define signext i16 @vpreduce_smax_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_smax_v2i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: ret
  %r = call i16 @llvm.vp.reduce.smax.v2i16(i16 %s, <2 x i16> %v, <2 x i1> %m, i32 %evl)
  ret i16 %r
}

declare i16 @llvm.vp.reduce.umin.v2i16(i16, <2 x i16>, <2 x i1>, i32)

define signext i16 @vpreduce_umin_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpreduce_umin_v2i16:
; RV32: # %bb.0:
; RV32-NEXT: slli a0, a0, 16
; RV32-NEXT: srli a0, a0, 16
; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV32-NEXT: vmv.s.x v9, a0
; RV32-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
; RV32-NEXT: vredminu.vs v9, v8, v9, v0.t
; RV32-NEXT: vmv.x.s a0, v9
; RV32-NEXT: ret
;
; RV64-LABEL: vpreduce_umin_v2i16:
; RV64: # %bb.0:
; RV64-NEXT: slli a0, a0, 48
; RV64-NEXT: srli a0, a0, 48
; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, a0
; RV64-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
; RV64-NEXT: vredminu.vs v9, v8, v9, v0.t
; RV64-NEXT: vmv.x.s a0, v9
; RV64-NEXT: ret
  %r = call i16 @llvm.vp.reduce.umin.v2i16(i16 %s, <2 x i16> %v, <2 x i1> %m, i32 %evl)
  ret i16 %r
}

declare i16 @llvm.vp.reduce.smin.v2i16(i16, <2 x i16>, <2 x i1>, i32)

define signext i16 @vpreduce_smin_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_smin_v2i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: ret
  %r = call i16 @llvm.vp.reduce.smin.v2i16(i16 %s, <2 x i16> %v, <2 x i1> %m, i32 %evl)
  ret i16 %r
}

declare i16 @llvm.vp.reduce.and.v2i16(i16, <2 x i16>, <2 x i1>, i32)

define signext i16 @vpreduce_and_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_and_v2i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: ret
  %r = call i16 @llvm.vp.reduce.and.v2i16(i16 %s, <2 x i16> %v, <2 x i1> %m, i32 %evl)
  ret i16 %r
}

declare i16 @llvm.vp.reduce.or.v2i16(i16, <2 x i16>, <2 x i1>, i32)

define signext i16 @vpreduce_or_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_or_v2i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: ret
  %r = call i16 @llvm.vp.reduce.or.v2i16(i16 %s, <2 x i16> %v, <2 x i1> %m, i32 %evl)
  ret i16 %r
}

declare i16 @llvm.vp.reduce.xor.v2i16(i16, <2 x i16>, <2 x i1>, i32)

define signext i16 @vpreduce_xor_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_xor_v2i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: ret
  %r = call i16 @llvm.vp.reduce.xor.v2i16(i16 %s, <2 x i16> %v, <2 x i1> %m, i32 %evl)
  ret i16 %r
}

declare i16 @llvm.vp.reduce.add.v4i16(i16, <4 x i16>, <4 x i1>, i32)

define signext i16 @vpreduce_add_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_add_v4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: ret
  %r = call i16 @llvm.vp.reduce.add.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl)
  ret i16 %r
}

declare i16 @llvm.vp.reduce.umax.v4i16(i16, <4 x i16>, <4 x i1>, i32)

define signext i16 @vpreduce_umax_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpreduce_umax_v4i16:
; RV32: # %bb.0:
; RV32-NEXT: slli a0, a0, 16
; RV32-NEXT: srli a0, a0, 16
; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV32-NEXT: vmv.s.x v9, a0
; RV32-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
; RV32-NEXT: vredmaxu.vs v9, v8, v9, v0.t
; RV32-NEXT: vmv.x.s a0, v9
; RV32-NEXT: ret
;
; RV64-LABEL: vpreduce_umax_v4i16:
; RV64: # %bb.0:
; RV64-NEXT: slli a0, a0, 48
; RV64-NEXT: srli a0, a0, 48
; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, a0
; RV64-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
; RV64-NEXT: vredmaxu.vs v9, v8, v9, v0.t
; RV64-NEXT: vmv.x.s a0, v9
; RV64-NEXT: ret
  %r = call i16 @llvm.vp.reduce.umax.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl)
  ret i16 %r
}

declare i16 @llvm.vp.reduce.smax.v4i16(i16, <4 x i16>, <4 x i1>, i32)

define signext i16 @vpreduce_smax_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_smax_v4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: ret
  %r = call i16 @llvm.vp.reduce.smax.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl)
  ret i16 %r
}

declare i16 @llvm.vp.reduce.umin.v4i16(i16, <4 x i16>, <4 x i1>, i32)

define signext i16 @vpreduce_umin_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpreduce_umin_v4i16:
; RV32: # %bb.0:
; RV32-NEXT: slli a0, a0, 16
; RV32-NEXT: srli a0, a0, 16
; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV32-NEXT: vmv.s.x v9, a0
; RV32-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
; RV32-NEXT: vredminu.vs v9, v8, v9, v0.t
; RV32-NEXT: vmv.x.s a0, v9
; RV32-NEXT: ret
;
; RV64-LABEL: vpreduce_umin_v4i16:
; RV64: # %bb.0:
; RV64-NEXT: slli a0, a0, 48
; RV64-NEXT: srli a0, a0, 48
; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, a0
; RV64-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
; RV64-NEXT: vredminu.vs v9, v8, v9, v0.t
; RV64-NEXT: vmv.x.s a0, v9
; RV64-NEXT: ret
  %r = call i16 @llvm.vp.reduce.umin.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl)
  ret i16 %r
}

declare i16 @llvm.vp.reduce.smin.v4i16(i16, <4 x i16>, <4 x i1>, i32)

define signext i16 @vpreduce_smin_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_smin_v4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: ret
  %r = call i16 @llvm.vp.reduce.smin.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl)
  ret i16 %r
}

declare i16 @llvm.vp.reduce.and.v4i16(i16, <4 x i16>, <4 x i1>, i32)

define signext i16 @vpreduce_and_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_and_v4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: ret
  %r = call i16 @llvm.vp.reduce.and.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl)
  ret i16 %r
}

declare i16 @llvm.vp.reduce.or.v4i16(i16, <4 x i16>, <4 x i1>, i32)

define signext i16 @vpreduce_or_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_or_v4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: ret
  %r = call i16 @llvm.vp.reduce.or.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl)
  ret i16 %r
}

declare i16 @llvm.vp.reduce.xor.v4i16(i16, <4 x i16>, <4 x i1>, i32)

define signext i16 @vpreduce_xor_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_xor_v4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: ret
  %r = call i16 @llvm.vp.reduce.xor.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl)
  ret i16 %r
}

declare i32 @llvm.vp.reduce.add.v2i32(i32, <2 x i32>, <2 x i1>, i32)

define signext i32 @vpreduce_add_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_add_v2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: ret
  %r = call i32 @llvm.vp.reduce.add.v2i32(i32 %s, <2 x i32> %v, <2 x i1> %m, i32 %evl)
  ret i32 %r
}

declare i32 @llvm.vp.reduce.umax.v2i32(i32, <2 x i32>, <2 x i1>, i32)

define signext i32 @vpreduce_umax_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_umax_v2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; CHECK-NEXT: vredmaxu.vs v9, v8, v9, v0.t
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: ret
  %r = call i32 @llvm.vp.reduce.umax.v2i32(i32 %s, <2 x i32> %v, <2 x i1> %m, i32 %evl)
  ret i32 %r
}

declare i32 @llvm.vp.reduce.smax.v2i32(i32, <2 x i32>, <2 x i1>, i32)

define signext i32 @vpreduce_smax_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_smax_v2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: ret
  %r = call i32 @llvm.vp.reduce.smax.v2i32(i32 %s, <2 x i32> %v, <2 x i1> %m, i32 %evl)
  ret i32 %r
}

declare i32 @llvm.vp.reduce.umin.v2i32(i32, <2 x i32>, <2 x i1>, i32)

define signext i32 @vpreduce_umin_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_umin_v2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; CHECK-NEXT: vredminu.vs v9, v8, v9, v0.t
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: ret
  %r = call i32 @llvm.vp.reduce.umin.v2i32(i32 %s, <2 x i32> %v, <2 x i1> %m, i32 %evl)
  ret i32 %r
}

declare i32 @llvm.vp.reduce.smin.v2i32(i32, <2 x i32>, <2 x i1>, i32)

define signext i32 @vpreduce_smin_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_smin_v2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: ret
  %r = call i32 @llvm.vp.reduce.smin.v2i32(i32 %s, <2 x i32> %v, <2 x i1> %m, i32 %evl)
  ret i32 %r
}

declare i32 @llvm.vp.reduce.and.v2i32(i32, <2 x i32>, <2 x i1>, i32)

define signext i32 @vpreduce_and_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_and_v2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: ret
  %r = call i32 @llvm.vp.reduce.and.v2i32(i32 %s, <2 x i32> %v, <2 x i1> %m, i32 %evl)
  ret i32 %r
}

declare i32 @llvm.vp.reduce.or.v2i32(i32, <2 x i32>, <2 x i1>, i32)

define signext i32 @vpreduce_or_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_or_v2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: ret
  %r = call i32 @llvm.vp.reduce.or.v2i32(i32 %s, <2 x i32> %v, <2 x i1> %m, i32 %evl)
  ret i32 %r
}

declare i32 @llvm.vp.reduce.xor.v2i32(i32, <2 x i32>, <2 x i1>, i32)

define signext i32 @vpreduce_xor_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_xor_v2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: ret
  %r = call i32 @llvm.vp.reduce.xor.v2i32(i32 %s, <2 x i32> %v, <2 x i1> %m, i32 %evl)
  ret i32 %r
}

declare i32 @llvm.vp.reduce.add.v4i32(i32, <4 x i32>, <4 x i1>, i32)

define signext i32 @vpreduce_add_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_add_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: ret
  %r = call i32 @llvm.vp.reduce.add.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl)
  ret i32 %r
}

declare i32 @llvm.vp.reduce.umax.v4i32(i32, <4 x i32>, <4 x i1>, i32)

define signext i32 @vpreduce_umax_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_umax_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vredmaxu.vs v9, v8, v9, v0.t
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: ret
  %r = call i32 @llvm.vp.reduce.umax.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl)
  ret i32 %r
}

declare i32 @llvm.vp.reduce.smax.v4i32(i32, <4 x i32>, <4 x i1>, i32)

define signext i32 @vpreduce_smax_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_smax_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: ret
  %r = call i32 @llvm.vp.reduce.smax.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl)
  ret i32 %r
}

declare i32 @llvm.vp.reduce.umin.v4i32(i32, <4 x i32>, <4 x i1>, i32)

define signext i32 @vpreduce_umin_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_umin_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vredminu.vs v9, v8, v9, v0.t
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: ret
  %r = call i32 @llvm.vp.reduce.umin.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl)
  ret i32 %r
}

declare i32 @llvm.vp.reduce.smin.v4i32(i32, <4 x i32>, <4 x i1>, i32)

define signext i32 @vpreduce_smin_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_smin_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: ret
  %r = call i32 @llvm.vp.reduce.smin.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl)
  ret i32 %r
}

declare i32 @llvm.vp.reduce.and.v4i32(i32, <4 x i32>, <4 x i1>, i32)

define signext i32 @vpreduce_and_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_and_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: ret
  %r = call i32 @llvm.vp.reduce.and.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl)
  ret i32 %r
}

declare i32 @llvm.vp.reduce.or.v4i32(i32, <4 x i32>, <4 x i1>, i32)

define signext i32 @vpreduce_or_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_or_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: ret
  %r = call i32 @llvm.vp.reduce.or.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl)
  ret i32 %r
}

declare i32 @llvm.vp.reduce.xor.v4i32(i32, <4 x i32>, <4 x i1>, i32)

define signext i32 @vpreduce_xor_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_xor_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: ret
  %r = call i32 @llvm.vp.reduce.xor.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl)
  ret i32 %r
}

declare i32 @llvm.vp.reduce.xor.v64i32(i32, <64 x i32>, <64 x i1>, i32)

define signext i32 @vpreduce_xor_v64i32(i32 signext %s, <64 x i32> %v, <64 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_xor_v64i32:
; CHECK: # %bb.0:
; CHECK-NEXT: li a3, 32
; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; CHECK-NEXT: vslidedown.vi v24, v0, 4
; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: bltu a1, a3, .LBB49_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a2, 32
; CHECK-NEXT: .LBB49_2:
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vmv.s.x v25, a0
; CHECK-NEXT: addi a0, a1, -32
; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; CHECK-NEXT: vredxor.vs v25, v8, v25, v0.t
; CHECK-NEXT: sltu a1, a1, a0
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: and a0, a1, a0
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT: vredxor.vs v25, v16, v25, v0.t
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
  %r = call i32 @llvm.vp.reduce.xor.v64i32(i32 %s, <64 x i32> %v, <64 x i1> %m, i32 %evl)
  ret i32 %r
}

declare i64 @llvm.vp.reduce.add.v2i64(i64, <2 x i64>, <2 x i1>, i32)

define signext i64 @vpreduce_add_v2i64(i64 signext %s, <2 x i64> %v, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpreduce_add_v2i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw a0, 8(sp)
; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
; RV32-NEXT: li a1, 32
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vredsum.vs v9, v8, v9, v0.t
; RV32-NEXT: vmv.x.s a0, v9
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vsrl.vx v8, v9, a1
; RV32-NEXT: vmv.x.s a1, v8
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vpreduce_add_v2i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, a0
; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
; RV64-NEXT: vredsum.vs v9, v8, v9, v0.t
; RV64-NEXT: vmv.x.s a0, v9
; RV64-NEXT: ret
  %r = call i64 @llvm.vp.reduce.add.v2i64(i64 %s, <2 x i64> %v, <2 x i1> %m, i32 %evl)
  ret i64 %r
}

declare i64 @llvm.vp.reduce.umax.v2i64(i64, <2 x i64>, <2 x i1>, i32)

define signext i64 @vpreduce_umax_v2i64(i64 signext %s, <2 x i64> %v, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpreduce_umax_v2i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw a0, 8(sp)
; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
; RV32-NEXT: li a1, 32
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vredmaxu.vs v9, v8, v9, v0.t
; RV32-NEXT: vmv.x.s a0, v9
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vsrl.vx v8, v9, a1
; RV32-NEXT: vmv.x.s a1, v8
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vpreduce_umax_v2i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, a0
; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
; RV64-NEXT: vredmaxu.vs v9, v8, v9, v0.t
; RV64-NEXT: vmv.x.s a0, v9
; RV64-NEXT: ret
  %r = call i64 @llvm.vp.reduce.umax.v2i64(i64 %s, <2 x i64> %v, <2 x i1> %m, i32 %evl)
  ret i64 %r
}

declare i64 @llvm.vp.reduce.smax.v2i64(i64, <2 x i64>, <2 x i1>, i32)

define signext i64 @vpreduce_smax_v2i64(i64 signext %s, <2 x i64> %v, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpreduce_smax_v2i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw a0, 8(sp)
; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
; RV32-NEXT: li a1, 32
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vredmax.vs v9, v8, v9, v0.t
; RV32-NEXT: vmv.x.s a0, v9
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vsrl.vx v8, v9, a1
; RV32-NEXT: vmv.x.s a1, v8
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vpreduce_smax_v2i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, a0
; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
; RV64-NEXT: vredmax.vs v9, v8, v9, v0.t
; RV64-NEXT: vmv.x.s a0, v9
; RV64-NEXT: ret
  %r = call i64 @llvm.vp.reduce.smax.v2i64(i64 %s, <2 x i64> %v, <2 x i1> %m, i32 %evl)
  ret i64 %r
}

declare i64 @llvm.vp.reduce.umin.v2i64(i64, <2 x i64>, <2 x i1>, i32)

define signext i64 @vpreduce_umin_v2i64(i64 signext %s, <2 x i64> %v, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpreduce_umin_v2i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw a0, 8(sp)
; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
; RV32-NEXT: li a1, 32
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vredminu.vs v9, v8, v9, v0.t
; RV32-NEXT: vmv.x.s a0, v9
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vsrl.vx v8, v9, a1
; RV32-NEXT: vmv.x.s a1, v8
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vpreduce_umin_v2i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, a0
; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
; RV64-NEXT: vredminu.vs v9, v8, v9, v0.t
; RV64-NEXT: vmv.x.s a0, v9
; RV64-NEXT: ret
  %r = call i64 @llvm.vp.reduce.umin.v2i64(i64 %s, <2 x i64> %v, <2 x i1> %m, i32 %evl)
  ret i64 %r
}

declare i64 @llvm.vp.reduce.smin.v2i64(i64, <2 x i64>, <2 x i1>, i32)

define signext i64 @vpreduce_smin_v2i64(i64 signext %s, <2 x i64> %v, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpreduce_smin_v2i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw a0, 8(sp)
; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
; RV32-NEXT: li a1, 32
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vredmin.vs v9, v8, v9, v0.t
; RV32-NEXT: vmv.x.s a0, v9
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vsrl.vx v8, v9, a1
; RV32-NEXT: vmv.x.s a1, v8
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vpreduce_smin_v2i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, a0
; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
; RV64-NEXT: vredmin.vs v9, v8, v9, v0.t
; RV64-NEXT: vmv.x.s a0, v9
; RV64-NEXT: ret
  %r = call i64 @llvm.vp.reduce.smin.v2i64(i64 %s, <2 x i64> %v, <2 x i1> %m, i32 %evl)
  ret i64 %r
}

declare i64 @llvm.vp.reduce.and.v2i64(i64, <2 x i64>, <2 x i1>, i32)

define signext i64 @vpreduce_and_v2i64(i64 signext %s, <2 x i64> %v, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpreduce_and_v2i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw a0, 8(sp)
; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
; RV32-NEXT: li a1, 32
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vredand.vs v9, v8, v9, v0.t
; RV32-NEXT: vmv.x.s a0, v9
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vsrl.vx v8, v9, a1
; RV32-NEXT: vmv.x.s a1, v8
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vpreduce_and_v2i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, a0
; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
; RV64-NEXT: vredand.vs v9, v8, v9, v0.t
; RV64-NEXT: vmv.x.s a0, v9
; RV64-NEXT: ret
  %r = call i64 @llvm.vp.reduce.and.v2i64(i64 %s, <2 x i64> %v, <2 x i1> %m, i32 %evl)
  ret i64 %r
}

declare i64 @llvm.vp.reduce.or.v2i64(i64, <2 x i64>, <2 x i1>, i32)

define signext i64 @vpreduce_or_v2i64(i64 signext %s, <2 x i64> %v, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpreduce_or_v2i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw a0, 8(sp)
; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
; RV32-NEXT: li a1, 32
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vredor.vs v9, v8, v9, v0.t
; RV32-NEXT: vmv.x.s a0, v9
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vsrl.vx v8, v9, a1
; RV32-NEXT: vmv.x.s a1, v8
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vpreduce_or_v2i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, a0
; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
; RV64-NEXT: vredor.vs v9, v8, v9, v0.t
; RV64-NEXT: vmv.x.s a0, v9
; RV64-NEXT: ret
  %r = call i64 @llvm.vp.reduce.or.v2i64(i64 %s, <2 x i64> %v, <2 x i1> %m, i32 %evl)
  ret i64 %r
}

declare i64 @llvm.vp.reduce.xor.v2i64(i64, <2 x i64>, <2 x i1>, i32)

define signext i64 @vpreduce_xor_v2i64(i64 signext %s, <2 x i64> %v, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpreduce_xor_v2i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw a0, 8(sp)
; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
; RV32-NEXT: li a1, 32
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vredxor.vs v9, v8, v9, v0.t
; RV32-NEXT: vmv.x.s a0, v9
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vsrl.vx v8, v9, a1
; RV32-NEXT: vmv.x.s a1, v8
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vpreduce_xor_v2i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, a0
; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
; RV64-NEXT: vredxor.vs v9, v8, v9, v0.t
; RV64-NEXT: vmv.x.s a0, v9
; RV64-NEXT: ret
  %r = call i64 @llvm.vp.reduce.xor.v2i64(i64 %s, <2 x i64> %v, <2 x i1> %m, i32 %evl)
  ret i64 %r
}

declare i64 @llvm.vp.reduce.add.v4i64(i64, <4 x i64>, <4 x i1>, i32)

define signext i64 @vpreduce_add_v4i64(i64 signext %s, <4 x i64> %v, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpreduce_add_v4i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw a0, 8(sp)
; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
; RV32-NEXT: li a1, 32
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vredsum.vs v10, v8, v10, v0.t
; RV32-NEXT: vmv.x.s a0, v10
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vsrl.vx v8, v10, a1
; RV32-NEXT: vmv.x.s a1, v8
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vpreduce_add_v4i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v10, a0
; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
; RV64-NEXT: vredsum.vs v10, v8, v10, v0.t
; RV64-NEXT: vmv.x.s a0, v10
; RV64-NEXT: ret
  %r = call i64 @llvm.vp.reduce.add.v4i64(i64 %s, <4 x i64> %v, <4 x i1> %m, i32 %evl)
  ret i64 %r
}

declare i64 @llvm.vp.reduce.umax.v4i64(i64, <4 x i64>, <4 x i1>, i32)

define signext i64 @vpreduce_umax_v4i64(i64 signext %s, <4 x i64> %v, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpreduce_umax_v4i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw a0, 8(sp)
; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
; RV32-NEXT: li a1, 32
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vredmaxu.vs v10, v8, v10, v0.t
; RV32-NEXT: vmv.x.s a0, v10
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vsrl.vx v8, v10, a1
; RV32-NEXT: vmv.x.s a1, v8
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vpreduce_umax_v4i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v10, a0
; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
; RV64-NEXT: vredmaxu.vs v10, v8, v10, v0.t
; RV64-NEXT: vmv.x.s a0, v10
; RV64-NEXT: ret
  %r = call i64 @llvm.vp.reduce.umax.v4i64(i64 %s, <4 x i64> %v, <4 x i1> %m, i32 %evl)
  ret i64 %r
}

declare i64 @llvm.vp.reduce.smax.v4i64(i64, <4 x i64>, <4 x i1>, i32)

define signext i64 @vpreduce_smax_v4i64(i64 signext %s, <4 x i64> %v, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpreduce_smax_v4i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw a0, 8(sp)
; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
; RV32-NEXT: li a1, 32
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vredmax.vs v10, v8, v10, v0.t
; RV32-NEXT: vmv.x.s a0, v10
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vsrl.vx v8, v10, a1
; RV32-NEXT: vmv.x.s a1, v8
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vpreduce_smax_v4i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v10, a0
; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
; RV64-NEXT: vredmax.vs v10, v8, v10, v0.t
; RV64-NEXT: vmv.x.s a0, v10
; RV64-NEXT: ret
  %r = call i64 @llvm.vp.reduce.smax.v4i64(i64 %s, <4 x i64> %v, <4 x i1> %m, i32 %evl)
  ret i64 %r
}

declare i64 @llvm.vp.reduce.umin.v4i64(i64, <4 x i64>, <4 x i1>, i32)

define signext i64 @vpreduce_umin_v4i64(i64 signext %s, <4 x i64> %v, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpreduce_umin_v4i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw a0, 8(sp)
; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
; RV32-NEXT: li a1, 32
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vredminu.vs v10, v8, v10, v0.t
; RV32-NEXT: vmv.x.s a0, v10
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vsrl.vx v8, v10, a1
; RV32-NEXT: vmv.x.s a1, v8
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vpreduce_umin_v4i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v10, a0
; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
; RV64-NEXT: vredminu.vs v10, v8, v10, v0.t
; RV64-NEXT: vmv.x.s a0, v10
; RV64-NEXT: ret
  %r = call i64 @llvm.vp.reduce.umin.v4i64(i64 %s, <4 x i64> %v, <4 x i1> %m, i32 %evl)
  ret i64 %r
}

declare i64 @llvm.vp.reduce.smin.v4i64(i64, <4 x i64>, <4 x i1>, i32)

define signext i64 @vpreduce_smin_v4i64(i64 signext %s, <4 x i64> %v, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpreduce_smin_v4i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw a0, 8(sp)
; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
; RV32-NEXT: li a1, 32
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vredmin.vs v10, v8, v10, v0.t
; RV32-NEXT: vmv.x.s a0, v10
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vsrl.vx v8, v10, a1
; RV32-NEXT: vmv.x.s a1, v8
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vpreduce_smin_v4i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v10, a0
; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
; RV64-NEXT: vredmin.vs v10, v8, v10, v0.t
; RV64-NEXT: vmv.x.s a0, v10
; RV64-NEXT: ret
  %r = call i64 @llvm.vp.reduce.smin.v4i64(i64 %s, <4 x i64> %v, <4 x i1> %m, i32 %evl)
  ret i64 %r
}

declare i64 @llvm.vp.reduce.and.v4i64(i64, <4 x i64>, <4 x i1>, i32)

define signext i64 @vpreduce_and_v4i64(i64 signext %s, <4 x i64> %v, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpreduce_and_v4i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw a0, 8(sp)
; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
; RV32-NEXT: li a1, 32
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vredand.vs v10, v8, v10, v0.t
; RV32-NEXT: vmv.x.s a0, v10
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vsrl.vx v8, v10, a1
; RV32-NEXT: vmv.x.s a1, v8
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vpreduce_and_v4i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v10, a0
; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
; RV64-NEXT: vredand.vs v10, v8, v10, v0.t
; RV64-NEXT: vmv.x.s a0, v10
; RV64-NEXT: ret
  %r = call i64 @llvm.vp.reduce.and.v4i64(i64 %s, <4 x i64> %v, <4 x i1> %m, i32 %evl)
  ret i64 %r
}

declare i64 @llvm.vp.reduce.or.v4i64(i64, <4 x i64>, <4 x i1>, i32)

define signext i64 @vpreduce_or_v4i64(i64 signext %s, <4 x i64> %v, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpreduce_or_v4i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw a0, 8(sp)
; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
; RV32-NEXT: li a1, 32
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vredor.vs v10, v8, v10, v0.t
; RV32-NEXT: vmv.x.s a0, v10
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vsrl.vx v8, v10, a1
; RV32-NEXT: vmv.x.s a1, v8
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vpreduce_or_v4i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v10, a0
; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
; RV64-NEXT: vredor.vs v10, v8, v10, v0.t
; RV64-NEXT: vmv.x.s a0, v10
; RV64-NEXT: ret
  %r = call i64 @llvm.vp.reduce.or.v4i64(i64 %s, <4 x i64> %v, <4 x i1> %m, i32 %evl)
  ret i64 %r
}

declare i64 @llvm.vp.reduce.xor.v4i64(i64, <4 x i64>, <4 x i1>, i32)

define signext i64 @vpreduce_xor_v4i64(i64 signext %s, <4 x i64> %v, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpreduce_xor_v4i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw a0, 8(sp)
; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
; RV32-NEXT: li a1, 32
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vredxor.vs v10, v8, v10, v0.t
; RV32-NEXT: vmv.x.s a0, v10
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vsrl.vx v8, v10, a1
; RV32-NEXT: vmv.x.s a1, v8
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vpreduce_xor_v4i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v10, a0
; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
; RV64-NEXT: vredxor.vs v10, v8, v10, v0.t
; RV64-NEXT: vmv.x.s a0, v10
; RV64-NEXT: ret
  %r = call i64 @llvm.vp.reduce.xor.v4i64(i64 %s, <4 x i64> %v, <4 x i1> %m, i32 %evl)
  ret i64 %r
}

declare i8 @llvm.vp.reduce.mul.v1i8(i8, <1 x i8>, <1 x i1>, i32)

define i8 @vpreduce_mul_v1i8(i8 %s, <1 x i8> %v, <1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpreduce_mul_v1i8:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: mv a2, a0
; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; RV32-NEXT: vmv.s.x v9, a1
; RV32-NEXT: vmsne.vi v9, v9, 0
; RV32-NEXT: vmand.mm v0, v9, v0
; RV32-NEXT: vmv.v.i v9, 1
; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
; RV32-NEXT: vmerge.vvm v8, v9, v8, v0
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: mv a1, a2
; RV32-NEXT: call __mulsi3
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: .cfi_restore ra
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vpreduce_mul_v1i8:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: mv a2, a0
; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; RV64-NEXT: vmv.s.x v9, a1
; RV64-NEXT: vmsne.vi v9, v9, 0
; RV64-NEXT: vmand.mm v0, v9, v0
; RV64-NEXT: vmv.v.i v9, 1
; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
; RV64-NEXT: vmerge.vvm v8, v9, v8, v0
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: mv a1, a2
; RV64-NEXT: call __muldi3
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: .cfi_restore ra
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: .cfi_def_cfa_offset 0
; RV64-NEXT: ret
  %r = call i8 @llvm.vp.reduce.mul.v1i8(i8 %s, <1 x i8> %v, <1 x i1> %m, i32 %evl)
  ret i8 %r
}

declare i8 @llvm.vp.reduce.mul.v2i8(i8, <2 x i8>, <2 x i1>, i32)

define signext i8 @vpreduce_mul_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpreduce_mul_v2i8:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: mv a2, a0
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT: vid.v v9
; RV32-NEXT: vmsltu.vx v9, v9, a1
; RV32-NEXT: vmand.mm v0, v9, v0
; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
; RV32-NEXT: vmv.v.i v9, 1
; RV32-NEXT: vmerge.vvm v8, v9, v8, v0
; RV32-NEXT: vrgather.vi v9, v8, 1
; RV32-NEXT: vmul.vv v8, v8, v9
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: mv a1, a2
; RV32-NEXT: call __mulsi3
; RV32-NEXT: slli a0, a0, 24
; RV32-NEXT: srai a0, a0, 24
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: .cfi_restore ra
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vpreduce_mul_v2i8:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: mv a2, a0
; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT: vid.v v9
; RV64-NEXT: vmsltu.vx v9, v9, a1
; RV64-NEXT: vmand.mm v0, v9, v0
; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
; RV64-NEXT: vmv.v.i v9, 1
; RV64-NEXT: vmerge.vvm v8, v9, v8, v0
; RV64-NEXT: vrgather.vi v9, v8, 1
; RV64-NEXT: vmul.vv v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: mv a1, a2
; RV64-NEXT: call __muldi3
; RV64-NEXT: slli a0, a0, 56
; RV64-NEXT: srai a0, a0, 56
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: .cfi_restore ra
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: .cfi_def_cfa_offset 0
; RV64-NEXT: ret
  %r = call i8 @llvm.vp.reduce.mul.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl)
  ret i8 %r
}

declare i8 @llvm.vp.reduce.mul.v4i8(i8, <4 x i8>, <4 x i1>, i32)

define signext i8 @vpreduce_mul_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpreduce_mul_v4i8:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: mv a2, a0
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vid.v v9
; RV32-NEXT: vmsltu.vx v9, v9, a1
; RV32-NEXT: vmand.mm v0, v9, v0
; RV32-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; RV32-NEXT: vmv.v.i v9, 1
; RV32-NEXT: vmerge.vvm v8, v9, v8, v0
; RV32-NEXT: vslidedown.vi v9, v8, 2
; RV32-NEXT: vmul.vv v8, v8, v9
; RV32-NEXT: vrgather.vi v9, v8, 1
; RV32-NEXT: vmul.vv v8, v8, v9
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: mv a1, a2
; RV32-NEXT: call __mulsi3
; RV32-NEXT: slli a0, a0, 24
; RV32-NEXT: srai a0, a0, 24
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: .cfi_restore ra
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vpreduce_mul_v4i8:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: mv a2, a0
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vid.v v9
; RV64-NEXT: vmsltu.vx v9, v9, a1
; RV64-NEXT: vmand.mm v0, v9, v0
; RV64-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; RV64-NEXT: vmv.v.i v9, 1
; RV64-NEXT: vmerge.vvm v8, v9, v8, v0
; RV64-NEXT: vslidedown.vi v9, v8, 2
; RV64-NEXT: vmul.vv v8, v8, v9
; RV64-NEXT: vrgather.vi v9, v8, 1
; RV64-NEXT: vmul.vv v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: mv a1, a2
; RV64-NEXT: call __muldi3
; RV64-NEXT: slli a0, a0, 56
; RV64-NEXT: srai a0, a0, 56
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: .cfi_restore ra
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: .cfi_def_cfa_offset 0
; RV64-NEXT: ret
  %r = call i8 @llvm.vp.reduce.mul.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl)
  ret i8 %r
}

declare i8 @llvm.vp.reduce.mul.v8i8(i8, <8 x i8>, <8 x i1>, i32)

define signext i8 @vpreduce_mul_v8i8(i8 signext %s, <8 x i8> %v, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpreduce_mul_v8i8:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: mv a2, a0
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vid.v v10
; RV32-NEXT: vmsltu.vx v9, v10, a1
; RV32-NEXT: vmand.mm v0, v9, v0
; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32-NEXT: vmv.v.i v9, 1
; RV32-NEXT: vmerge.vvm v8, v9, v8, v0
; RV32-NEXT: vslidedown.vi v9, v8, 4
; RV32-NEXT: vmul.vv v8, v8, v9
; RV32-NEXT: vslidedown.vi v9, v8, 2
; RV32-NEXT: vmul.vv v8, v8, v9
; RV32-NEXT: vrgather.vi v9, v8, 1
; RV32-NEXT: vmul.vv v8, v8, v9
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: mv a1, a2
; RV32-NEXT: call __mulsi3
; RV32-NEXT: slli a0, a0, 24
; RV32-NEXT: srai a0, a0, 24
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: .cfi_restore ra
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vpreduce_mul_v8i8:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: mv a2, a0
; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64-NEXT: vid.v v10
; RV64-NEXT: vmsltu.vx v9, v10, a1
; RV64-NEXT: vmand.mm v0, v9, v0
; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV64-NEXT: vmv.v.i v9, 1
; RV64-NEXT: vmerge.vvm v8, v9, v8, v0
; RV64-NEXT: vslidedown.vi v9, v8, 4
; RV64-NEXT: vmul.vv v8, v8, v9
RV64-NEXT: vmul.vv v8, v8, v9 1615; RV64-NEXT: vslidedown.vi v9, v8, 2 1616; RV64-NEXT: vmul.vv v8, v8, v9 1617; RV64-NEXT: vrgather.vi v9, v8, 1 1618; RV64-NEXT: vmul.vv v8, v8, v9 1619; RV64-NEXT: vmv.x.s a0, v8 1620; RV64-NEXT: mv a1, a2 1621; RV64-NEXT: call __muldi3 1622; RV64-NEXT: slli a0, a0, 56 1623; RV64-NEXT: srai a0, a0, 56 1624; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload 1625; RV64-NEXT: .cfi_restore ra 1626; RV64-NEXT: addi sp, sp, 16 1627; RV64-NEXT: .cfi_def_cfa_offset 0 1628; RV64-NEXT: ret 1629 %r = call i8 @llvm.vp.reduce.mul.v8i8(i8 %s, <8 x i8> %v, <8 x i1> %m, i32 %evl) 1630 ret i8 %r 1631} 1632 1633declare i8 @llvm.vp.reduce.mul.v16i8(i8, <16 x i8>, <16 x i1>, i32) 1634 1635define signext i8 @vpreduce_mul_v16i8(i8 signext %s, <16 x i8> %v, <16 x i1> %m, i32 zeroext %evl) { 1636; RV32-LABEL: vpreduce_mul_v16i8: 1637; RV32: # %bb.0: 1638; RV32-NEXT: addi sp, sp, -16 1639; RV32-NEXT: .cfi_def_cfa_offset 16 1640; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill 1641; RV32-NEXT: .cfi_offset ra, -4 1642; RV32-NEXT: mv a2, a0 1643; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma 1644; RV32-NEXT: vid.v v12 1645; RV32-NEXT: vmsltu.vx v9, v12, a1 1646; RV32-NEXT: vmand.mm v0, v9, v0 1647; RV32-NEXT: vsetvli zero, zero, e8, m1, ta, ma 1648; RV32-NEXT: vmv.v.i v9, 1 1649; RV32-NEXT: vmerge.vvm v8, v9, v8, v0 1650; RV32-NEXT: vslidedown.vi v9, v8, 8 1651; RV32-NEXT: vmul.vv v8, v8, v9 1652; RV32-NEXT: vslidedown.vi v9, v8, 4 1653; RV32-NEXT: vmul.vv v8, v8, v9 1654; RV32-NEXT: vslidedown.vi v9, v8, 2 1655; RV32-NEXT: vmul.vv v8, v8, v9 1656; RV32-NEXT: vrgather.vi v9, v8, 1 1657; RV32-NEXT: vmul.vv v8, v8, v9 1658; RV32-NEXT: vmv.x.s a0, v8 1659; RV32-NEXT: mv a1, a2 1660; RV32-NEXT: call __mulsi3 1661; RV32-NEXT: slli a0, a0, 24 1662; RV32-NEXT: srai a0, a0, 24 1663; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload 1664; RV32-NEXT: .cfi_restore ra 1665; RV32-NEXT: addi sp, sp, 16 1666; RV32-NEXT: .cfi_def_cfa_offset 0 1667; RV32-NEXT: ret 1668; 1669; RV64-LABEL: vpreduce_mul_v16i8: 1670; RV64: # %bb.0: 1671; RV64-NEXT: addi sp, sp, -16 1672; RV64-NEXT: .cfi_def_cfa_offset 16 1673; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill 1674; RV64-NEXT: .cfi_offset ra, -8 1675; RV64-NEXT: mv a2, a0 1676; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma 1677; RV64-NEXT: vid.v v12 1678; RV64-NEXT: vmsltu.vx v9, v12, a1 1679; RV64-NEXT: vmand.mm v0, v9, v0 1680; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, ma 1681; RV64-NEXT: vmv.v.i v9, 1 1682; RV64-NEXT: vmerge.vvm v8, v9, v8, v0 1683; RV64-NEXT: vslidedown.vi v9, v8, 8 1684; RV64-NEXT: vmul.vv v8, v8, v9 1685; RV64-NEXT: vslidedown.vi v9, v8, 4 1686; RV64-NEXT: vmul.vv v8, v8, v9 1687; RV64-NEXT: vslidedown.vi v9, v8, 2 1688; RV64-NEXT: vmul.vv v8, v8, v9 1689; RV64-NEXT: vrgather.vi v9, v8, 1 1690; RV64-NEXT: vmul.vv v8, v8, v9 1691; RV64-NEXT: vmv.x.s a0, v8 1692; RV64-NEXT: mv a1, a2 1693; RV64-NEXT: call __muldi3 1694; RV64-NEXT: slli a0, a0, 56 1695; RV64-NEXT: srai a0, a0, 56 1696; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload 1697; RV64-NEXT: .cfi_restore ra 1698; RV64-NEXT: addi sp, sp, 16 1699; RV64-NEXT: .cfi_def_cfa_offset 0 1700; RV64-NEXT: ret 1701 %r = call i8 @llvm.vp.reduce.mul.v16i8(i8 %s, <16 x i8> %v, <16 x i1> %m, i32 %evl) 1702 ret i8 %r 1703} 1704 1705declare i8 @llvm.vp.reduce.mul.v32i8(i8, <32 x i8>, <32 x i1>, i32) 1706 1707define signext i8 @vpreduce_mul_v32i8(i8 signext %s, <32 x i8> %v, <32 x i1> %m, i32 zeroext %evl) { 1708; RV32-LABEL: vpreduce_mul_v32i8: 1709; RV32: # %bb.0: 1710; RV32-NEXT: addi sp, sp, -16 1711; 
RV32-NEXT: .cfi_def_cfa_offset 16 1712; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill 1713; RV32-NEXT: .cfi_offset ra, -4 1714; RV32-NEXT: mv a2, a0 1715; RV32-NEXT: li a0, 32 1716; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma 1717; RV32-NEXT: vid.v v16 1718; RV32-NEXT: vmsltu.vx v10, v16, a1 1719; RV32-NEXT: vmand.mm v0, v10, v0 1720; RV32-NEXT: vsetvli zero, zero, e8, m2, ta, ma 1721; RV32-NEXT: vmv.v.i v10, 1 1722; RV32-NEXT: vmerge.vvm v8, v10, v8, v0 1723; RV32-NEXT: vslidedown.vi v10, v8, 16 1724; RV32-NEXT: vmul.vv v8, v8, v10 1725; RV32-NEXT: vslidedown.vi v10, v8, 8 1726; RV32-NEXT: vmul.vv v8, v8, v10 1727; RV32-NEXT: vslidedown.vi v10, v8, 4 1728; RV32-NEXT: vmul.vv v8, v8, v10 1729; RV32-NEXT: vslidedown.vi v10, v8, 2 1730; RV32-NEXT: vmul.vv v8, v8, v10 1731; RV32-NEXT: vrgather.vi v10, v8, 1 1732; RV32-NEXT: vmul.vv v8, v8, v10 1733; RV32-NEXT: vmv.x.s a0, v8 1734; RV32-NEXT: mv a1, a2 1735; RV32-NEXT: call __mulsi3 1736; RV32-NEXT: slli a0, a0, 24 1737; RV32-NEXT: srai a0, a0, 24 1738; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload 1739; RV32-NEXT: .cfi_restore ra 1740; RV32-NEXT: addi sp, sp, 16 1741; RV32-NEXT: .cfi_def_cfa_offset 0 1742; RV32-NEXT: ret 1743; 1744; RV64-LABEL: vpreduce_mul_v32i8: 1745; RV64: # %bb.0: 1746; RV64-NEXT: addi sp, sp, -16 1747; RV64-NEXT: .cfi_def_cfa_offset 16 1748; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill 1749; RV64-NEXT: .cfi_offset ra, -8 1750; RV64-NEXT: mv a2, a0 1751; RV64-NEXT: li a0, 32 1752; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma 1753; RV64-NEXT: vid.v v16 1754; RV64-NEXT: vmsltu.vx v10, v16, a1 1755; RV64-NEXT: vmand.mm v0, v10, v0 1756; RV64-NEXT: vsetvli zero, zero, e8, m2, ta, ma 1757; RV64-NEXT: vmv.v.i v10, 1 1758; RV64-NEXT: vmerge.vvm v8, v10, v8, v0 1759; RV64-NEXT: vslidedown.vi v10, v8, 16 1760; RV64-NEXT: vmul.vv v8, v8, v10 1761; RV64-NEXT: vslidedown.vi v10, v8, 8 1762; RV64-NEXT: vmul.vv v8, v8, v10 1763; RV64-NEXT: vslidedown.vi v10, v8, 4 1764; RV64-NEXT: vmul.vv v8, v8, v10 1765; RV64-NEXT: vslidedown.vi v10, v8, 2 1766; RV64-NEXT: vmul.vv v8, v8, v10 1767; RV64-NEXT: vrgather.vi v10, v8, 1 1768; RV64-NEXT: vmul.vv v8, v8, v10 1769; RV64-NEXT: vmv.x.s a0, v8 1770; RV64-NEXT: mv a1, a2 1771; RV64-NEXT: call __muldi3 1772; RV64-NEXT: slli a0, a0, 56 1773; RV64-NEXT: srai a0, a0, 56 1774; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload 1775; RV64-NEXT: .cfi_restore ra 1776; RV64-NEXT: addi sp, sp, 16 1777; RV64-NEXT: .cfi_def_cfa_offset 0 1778; RV64-NEXT: ret 1779 %r = call i8 @llvm.vp.reduce.mul.v32i8(i8 %s, <32 x i8> %v, <32 x i1> %m, i32 %evl) 1780 ret i8 %r 1781} 1782 1783declare i8 @llvm.vp.reduce.mul.v64i8(i8, <64 x i8>, <64 x i1>, i32) 1784 1785define signext i8 @vpreduce_mul_v64i8(i8 signext %s, <64 x i8> %v, <64 x i1> %m, i32 zeroext %evl) { 1786; RV32-LABEL: vpreduce_mul_v64i8: 1787; RV32: # %bb.0: 1788; RV32-NEXT: addi sp, sp, -16 1789; RV32-NEXT: .cfi_def_cfa_offset 16 1790; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill 1791; RV32-NEXT: .cfi_offset ra, -4 1792; RV32-NEXT: mv a2, a0 1793; RV32-NEXT: li a0, 32 1794; RV32-NEXT: lui a3, %hi(.LCPI72_0) 1795; RV32-NEXT: addi a3, a3, %lo(.LCPI72_0) 1796; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma 1797; RV32-NEXT: vle8.v v12, (a3) 1798; RV32-NEXT: vid.v v16 1799; RV32-NEXT: vmsltu.vx v14, v16, a1 1800; RV32-NEXT: li a3, 64 1801; RV32-NEXT: vsext.vf4 v16, v12 1802; RV32-NEXT: vmsltu.vx v12, v16, a1 1803; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma 1804; RV32-NEXT: vslideup.vi v14, v12, 4 1805; RV32-NEXT: vsetvli zero, a3, e8, m4, ta, ma 1806; 
; RV32-NEXT: vmand.mm v0, v14, v0
; RV32-NEXT: vmv.v.i v12, 1
; RV32-NEXT: vmerge.vvm v8, v12, v8, v0
; RV32-NEXT: vslidedown.vx v12, v8, a0
; RV32-NEXT: vmul.vv v8, v8, v12
; RV32-NEXT: vslidedown.vi v12, v8, 16
; RV32-NEXT: vmul.vv v8, v8, v12
; RV32-NEXT: vslidedown.vi v12, v8, 8
; RV32-NEXT: vmul.vv v8, v8, v12
; RV32-NEXT: vslidedown.vi v12, v8, 4
; RV32-NEXT: vmul.vv v8, v8, v12
; RV32-NEXT: vslidedown.vi v12, v8, 2
; RV32-NEXT: vmul.vv v8, v8, v12
; RV32-NEXT: vrgather.vi v12, v8, 1
; RV32-NEXT: vmul.vv v8, v8, v12
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: mv a1, a2
; RV32-NEXT: call __mulsi3
; RV32-NEXT: slli a0, a0, 24
; RV32-NEXT: srai a0, a0, 24
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: .cfi_restore ra
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vpreduce_mul_v64i8:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: mv a2, a0
; RV64-NEXT: li a0, 32
; RV64-NEXT: lui a3, %hi(.LCPI72_0)
; RV64-NEXT: addi a3, a3, %lo(.LCPI72_0)
; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; RV64-NEXT: vle8.v v12, (a3)
; RV64-NEXT: vid.v v16
; RV64-NEXT: vmsltu.vx v14, v16, a1
; RV64-NEXT: li a3, 64
; RV64-NEXT: vsext.vf4 v16, v12
; RV64-NEXT: vmsltu.vx v12, v16, a1
; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT: vslideup.vi v14, v12, 4
; RV64-NEXT: vsetvli zero, a3, e8, m4, ta, ma
; RV64-NEXT: vmand.mm v0, v14, v0
; RV64-NEXT: vmv.v.i v12, 1
; RV64-NEXT: vmerge.vvm v8, v12, v8, v0
; RV64-NEXT: vslidedown.vx v12, v8, a0
; RV64-NEXT: vmul.vv v8, v8, v12
; RV64-NEXT: vslidedown.vi v12, v8, 16
; RV64-NEXT: vmul.vv v8, v8, v12
; RV64-NEXT: vslidedown.vi v12, v8, 8
; RV64-NEXT: vmul.vv v8, v8, v12
; RV64-NEXT: vslidedown.vi v12, v8, 4
; RV64-NEXT: vmul.vv v8, v8, v12
; RV64-NEXT: vslidedown.vi v12, v8, 2
; RV64-NEXT: vmul.vv v8, v8, v12
; RV64-NEXT: vrgather.vi v12, v8, 1
; RV64-NEXT: vmul.vv v8, v8, v12
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: mv a1, a2
; RV64-NEXT: call __muldi3
; RV64-NEXT: slli a0, a0, 56
; RV64-NEXT: srai a0, a0, 56
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: .cfi_restore ra
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: .cfi_def_cfa_offset 0
; RV64-NEXT: ret
  %r = call i8 @llvm.vp.reduce.mul.v64i8(i8 %s, <64 x i8> %v, <64 x i1> %m, i32 %evl)
  ret i8 %r
}

; Test start value is the first element of a vector.
define zeroext i8 @front_ele_v4i8(<4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: front_ele_v4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v8, v0.t
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: andi a0, a0, 255
; CHECK-NEXT: ret
  %s = extractelement <4 x i8> %v, i64 0
  %r = call i8 @llvm.vp.reduce.and.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl)
  ret i8 %r
}

; Test start value is the first element of a vector which is longer than M1.
declare i8 @llvm.vp.reduce.and.v32i8(i8, <32 x i8>, <32 x i1>, i32)
define zeroext i8 @front_ele_v32i8(<32 x i8> %v, <32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: front_ele_v32i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v8, v0.t
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: andi a0, a0, 255
; CHECK-NEXT: ret
  %s = extractelement <32 x i8> %v, i64 0
  %r = call i8 @llvm.vp.reduce.and.v32i8(i8 %s, <32 x i8> %v, <32 x i1> %m, i32 %evl)
  ret i8 %r
}