; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
; RUN:   -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
; RUN:   -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64

declare i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8>)

define signext i8 @vreduce_add_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vreduce_add_nxv1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vredsum.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.umax.nxv1i8(<vscale x 1 x i8>)

define signext i8 @vreduce_umax_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vreduce_umax_nxv1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
; CHECK-NEXT:    vredmaxu.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.umax.nxv1i8(<vscale x 1 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.smax.nxv1i8(<vscale x 1 x i8>)

define signext i8 @vreduce_smax_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vreduce_smax_nxv1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
; CHECK-NEXT:    vredmax.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.smax.nxv1i8(<vscale x 1 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.umin.nxv1i8(<vscale x 1 x i8>)

define signext i8 @vreduce_umin_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vreduce_umin_nxv1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
; CHECK-NEXT:    vredminu.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.umin.nxv1i8(<vscale x 1 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.smin.nxv1i8(<vscale x 1 x i8>)

define signext i8 @vreduce_smin_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vreduce_smin_nxv1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
; CHECK-NEXT:    vredmin.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.smin.nxv1i8(<vscale x 1 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.and.nxv1i8(<vscale x 1 x i8>)

define signext i8 @vreduce_and_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vreduce_and_nxv1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
; CHECK-NEXT:    vredand.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.and.nxv1i8(<vscale x 1 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.or.nxv1i8(<vscale x 1 x i8>)

define signext i8 @vreduce_or_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vreduce_or_nxv1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
; CHECK-NEXT:    vredor.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.or.nxv1i8(<vscale x 1 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.xor.nxv1i8(<vscale x 1 x i8>)

define signext i8 @vreduce_xor_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vreduce_xor_nxv1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vredxor.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.xor.nxv1i8(<vscale x 1 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8>)

define signext i8 @vreduce_add_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vreduce_add_nxv2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vredsum.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.umax.nxv2i8(<vscale x 2 x i8>)

define signext i8 @vreduce_umax_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vreduce_umax_nxv2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vredmaxu.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.umax.nxv2i8(<vscale x 2 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.smax.nxv2i8(<vscale x 2 x i8>)

define signext i8 @vreduce_smax_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vreduce_smax_nxv2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vredmax.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.smax.nxv2i8(<vscale x 2 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.umin.nxv2i8(<vscale x 2 x i8>)

define signext i8 @vreduce_umin_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vreduce_umin_nxv2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vredminu.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.umin.nxv2i8(<vscale x 2 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.smin.nxv2i8(<vscale x 2 x i8>)

define signext i8 @vreduce_smin_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vreduce_smin_nxv2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vredmin.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.smin.nxv2i8(<vscale x 2 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.and.nxv2i8(<vscale x 2 x i8>)

define signext i8 @vreduce_and_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vreduce_and_nxv2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vredand.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.and.nxv2i8(<vscale x 2 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.or.nxv2i8(<vscale x 2 x i8>)

define signext i8 @vreduce_or_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vreduce_or_nxv2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vredor.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.or.nxv2i8(<vscale x 2 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.xor.nxv2i8(<vscale x 2 x i8>)

define signext i8 @vreduce_xor_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vreduce_xor_nxv2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vredxor.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.xor.nxv2i8(<vscale x 2 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8>)

define signext i8 @vreduce_add_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vreduce_add_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vredsum.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.umax.nxv4i8(<vscale x 4 x i8>)

define signext i8 @vreduce_umax_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vreduce_umax_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vredmaxu.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.umax.nxv4i8(<vscale x 4 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.smax.nxv4i8(<vscale x 4 x i8>)

define signext i8 @vreduce_smax_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vreduce_smax_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vredmax.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.smax.nxv4i8(<vscale x 4 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.umin.nxv4i8(<vscale x 4 x i8>)

define signext i8 @vreduce_umin_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vreduce_umin_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vredminu.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.umin.nxv4i8(<vscale x 4 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8>)

define signext i8 @vreduce_smin_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vreduce_smin_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vredmin.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.and.nxv4i8(<vscale x 4 x i8>)

define signext i8 @vreduce_and_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vreduce_and_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vredand.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.and.nxv4i8(<vscale x 4 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.or.nxv4i8(<vscale x 4 x i8>)

define signext i8 @vreduce_or_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vreduce_or_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vredor.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.or.nxv4i8(<vscale x 4 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.xor.nxv4i8(<vscale x 4 x i8>)

define signext i8 @vreduce_xor_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vreduce_xor_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vredxor.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.xor.nxv4i8(<vscale x 4 x i8> %v)
  ret i8 %red
}

declare i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16>)

define signext i16 @vreduce_add_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vreduce_add_nxv1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vredsum.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %v)
  ret i16 %red
}

define signext i16 @vwreduce_add_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vwreduce_add_nxv1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
; CHECK-NEXT:    vwredsum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %e = sext <vscale x 1 x i8> %v to <vscale x 1 x i16>
  %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %e)
  ret i16 %red
}

define signext i16 @vwreduce_uadd_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vwreduce_uadd_nxv1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
; CHECK-NEXT:    vwredsum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %e = sext <vscale x 1 x i8> %v to <vscale x 1 x i16>
  %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %e)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.umax.nxv1i16(<vscale x 1 x i16>)

define signext i16 @vreduce_umax_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vreduce_umax_nxv1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vredmaxu.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.umax.nxv1i16(<vscale x 1 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.smax.nxv1i16(<vscale x 1 x i16>)

define signext i16 @vreduce_smax_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vreduce_smax_nxv1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vredmax.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.smax.nxv1i16(<vscale x 1 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.umin.nxv1i16(<vscale x 1 x i16>)

define signext i16 @vreduce_umin_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vreduce_umin_nxv1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vredminu.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.umin.nxv1i16(<vscale x 1 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.smin.nxv1i16(<vscale x 1 x i16>)

define signext i16 @vreduce_smin_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vreduce_smin_nxv1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vredmin.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.smin.nxv1i16(<vscale x 1 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.and.nxv1i16(<vscale x 1 x i16>)

define signext i16 @vreduce_and_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vreduce_and_nxv1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vredand.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.and.nxv1i16(<vscale x 1 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.or.nxv1i16(<vscale x 1 x i16>)

define signext i16 @vreduce_or_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vreduce_or_nxv1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vredor.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.or.nxv1i16(<vscale x 1 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.xor.nxv1i16(<vscale x 1 x i16>)

define signext i16 @vreduce_xor_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vreduce_xor_nxv1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vredxor.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.xor.nxv1i16(<vscale x 1 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16>)

define signext i16 @vreduce_add_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vreduce_add_nxv2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vredsum.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %v)
  ret i16 %red
}

define signext i16 @vwreduce_add_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vwreduce_add_nxv2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vwredsum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %e = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
  %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %e)
  ret i16 %red
}

define signext i16 @vwreduce_uadd_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vwreduce_uadd_nxv2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vwredsum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %e = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
  %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %e)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.umax.nxv2i16(<vscale x 2 x i16>)

define signext i16 @vreduce_umax_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vreduce_umax_nxv2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vredmaxu.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.umax.nxv2i16(<vscale x 2 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.smax.nxv2i16(<vscale x 2 x i16>)

define signext i16 @vreduce_smax_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vreduce_smax_nxv2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vredmax.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.smax.nxv2i16(<vscale x 2 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.umin.nxv2i16(<vscale x 2 x i16>)

define signext i16 @vreduce_umin_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vreduce_umin_nxv2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vredminu.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.umin.nxv2i16(<vscale x 2 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.smin.nxv2i16(<vscale x 2 x i16>)

define signext i16 @vreduce_smin_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vreduce_smin_nxv2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vredmin.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.smin.nxv2i16(<vscale x 2 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.and.nxv2i16(<vscale x 2 x i16>)

define signext i16 @vreduce_and_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vreduce_and_nxv2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vredand.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.and.nxv2i16(<vscale x 2 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.or.nxv2i16(<vscale x 2 x i16>)

define signext i16 @vreduce_or_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vreduce_or_nxv2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vredor.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.or.nxv2i16(<vscale x 2 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16>)

define signext i16 @vreduce_xor_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vreduce_xor_nxv2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vredxor.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16>)

define signext i16 @vreduce_add_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vreduce_add_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vredsum.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %v)
  ret i16 %red
}

define signext i16 @vwreduce_add_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vwreduce_add_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vwredsum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %e = sext <vscale x 4 x i8> %v to <vscale x 4 x i16>
  %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %e)
  ret i16 %red
}

define signext i16 @vwreduce_uadd_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vwreduce_uadd_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vwredsum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %e = sext <vscale x 4 x i8> %v to <vscale x 4 x i16>
  %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %e)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.umax.nxv4i16(<vscale x 4 x i16>)

define signext i16 @vreduce_umax_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vreduce_umax_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vredmaxu.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.umax.nxv4i16(<vscale x 4 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.smax.nxv4i16(<vscale x 4 x i16>)

define signext i16 @vreduce_smax_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vreduce_smax_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vredmax.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.smax.nxv4i16(<vscale x 4 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.umin.nxv4i16(<vscale x 4 x i16>)

define signext i16 @vreduce_umin_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vreduce_umin_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vredminu.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.umin.nxv4i16(<vscale x 4 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.smin.nxv4i16(<vscale x 4 x i16>)

define signext i16 @vreduce_smin_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vreduce_smin_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vredmin.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.smin.nxv4i16(<vscale x 4 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.and.nxv4i16(<vscale x 4 x i16>)

define signext i16 @vreduce_and_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vreduce_and_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vredand.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.and.nxv4i16(<vscale x 4 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.or.nxv4i16(<vscale x 4 x i16>)

define signext i16 @vreduce_or_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vreduce_or_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vredor.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.or.nxv4i16(<vscale x 4 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.xor.nxv4i16(<vscale x 4 x i16>)

define signext i16 @vreduce_xor_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vreduce_xor_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vredxor.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.xor.nxv4i16(<vscale x 4 x i16> %v)
  ret i16 %red
}

declare i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32>)

define signext i32 @vreduce_add_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: vreduce_add_nxv1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vredsum.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %v)
  ret i32 %red
}

define signext i32 @vwreduce_add_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vwreduce_add_nxv1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vwredsum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %e = sext <vscale x 1 x i16> %v to <vscale x 1 x i32>
  %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %e)
  ret i32 %red
}

define signext i32 @vwreduce_uadd_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vwreduce_uadd_nxv1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vwredsumu.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %e = zext <vscale x 1 x i16> %v to <vscale x 1 x i32>
  %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %e)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.umax.nxv1i32(<vscale x 1 x i32>)

define signext i32 @vreduce_umax_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: vreduce_umax_nxv1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vredmaxu.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.umax.nxv1i32(<vscale x 1 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.smax.nxv1i32(<vscale x 1 x i32>)

define signext i32 @vreduce_smax_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: vreduce_smax_nxv1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vredmax.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.smax.nxv1i32(<vscale x 1 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.umin.nxv1i32(<vscale x 1 x i32>)

define signext i32 @vreduce_umin_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: vreduce_umin_nxv1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vredminu.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.umin.nxv1i32(<vscale x 1 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.smin.nxv1i32(<vscale x 1 x i32>)

define signext i32 @vreduce_smin_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: vreduce_smin_nxv1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vredmin.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.smin.nxv1i32(<vscale x 1 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32>)

define signext i32 @vreduce_and_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: vreduce_and_nxv1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vredand.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32>)

define signext i32 @vreduce_or_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: vreduce_or_nxv1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vredor.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32>)

define signext i32 @vreduce_xor_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: vreduce_xor_nxv1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vredxor.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32>)

define signext i32 @vreduce_add_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: vreduce_add_nxv2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vredsum.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %v)
  ret i32 %red
}

define signext i32 @vwreduce_add_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vwreduce_add_nxv2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vwredsum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %e = sext <vscale x 2 x i16> %v to <vscale x 2 x i32>
  %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %e)
  ret i32 %red
}

define signext i32 @vwreduce_uadd_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vwreduce_uadd_nxv2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vwredsumu.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %e = zext <vscale x 2 x i16> %v to <vscale x 2 x i32>
  %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %e)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.umax.nxv2i32(<vscale x 2 x i32>)

define signext i32 @vreduce_umax_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: vreduce_umax_nxv2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vredmaxu.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.umax.nxv2i32(<vscale x 2 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.smax.nxv2i32(<vscale x 2 x i32>)

define signext i32 @vreduce_smax_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: vreduce_smax_nxv2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vredmax.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.smax.nxv2i32(<vscale x 2 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32>)

define signext i32 @vreduce_umin_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: vreduce_umin_nxv2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vredminu.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.smin.nxv2i32(<vscale x 2 x i32>)

define signext i32 @vreduce_smin_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: vreduce_smin_nxv2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vredmin.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.smin.nxv2i32(<vscale x 2 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.and.nxv2i32(<vscale x 2 x i32>)

define signext i32 @vreduce_and_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: vreduce_and_nxv2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vredand.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.and.nxv2i32(<vscale x 2 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32>)

define signext i32 @vreduce_or_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: vreduce_or_nxv2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vredor.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.xor.nxv2i32(<vscale x 2 x i32>)

define signext i32 @vreduce_xor_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: vreduce_xor_nxv2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vredxor.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.xor.nxv2i32(<vscale x 2 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32>)

define signext i32 @vreduce_add_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_add_nxv4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v10, zero
; CHECK-NEXT:    vredsum.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v)
  ret i32 %red
}

define signext i32 @vwreduce_add_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vwreduce_add_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vwredsum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %e = sext <vscale x 4 x i16> %v to <vscale x 4 x i32>
  %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %e)
  ret i32 %red
}

define signext i32 @vwreduce_uadd_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vwreduce_uadd_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vwredsumu.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %e = zext <vscale x 4 x i16> %v to <vscale x 4 x i32>
  %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %e)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32>)

define signext i32 @vreduce_umax_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_umax_nxv4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vredmaxu.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32>)

define signext i32 @vreduce_smax_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_smax_nxv4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vredmax.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32>)

define signext i32 @vreduce_umin_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_umin_nxv4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vredminu.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32>)

define signext i32 @vreduce_smin_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_smin_nxv4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vredmin.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32>)

define signext i32 @vreduce_and_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_and_nxv4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vredand.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32>)

define signext i32 @vreduce_or_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_or_nxv4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vredor.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32>)

define signext i32 @vreduce_xor_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_xor_nxv4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v10, zero
; CHECK-NEXT:    vredxor.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v)
  ret i32 %red
}

declare i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64>)

define i64 @vreduce_add_nxv1i64(<vscale x 1 x i64> %v) {
; RV32-LABEL: vreduce_add_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV32-NEXT:    vmv.s.x v9, zero
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vredsum.vs v8, v8, v9
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vreduce_add_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT:    vmv.s.x v9, zero
; RV64-NEXT:    vredsum.vs v8, v8, v9
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %v)
  ret i64 %red
}

define i64 @vwreduce_add_nxv1i32(<vscale x 1 x i32> %v) {
; RV32-LABEL: vwreduce_add_nxv1i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vmv.s.x v9, zero
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; RV32-NEXT:    vwredsum.vs v8, v8, v9
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vwreduce_add_nxv1i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT:    vmv.s.x v9, zero
; RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; RV64-NEXT:    vwredsum.vs v8, v8, v9
; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %e = sext <vscale x 1 x i32> %v to <vscale x 1 x i64>
  %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %e)
  ret i64 %red
}

define i64 @vwreduce_uadd_nxv1i32(<vscale x 1 x i32> %v) {
; RV32-LABEL: vwreduce_uadd_nxv1i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vmv.s.x v9, zero
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; RV32-NEXT:    vwredsumu.vs v8, v8, v9
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vwreduce_uadd_nxv1i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT:    vmv.s.x v9, zero
; RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; RV64-NEXT:    vwredsumu.vs v8, v8, v9
; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %e = zext <vscale x 1 x i32> %v to <vscale x 1 x i64>
  %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %e)
  ret i64 %red
}

declare i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64>)

define i64 @vreduce_umax_nxv1i64(<vscale x 1 x i64> %v) {
; RV32-LABEL: vreduce_umax_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV32-NEXT:    vredmaxu.vs v8, v8, v8
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v9, v8, a0
; RV32-NEXT:    vmv.x.s a1, v9
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vreduce_umax_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT:    vredmaxu.vs v8, v8, v8
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %red = call i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64> %v)
  ret i64 %red
}

declare i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64>)

define i64 @vreduce_smax_nxv1i64(<vscale x 1 x i64> %v) {
; RV32-LABEL: vreduce_smax_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV32-NEXT:    vredmax.vs v8, v8, v8
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v9, v8, a0
; RV32-NEXT:    vmv.x.s a1, v9
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vreduce_smax_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT:    vredmax.vs v8, v8, v8
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %red = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> %v)
  ret i64 %red
}

declare i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64>)

define i64 @vreduce_umin_nxv1i64(<vscale x 1 x i64> %v) {
; RV32-LABEL: vreduce_umin_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV32-NEXT:    vredminu.vs v8, v8, v8
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v9, v8, a0
; RV32-NEXT:    vmv.x.s a1, v9
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vreduce_umin_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT:    vredminu.vs v8, v8, v8
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %red = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> %v)
  ret i64 %red
}

declare i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64>)

define i64 @vreduce_smin_nxv1i64(<vscale x 1 x i64> %v) {
; RV32-LABEL: vreduce_smin_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV32-NEXT:    vredmin.vs v8, v8, v8
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v9, v8, a0
; RV32-NEXT:    vmv.x.s a1, v9
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vreduce_smin_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT:    vredmin.vs v8, v8, v8
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %red = call i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64> %v)
  ret i64 %red
}

declare i64 @llvm.vector.reduce.and.nxv1i64(<vscale x 1 x i64>)

define i64 @vreduce_and_nxv1i64(<vscale x 1 x i64> %v) {
; RV32-LABEL: vreduce_and_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV32-NEXT:    vredand.vs v8, v8, v8
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v9, v8, a0
; RV32-NEXT:    vmv.x.s a1, v9
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vreduce_and_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT:    vredand.vs v8, v8, v8
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %red = call i64 @llvm.vector.reduce.and.nxv1i64(<vscale x 1 x i64> %v)
  ret i64 %red
}

declare i64 @llvm.vector.reduce.or.nxv1i64(<vscale x 1 x i64>)

define i64 @vreduce_or_nxv1i64(<vscale x 1 x i64> %v) {
; RV32-LABEL: vreduce_or_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV32-NEXT:    vredor.vs v8, v8, v8
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v9, v8, a0
; RV32-NEXT:    vmv.x.s a1, v9
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vreduce_or_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT:    vredor.vs v8, v8, v8
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %red = call i64 @llvm.vector.reduce.or.nxv1i64(<vscale x 1 x i64> %v)
  ret i64 %red
}

declare i64 @llvm.vector.reduce.xor.nxv1i64(<vscale x 1 x i64>)

define i64 @vreduce_xor_nxv1i64(<vscale x 1 x i64> %v) {
; RV32-LABEL: vreduce_xor_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV32-NEXT:    vmv.s.x v9, zero
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vredxor.vs v8, v8, v9
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vreduce_xor_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT:    vmv.s.x v9, zero
; RV64-NEXT:    vredxor.vs v8, v8, v9
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %red = call i64 @llvm.vector.reduce.xor.nxv1i64(<vscale x 1 x i64> %v)
  ret i64 %red
}

declare i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64>)

define i64 @vreduce_add_nxv2i64(<vscale x 2 x i64> %v) {
; RV32-LABEL: vreduce_add_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT:    vmv.s.x v10, zero
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vredsum.vs v8, v8, v10
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vreduce_add_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT:    vmv.s.x v10, zero
; RV64-NEXT:    vredsum.vs v8, v8, v10
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %v)
  ret i64 %red
}

define i64 @vwreduce_add_nxv2i32(<vscale x 2 x i32> %v) {
; RV32-LABEL: vwreduce_add_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vmv.s.x v9, zero
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV32-NEXT:    vwredsum.vs v8, v8, v9
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vwreduce_add_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT:    vmv.s.x v9, zero
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV64-NEXT:    vwredsum.vs v8, v8, v9
; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %e = sext <vscale x 2 x i32> %v to <vscale x 2 x i64>
  %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %e)
  ret i64 %red
}

define i64 @vwreduce_uadd_nxv2i32(<vscale x 2 x i32> %v) {
; RV32-LABEL: vwreduce_uadd_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vmv.s.x v9, zero
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV32-NEXT:    vwredsumu.vs v8, v8, v9
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vwreduce_uadd_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT:    vmv.s.x v9, zero
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV64-NEXT:    vwredsumu.vs v8, v8, v9
; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %e = zext <vscale x 2 x i32> %v to <vscale x 2 x i64>
  %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %e)
  ret i64 %red
}

declare i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64>)

define i64 @vreduce_umax_nxv2i64(<vscale x 2 x i64> %v) {
; RV32-LABEL: vreduce_umax_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT:    vredmaxu.vs v8, v8, v8
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vreduce_umax_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT:    vredmaxu.vs v8, v8, v8
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %red = call i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64> %v)
  ret i64 %red
}

declare i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64>)

define i64 @vreduce_smax_nxv2i64(<vscale x 2 x i64> %v) {
; RV32-LABEL: vreduce_smax_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT:    vredmax.vs v8, v8, v8
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vreduce_smax_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT:    vredmax.vs v8, v8, v8
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %red = call i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64> %v)
  ret i64 %red
}

declare i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64>)

define i64 @vreduce_umin_nxv2i64(<vscale x 2 x i64> %v) {
; RV32-LABEL: vreduce_umin_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT:    vredminu.vs v8, v8, v8
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vreduce_umin_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT:    vredminu.vs v8, v8, v8
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %red = call i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64> %v)
  ret i64 %red
}

declare i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64>)

define i64 @vreduce_smin_nxv2i64(<vscale x 2 x i64> %v) {
; RV32-LABEL: vreduce_smin_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT:    vredmin.vs v8, v8, v8
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vreduce_smin_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT:    vredmin.vs v8, v8, v8
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %red = call i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64> %v)
  ret i64 %red
}

declare i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64>)

define i64 @vreduce_and_nxv2i64(<vscale x 2 x i64> %v) {
; RV32-LABEL: vreduce_and_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT:    vredand.vs v8, v8, v8
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vreduce_and_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT:    vredand.vs v8, v8, v8
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %red = call i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64> %v)
  ret i64 %red
}

declare i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64>)

define i64 @vreduce_or_nxv2i64(<vscale x 2 x i64> %v) {
; RV32-LABEL: vreduce_or_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT:    vredor.vs v8, v8, v8
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vreduce_or_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT:    vredor.vs v8, v8, v8
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %red = call i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64> %v)
  ret i64 %red
}

declare i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64>)

define i64 @vreduce_xor_nxv2i64(<vscale x 2 x i64> %v) {
; RV32-LABEL: vreduce_xor_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT:    vmv.s.x v10, zero
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vredxor.vs v8, v8, v10
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vreduce_xor_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT:    vmv.s.x v10, zero
; RV64-NEXT:    vredxor.vs v8, v8, v10
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %red = call i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64> %v)
  ret i64 %red
}

declare i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64>)

define i64 @vreduce_add_nxv4i64(<vscale x 4 x i64> %v) {
; RV32-LABEL: vreduce_add_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV32-NEXT:    vmv.s.x v12, zero
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vredsum.vs v8, v8, v12
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vreduce_add_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT:    vmv.s.x v12, zero
; RV64-NEXT:    vredsum.vs v8, v8, v12
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v)
  ret i64 %red
}

define i64 @vwreduce_add_nxv4i32(<vscale x 4 x i32> %v) {
; RV32-LABEL: vwreduce_add_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vmv.s.x v10, zero
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT:    vwredsum.vs v8, v8, v10
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vwreduce_add_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT:    vmv.s.x v10, zero
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV64-NEXT:    vwredsum.vs v8, v8, v10
; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %e = sext <vscale x 4 x i32> %v to <vscale x 4 x i64>
  %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %e)
  ret i64 %red
}

define i64 @vwreduce_uadd_nxv4i32(<vscale x 4 x i32> %v) {
; RV32-LABEL: vwreduce_uadd_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vmv.s.x v10, zero
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT:    vwredsumu.vs v8, v8, v10
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vwreduce_uadd_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT:    vmv.s.x v10, zero
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV64-NEXT:    vwredsumu.vs v8, v8, v10
; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %e = zext <vscale x 4 x i32> %v to <vscale x 4 x i64>
  %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %e)
  ret i64 %red
}

declare i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64>)

define i64 @vreduce_umax_nxv4i64(<vscale x 4 x i64> %v) {
; RV32-LABEL: vreduce_umax_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV32-NEXT:    vredmaxu.vs v8, v8, v8
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vreduce_umax_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT:    vredmaxu.vs v8, v8, v8
; RV64-NEXT:    vmv.x.s a0, v8
RV64-NEXT: vmv.x.s a0, v8 1746; RV64-NEXT: ret 1747 %red = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v) 1748 ret i64 %red 1749} 1750 1751declare i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64>) 1752 1753define i64 @vreduce_smax_nxv4i64(<vscale x 4 x i64> %v) { 1754; RV32-LABEL: vreduce_smax_nxv4i64: 1755; RV32: # %bb.0: 1756; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma 1757; RV32-NEXT: vredmax.vs v8, v8, v8 1758; RV32-NEXT: li a1, 32 1759; RV32-NEXT: vmv.x.s a0, v8 1760; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma 1761; RV32-NEXT: vsrl.vx v8, v8, a1 1762; RV32-NEXT: vmv.x.s a1, v8 1763; RV32-NEXT: ret 1764; 1765; RV64-LABEL: vreduce_smax_nxv4i64: 1766; RV64: # %bb.0: 1767; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma 1768; RV64-NEXT: vredmax.vs v8, v8, v8 1769; RV64-NEXT: vmv.x.s a0, v8 1770; RV64-NEXT: ret 1771 %red = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v) 1772 ret i64 %red 1773} 1774 1775declare i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64>) 1776 1777define i64 @vreduce_umin_nxv4i64(<vscale x 4 x i64> %v) { 1778; RV32-LABEL: vreduce_umin_nxv4i64: 1779; RV32: # %bb.0: 1780; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma 1781; RV32-NEXT: vredminu.vs v8, v8, v8 1782; RV32-NEXT: li a1, 32 1783; RV32-NEXT: vmv.x.s a0, v8 1784; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma 1785; RV32-NEXT: vsrl.vx v8, v8, a1 1786; RV32-NEXT: vmv.x.s a1, v8 1787; RV32-NEXT: ret 1788; 1789; RV64-LABEL: vreduce_umin_nxv4i64: 1790; RV64: # %bb.0: 1791; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma 1792; RV64-NEXT: vredminu.vs v8, v8, v8 1793; RV64-NEXT: vmv.x.s a0, v8 1794; RV64-NEXT: ret 1795 %red = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v) 1796 ret i64 %red 1797} 1798 1799declare i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64>) 1800 1801define i64 @vreduce_smin_nxv4i64(<vscale x 4 x i64> %v) { 1802; RV32-LABEL: vreduce_smin_nxv4i64: 1803; RV32: # %bb.0: 1804; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma 1805; RV32-NEXT: vredmin.vs v8, v8, v8 1806; RV32-NEXT: li a1, 32 1807; RV32-NEXT: vmv.x.s a0, v8 1808; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma 1809; RV32-NEXT: vsrl.vx v8, v8, a1 1810; RV32-NEXT: vmv.x.s a1, v8 1811; RV32-NEXT: ret 1812; 1813; RV64-LABEL: vreduce_smin_nxv4i64: 1814; RV64: # %bb.0: 1815; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma 1816; RV64-NEXT: vredmin.vs v8, v8, v8 1817; RV64-NEXT: vmv.x.s a0, v8 1818; RV64-NEXT: ret 1819 %red = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v) 1820 ret i64 %red 1821} 1822 1823declare i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64>) 1824 1825define i64 @vreduce_and_nxv4i64(<vscale x 4 x i64> %v) { 1826; RV32-LABEL: vreduce_and_nxv4i64: 1827; RV32: # %bb.0: 1828; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma 1829; RV32-NEXT: vredand.vs v8, v8, v8 1830; RV32-NEXT: li a1, 32 1831; RV32-NEXT: vmv.x.s a0, v8 1832; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma 1833; RV32-NEXT: vsrl.vx v8, v8, a1 1834; RV32-NEXT: vmv.x.s a1, v8 1835; RV32-NEXT: ret 1836; 1837; RV64-LABEL: vreduce_and_nxv4i64: 1838; RV64: # %bb.0: 1839; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma 1840; RV64-NEXT: vredand.vs v8, v8, v8 1841; RV64-NEXT: vmv.x.s a0, v8 1842; RV64-NEXT: ret 1843 %red = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v) 1844 ret i64 %red 1845} 1846 1847declare i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64>) 1848 1849define i64 @vreduce_or_nxv4i64(<vscale x 4 x i64> %v) { 1850; RV32-LABEL: vreduce_or_nxv4i64: 1851; RV32: # %bb.0: 1852; 
RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma 1853; RV32-NEXT: vredor.vs v8, v8, v8 1854; RV32-NEXT: li a1, 32 1855; RV32-NEXT: vmv.x.s a0, v8 1856; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma 1857; RV32-NEXT: vsrl.vx v8, v8, a1 1858; RV32-NEXT: vmv.x.s a1, v8 1859; RV32-NEXT: ret 1860; 1861; RV64-LABEL: vreduce_or_nxv4i64: 1862; RV64: # %bb.0: 1863; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma 1864; RV64-NEXT: vredor.vs v8, v8, v8 1865; RV64-NEXT: vmv.x.s a0, v8 1866; RV64-NEXT: ret 1867 %red = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v) 1868 ret i64 %red 1869} 1870 1871declare i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64>) 1872 1873define i64 @vreduce_xor_nxv4i64(<vscale x 4 x i64> %v) { 1874; RV32-LABEL: vreduce_xor_nxv4i64: 1875; RV32: # %bb.0: 1876; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma 1877; RV32-NEXT: vmv.s.x v12, zero 1878; RV32-NEXT: li a1, 32 1879; RV32-NEXT: vredxor.vs v8, v8, v12 1880; RV32-NEXT: vmv.x.s a0, v8 1881; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma 1882; RV32-NEXT: vsrl.vx v8, v8, a1 1883; RV32-NEXT: vmv.x.s a1, v8 1884; RV32-NEXT: ret 1885; 1886; RV64-LABEL: vreduce_xor_nxv4i64: 1887; RV64: # %bb.0: 1888; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma 1889; RV64-NEXT: vmv.s.x v12, zero 1890; RV64-NEXT: vredxor.vs v8, v8, v12 1891; RV64-NEXT: vmv.x.s a0, v8 1892; RV64-NEXT: ret 1893 %red = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v) 1894 ret i64 %red 1895} 1896