1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+f -target-abi=lp64f \ 3; RUN: | FileCheck %s 4 5define void @sink_splat_mul(ptr nocapture %a, i32 signext %x) { 6; CHECK-LABEL: sink_splat_mul: 7; CHECK: # %bb.0: # %entry 8; CHECK-NEXT: lui a2, 1 9; CHECK-NEXT: add a2, a0, a2 10; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 11; CHECK-NEXT: .LBB0_1: # %vector.body 12; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 13; CHECK-NEXT: vle32.v v8, (a0) 14; CHECK-NEXT: vmul.vx v8, v8, a1 15; CHECK-NEXT: vse32.v v8, (a0) 16; CHECK-NEXT: addi a0, a0, 16 17; CHECK-NEXT: bne a0, a2, .LBB0_1 18; CHECK-NEXT: # %bb.2: # %for.cond.cleanup 19; CHECK-NEXT: ret 20entry: 21 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0 22 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer 23 br label %vector.body 24 25vector.body: ; preds = %vector.body, %entry 26 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 27 %0 = getelementptr inbounds i32, ptr %a, i64 %index 28 %wide.load = load <4 x i32>, ptr %0, align 4 29 %1 = mul <4 x i32> %wide.load, %broadcast.splat 30 store <4 x i32> %1, ptr %0, align 4 31 %index.next = add nuw i64 %index, 4 32 %2 = icmp eq i64 %index.next, 1024 33 br i1 %2, label %for.cond.cleanup, label %vector.body 34 35for.cond.cleanup: ; preds = %vector.body 36 ret void 37} 38 39define void @sink_splat_add(ptr nocapture %a, i32 signext %x) { 40; CHECK-LABEL: sink_splat_add: 41; CHECK: # %bb.0: # %entry 42; CHECK-NEXT: lui a2, 1 43; CHECK-NEXT: add a2, a0, a2 44; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 45; CHECK-NEXT: .LBB1_1: # %vector.body 46; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 47; CHECK-NEXT: vle32.v v8, (a0) 48; CHECK-NEXT: vadd.vx v8, v8, a1 49; CHECK-NEXT: vse32.v v8, (a0) 50; CHECK-NEXT: addi a0, a0, 16 51; CHECK-NEXT: bne a0, a2, .LBB1_1 52; CHECK-NEXT: # %bb.2: # %for.cond.cleanup 
53; CHECK-NEXT: ret 54entry: 55 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0 56 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer 57 br label %vector.body 58 59vector.body: ; preds = %vector.body, %entry 60 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 61 %0 = getelementptr inbounds i32, ptr %a, i64 %index 62 %wide.load = load <4 x i32>, ptr %0, align 4 63 %1 = add <4 x i32> %wide.load, %broadcast.splat 64 store <4 x i32> %1, ptr %0, align 4 65 %index.next = add nuw i64 %index, 4 66 %2 = icmp eq i64 %index.next, 1024 67 br i1 %2, label %for.cond.cleanup, label %vector.body 68 69for.cond.cleanup: ; preds = %vector.body 70 ret void 71} 72 73define void @sink_splat_sub(ptr nocapture %a, i32 signext %x) { 74; CHECK-LABEL: sink_splat_sub: 75; CHECK: # %bb.0: # %entry 76; CHECK-NEXT: lui a2, 1 77; CHECK-NEXT: add a2, a0, a2 78; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 79; CHECK-NEXT: .LBB2_1: # %vector.body 80; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 81; CHECK-NEXT: vle32.v v8, (a0) 82; CHECK-NEXT: vsub.vx v8, v8, a1 83; CHECK-NEXT: vse32.v v8, (a0) 84; CHECK-NEXT: addi a0, a0, 16 85; CHECK-NEXT: bne a0, a2, .LBB2_1 86; CHECK-NEXT: # %bb.2: # %for.cond.cleanup 87; CHECK-NEXT: ret 88entry: 89 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0 90 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer 91 br label %vector.body 92 93vector.body: ; preds = %vector.body, %entry 94 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 95 %0 = getelementptr inbounds i32, ptr %a, i64 %index 96 %wide.load = load <4 x i32>, ptr %0, align 4 97 %1 = sub <4 x i32> %wide.load, %broadcast.splat 98 store <4 x i32> %1, ptr %0, align 4 99 %index.next = add nuw i64 %index, 4 100 %2 = icmp eq i64 %index.next, 1024 101 br i1 %2, label %for.cond.cleanup, label %vector.body 102 103for.cond.cleanup: 
; preds = %vector.body 104 ret void 105} 106 107define void @sink_splat_rsub(ptr nocapture %a, i32 signext %x) { 108; CHECK-LABEL: sink_splat_rsub: 109; CHECK: # %bb.0: # %entry 110; CHECK-NEXT: lui a2, 1 111; CHECK-NEXT: add a2, a0, a2 112; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 113; CHECK-NEXT: .LBB3_1: # %vector.body 114; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 115; CHECK-NEXT: vle32.v v8, (a0) 116; CHECK-NEXT: vrsub.vx v8, v8, a1 117; CHECK-NEXT: vse32.v v8, (a0) 118; CHECK-NEXT: addi a0, a0, 16 119; CHECK-NEXT: bne a0, a2, .LBB3_1 120; CHECK-NEXT: # %bb.2: # %for.cond.cleanup 121; CHECK-NEXT: ret 122entry: 123 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0 124 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer 125 br label %vector.body 126 127vector.body: ; preds = %vector.body, %entry 128 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 129 %0 = getelementptr inbounds i32, ptr %a, i64 %index 130 %wide.load = load <4 x i32>, ptr %0, align 4 131 %1 = sub <4 x i32> %broadcast.splat, %wide.load 132 store <4 x i32> %1, ptr %0, align 4 133 %index.next = add nuw i64 %index, 4 134 %2 = icmp eq i64 %index.next, 1024 135 br i1 %2, label %for.cond.cleanup, label %vector.body 136 137for.cond.cleanup: ; preds = %vector.body 138 ret void 139} 140 141define void @sink_splat_and(ptr nocapture %a, i32 signext %x) { 142; CHECK-LABEL: sink_splat_and: 143; CHECK: # %bb.0: # %entry 144; CHECK-NEXT: lui a2, 1 145; CHECK-NEXT: add a2, a0, a2 146; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 147; CHECK-NEXT: .LBB4_1: # %vector.body 148; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 149; CHECK-NEXT: vle32.v v8, (a0) 150; CHECK-NEXT: vand.vx v8, v8, a1 151; CHECK-NEXT: vse32.v v8, (a0) 152; CHECK-NEXT: addi a0, a0, 16 153; CHECK-NEXT: bne a0, a2, .LBB4_1 154; CHECK-NEXT: # %bb.2: # %for.cond.cleanup 155; CHECK-NEXT: ret 156entry: 157 %broadcast.splatinsert = insertelement 
<4 x i32> poison, i32 %x, i32 0 158 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer 159 br label %vector.body 160 161vector.body: ; preds = %vector.body, %entry 162 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 163 %0 = getelementptr inbounds i32, ptr %a, i64 %index 164 %wide.load = load <4 x i32>, ptr %0, align 4 165 %1 = and <4 x i32> %wide.load, %broadcast.splat 166 store <4 x i32> %1, ptr %0, align 4 167 %index.next = add nuw i64 %index, 4 168 %2 = icmp eq i64 %index.next, 1024 169 br i1 %2, label %for.cond.cleanup, label %vector.body 170 171for.cond.cleanup: ; preds = %vector.body 172 ret void 173} 174 175define void @sink_splat_or(ptr nocapture %a, i32 signext %x) { 176; CHECK-LABEL: sink_splat_or: 177; CHECK: # %bb.0: # %entry 178; CHECK-NEXT: lui a2, 1 179; CHECK-NEXT: add a2, a0, a2 180; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 181; CHECK-NEXT: .LBB5_1: # %vector.body 182; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 183; CHECK-NEXT: vle32.v v8, (a0) 184; CHECK-NEXT: vor.vx v8, v8, a1 185; CHECK-NEXT: vse32.v v8, (a0) 186; CHECK-NEXT: addi a0, a0, 16 187; CHECK-NEXT: bne a0, a2, .LBB5_1 188; CHECK-NEXT: # %bb.2: # %for.cond.cleanup 189; CHECK-NEXT: ret 190entry: 191 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0 192 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer 193 br label %vector.body 194 195vector.body: ; preds = %vector.body, %entry 196 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 197 %0 = getelementptr inbounds i32, ptr %a, i64 %index 198 %wide.load = load <4 x i32>, ptr %0, align 4 199 %1 = or <4 x i32> %wide.load, %broadcast.splat 200 store <4 x i32> %1, ptr %0, align 4 201 %index.next = add nuw i64 %index, 4 202 %2 = icmp eq i64 %index.next, 1024 203 br i1 %2, label %for.cond.cleanup, label %vector.body 204 205for.cond.cleanup: ; preds = %vector.body 206 ret 
void 207} 208 209define void @sink_splat_xor(ptr nocapture %a, i32 signext %x) { 210; CHECK-LABEL: sink_splat_xor: 211; CHECK: # %bb.0: # %entry 212; CHECK-NEXT: lui a2, 1 213; CHECK-NEXT: add a2, a0, a2 214; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 215; CHECK-NEXT: .LBB6_1: # %vector.body 216; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 217; CHECK-NEXT: vle32.v v8, (a0) 218; CHECK-NEXT: vxor.vx v8, v8, a1 219; CHECK-NEXT: vse32.v v8, (a0) 220; CHECK-NEXT: addi a0, a0, 16 221; CHECK-NEXT: bne a0, a2, .LBB6_1 222; CHECK-NEXT: # %bb.2: # %for.cond.cleanup 223; CHECK-NEXT: ret 224entry: 225 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0 226 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer 227 br label %vector.body 228 229vector.body: ; preds = %vector.body, %entry 230 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 231 %0 = getelementptr inbounds i32, ptr %a, i64 %index 232 %wide.load = load <4 x i32>, ptr %0, align 4 233 %1 = xor <4 x i32> %wide.load, %broadcast.splat 234 store <4 x i32> %1, ptr %0, align 4 235 %index.next = add nuw i64 %index, 4 236 %2 = icmp eq i64 %index.next, 1024 237 br i1 %2, label %for.cond.cleanup, label %vector.body 238 239for.cond.cleanup: ; preds = %vector.body 240 ret void 241} 242 243define void @sink_splat_mul_scalable(ptr nocapture %a, i32 signext %x) { 244; CHECK-LABEL: sink_splat_mul_scalable: 245; CHECK: # %bb.0: # %entry 246; CHECK-NEXT: csrr a5, vlenb 247; CHECK-NEXT: srli a3, a5, 1 248; CHECK-NEXT: li a2, 1024 249; CHECK-NEXT: bgeu a2, a3, .LBB7_2 250; CHECK-NEXT: # %bb.1: 251; CHECK-NEXT: li a2, 0 252; CHECK-NEXT: j .LBB7_5 253; CHECK-NEXT: .LBB7_2: # %vector.ph 254; CHECK-NEXT: addi a2, a3, -1 255; CHECK-NEXT: andi a4, a2, 1024 256; CHECK-NEXT: xori a2, a4, 1024 257; CHECK-NEXT: slli a5, a5, 1 258; CHECK-NEXT: mv a6, a0 259; CHECK-NEXT: mv a7, a2 260; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma 261; CHECK-NEXT: 
.LBB7_3: # %vector.body 262; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 263; CHECK-NEXT: vl2re32.v v8, (a6) 264; CHECK-NEXT: sub a7, a7, a3 265; CHECK-NEXT: vmul.vx v8, v8, a1 266; CHECK-NEXT: vs2r.v v8, (a6) 267; CHECK-NEXT: add a6, a6, a5 268; CHECK-NEXT: bnez a7, .LBB7_3 269; CHECK-NEXT: # %bb.4: # %middle.block 270; CHECK-NEXT: beqz a4, .LBB7_7 271; CHECK-NEXT: .LBB7_5: # %for.body.preheader 272; CHECK-NEXT: slli a2, a2, 2 273; CHECK-NEXT: lui a3, 1 274; CHECK-NEXT: add a2, a0, a2 275; CHECK-NEXT: add a0, a0, a3 276; CHECK-NEXT: .LBB7_6: # %for.body 277; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 278; CHECK-NEXT: lw a3, 0(a2) 279; CHECK-NEXT: mul a3, a3, a1 280; CHECK-NEXT: sw a3, 0(a2) 281; CHECK-NEXT: addi a2, a2, 4 282; CHECK-NEXT: bne a2, a0, .LBB7_6 283; CHECK-NEXT: .LBB7_7: # %for.cond.cleanup 284; CHECK-NEXT: ret 285entry: 286 %0 = call i64 @llvm.vscale.i64() 287 %1 = shl i64 %0, 2 288 %min.iters.check = icmp ugt i64 %1, 1024 289 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph 290 291vector.ph: ; preds = %entry 292 %2 = call i64 @llvm.vscale.i64() 293 %3 = shl i64 %2, 2 294 %n.mod.vf = urem i64 1024, %3 295 %n.vec = sub nsw i64 1024, %n.mod.vf 296 %broadcast.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0 297 %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer 298 %4 = call i64 @llvm.vscale.i64() 299 %5 = shl i64 %4, 2 300 br label %vector.body 301 302vector.body: ; preds = %vector.body, %vector.ph 303 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 304 %6 = getelementptr inbounds i32, ptr %a, i64 %index 305 %wide.load = load <vscale x 4 x i32>, ptr %6, align 4 306 %7 = mul <vscale x 4 x i32> %wide.load, %broadcast.splat 307 store <vscale x 4 x i32> %7, ptr %6, align 4 308 %index.next = add nuw i64 %index, %5 309 %8 = icmp eq i64 %index.next, %n.vec 310 br i1 %8, label %middle.block, label %vector.body 
311 312middle.block: ; preds = %vector.body 313 %cmp.n = icmp eq i64 %n.mod.vf, 0 314 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader 315 316for.body.preheader: ; preds = %entry, %middle.block 317 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ] 318 br label %for.body 319 320for.cond.cleanup: ; preds = %for.body, %middle.block 321 ret void 322 323for.body: ; preds = %for.body.preheader, %for.body 324 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ] 325 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv 326 %9 = load i32, ptr %arrayidx, align 4 327 %mul = mul i32 %9, %x 328 store i32 %mul, ptr %arrayidx, align 4 329 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 330 %cmp.not = icmp eq i64 %indvars.iv.next, 1024 331 br i1 %cmp.not, label %for.cond.cleanup, label %for.body 332} 333 334define void @sink_splat_add_scalable(ptr nocapture %a, i32 signext %x) { 335; CHECK-LABEL: sink_splat_add_scalable: 336; CHECK: # %bb.0: # %entry 337; CHECK-NEXT: csrr a5, vlenb 338; CHECK-NEXT: srli a3, a5, 1 339; CHECK-NEXT: li a2, 1024 340; CHECK-NEXT: bgeu a2, a3, .LBB8_2 341; CHECK-NEXT: # %bb.1: 342; CHECK-NEXT: li a2, 0 343; CHECK-NEXT: j .LBB8_5 344; CHECK-NEXT: .LBB8_2: # %vector.ph 345; CHECK-NEXT: addi a2, a3, -1 346; CHECK-NEXT: andi a4, a2, 1024 347; CHECK-NEXT: xori a2, a4, 1024 348; CHECK-NEXT: slli a5, a5, 1 349; CHECK-NEXT: mv a6, a0 350; CHECK-NEXT: mv a7, a2 351; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma 352; CHECK-NEXT: .LBB8_3: # %vector.body 353; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 354; CHECK-NEXT: vl2re32.v v8, (a6) 355; CHECK-NEXT: sub a7, a7, a3 356; CHECK-NEXT: vadd.vx v8, v8, a1 357; CHECK-NEXT: vs2r.v v8, (a6) 358; CHECK-NEXT: add a6, a6, a5 359; CHECK-NEXT: bnez a7, .LBB8_3 360; CHECK-NEXT: # %bb.4: # %middle.block 361; CHECK-NEXT: beqz a4, .LBB8_7 362; CHECK-NEXT: .LBB8_5: # %for.body.preheader 363; CHECK-NEXT: slli a2, a2, 2 364; CHECK-NEXT: lui 
a3, 1 365; CHECK-NEXT: add a2, a0, a2 366; CHECK-NEXT: add a0, a0, a3 367; CHECK-NEXT: .LBB8_6: # %for.body 368; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 369; CHECK-NEXT: lw a3, 0(a2) 370; CHECK-NEXT: add a3, a3, a1 371; CHECK-NEXT: sw a3, 0(a2) 372; CHECK-NEXT: addi a2, a2, 4 373; CHECK-NEXT: bne a2, a0, .LBB8_6 374; CHECK-NEXT: .LBB8_7: # %for.cond.cleanup 375; CHECK-NEXT: ret 376entry: 377 %0 = call i64 @llvm.vscale.i64() 378 %1 = shl i64 %0, 2 379 %min.iters.check = icmp ugt i64 %1, 1024 380 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph 381 382vector.ph: ; preds = %entry 383 %2 = call i64 @llvm.vscale.i64() 384 %3 = shl i64 %2, 2 385 %n.mod.vf = urem i64 1024, %3 386 %n.vec = sub nsw i64 1024, %n.mod.vf 387 %broadcast.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0 388 %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer 389 %4 = call i64 @llvm.vscale.i64() 390 %5 = shl i64 %4, 2 391 br label %vector.body 392 393vector.body: ; preds = %vector.body, %vector.ph 394 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 395 %6 = getelementptr inbounds i32, ptr %a, i64 %index 396 %wide.load = load <vscale x 4 x i32>, ptr %6, align 4 397 %7 = add <vscale x 4 x i32> %wide.load, %broadcast.splat 398 store <vscale x 4 x i32> %7, ptr %6, align 4 399 %index.next = add nuw i64 %index, %5 400 %8 = icmp eq i64 %index.next, %n.vec 401 br i1 %8, label %middle.block, label %vector.body 402 403middle.block: ; preds = %vector.body 404 %cmp.n = icmp eq i64 %n.mod.vf, 0 405 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader 406 407for.body.preheader: ; preds = %entry, %middle.block 408 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ] 409 br label %for.body 410 411for.cond.cleanup: ; preds = %for.body, %middle.block 412 ret void 413 414for.body: ; preds = %for.body.preheader, %for.body 415 %indvars.iv = phi 
i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ] 416 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv 417 %9 = load i32, ptr %arrayidx, align 4 418 %add = add i32 %9, %x 419 store i32 %add, ptr %arrayidx, align 4 420 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 421 %cmp.not = icmp eq i64 %indvars.iv.next, 1024 422 br i1 %cmp.not, label %for.cond.cleanup, label %for.body 423} 424 425define void @sink_splat_sub_scalable(ptr nocapture %a, i32 signext %x) { 426; CHECK-LABEL: sink_splat_sub_scalable: 427; CHECK: # %bb.0: # %entry 428; CHECK-NEXT: csrr a5, vlenb 429; CHECK-NEXT: srli a3, a5, 1 430; CHECK-NEXT: li a2, 1024 431; CHECK-NEXT: bgeu a2, a3, .LBB9_2 432; CHECK-NEXT: # %bb.1: 433; CHECK-NEXT: li a2, 0 434; CHECK-NEXT: j .LBB9_5 435; CHECK-NEXT: .LBB9_2: # %vector.ph 436; CHECK-NEXT: addi a2, a3, -1 437; CHECK-NEXT: andi a4, a2, 1024 438; CHECK-NEXT: xori a2, a4, 1024 439; CHECK-NEXT: slli a5, a5, 1 440; CHECK-NEXT: mv a6, a0 441; CHECK-NEXT: mv a7, a2 442; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma 443; CHECK-NEXT: .LBB9_3: # %vector.body 444; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 445; CHECK-NEXT: vl2re32.v v8, (a6) 446; CHECK-NEXT: sub a7, a7, a3 447; CHECK-NEXT: vsub.vx v8, v8, a1 448; CHECK-NEXT: vs2r.v v8, (a6) 449; CHECK-NEXT: add a6, a6, a5 450; CHECK-NEXT: bnez a7, .LBB9_3 451; CHECK-NEXT: # %bb.4: # %middle.block 452; CHECK-NEXT: beqz a4, .LBB9_7 453; CHECK-NEXT: .LBB9_5: # %for.body.preheader 454; CHECK-NEXT: slli a2, a2, 2 455; CHECK-NEXT: lui a3, 1 456; CHECK-NEXT: add a2, a0, a2 457; CHECK-NEXT: add a0, a0, a3 458; CHECK-NEXT: .LBB9_6: # %for.body 459; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 460; CHECK-NEXT: lw a3, 0(a2) 461; CHECK-NEXT: add a3, a3, a1 462; CHECK-NEXT: sw a3, 0(a2) 463; CHECK-NEXT: addi a2, a2, 4 464; CHECK-NEXT: bne a2, a0, .LBB9_6 465; CHECK-NEXT: .LBB9_7: # %for.cond.cleanup 466; CHECK-NEXT: ret 467entry: 468 %0 = call i64 @llvm.vscale.i64() 469 %1 = shl i64 %0, 
2 470 %min.iters.check = icmp ugt i64 %1, 1024 471 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph 472 473vector.ph: ; preds = %entry 474 %2 = call i64 @llvm.vscale.i64() 475 %3 = shl i64 %2, 2 476 %n.mod.vf = urem i64 1024, %3 477 %n.vec = sub nsw i64 1024, %n.mod.vf 478 %broadcast.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0 479 %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer 480 %4 = call i64 @llvm.vscale.i64() 481 %5 = shl i64 %4, 2 482 br label %vector.body 483 484vector.body: ; preds = %vector.body, %vector.ph 485 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 486 %6 = getelementptr inbounds i32, ptr %a, i64 %index 487 %wide.load = load <vscale x 4 x i32>, ptr %6, align 4 488 %7 = sub <vscale x 4 x i32> %wide.load, %broadcast.splat 489 store <vscale x 4 x i32> %7, ptr %6, align 4 490 %index.next = add nuw i64 %index, %5 491 %8 = icmp eq i64 %index.next, %n.vec 492 br i1 %8, label %middle.block, label %vector.body 493 494middle.block: ; preds = %vector.body 495 %cmp.n = icmp eq i64 %n.mod.vf, 0 496 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader 497 498for.body.preheader: ; preds = %entry, %middle.block 499 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ] 500 br label %for.body 501 502for.cond.cleanup: ; preds = %for.body, %middle.block 503 ret void 504 505for.body: ; preds = %for.body.preheader, %for.body 506 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ] 507 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv 508 %9 = load i32, ptr %arrayidx, align 4 509 %add = add i32 %9, %x 510 store i32 %add, ptr %arrayidx, align 4 511 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 512 %cmp.not = icmp eq i64 %indvars.iv.next, 1024 513 br i1 %cmp.not, label %for.cond.cleanup, label %for.body 514} 515 516define void 
@sink_splat_rsub_scalable(ptr nocapture %a, i32 signext %x) { 517; CHECK-LABEL: sink_splat_rsub_scalable: 518; CHECK: # %bb.0: # %entry 519; CHECK-NEXT: csrr a5, vlenb 520; CHECK-NEXT: srli a3, a5, 1 521; CHECK-NEXT: li a2, 1024 522; CHECK-NEXT: bgeu a2, a3, .LBB10_2 523; CHECK-NEXT: # %bb.1: 524; CHECK-NEXT: li a2, 0 525; CHECK-NEXT: j .LBB10_5 526; CHECK-NEXT: .LBB10_2: # %vector.ph 527; CHECK-NEXT: addi a2, a3, -1 528; CHECK-NEXT: andi a4, a2, 1024 529; CHECK-NEXT: xori a2, a4, 1024 530; CHECK-NEXT: slli a5, a5, 1 531; CHECK-NEXT: mv a6, a0 532; CHECK-NEXT: mv a7, a2 533; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma 534; CHECK-NEXT: .LBB10_3: # %vector.body 535; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 536; CHECK-NEXT: vl2re32.v v8, (a6) 537; CHECK-NEXT: sub a7, a7, a3 538; CHECK-NEXT: vrsub.vx v8, v8, a1 539; CHECK-NEXT: vs2r.v v8, (a6) 540; CHECK-NEXT: add a6, a6, a5 541; CHECK-NEXT: bnez a7, .LBB10_3 542; CHECK-NEXT: # %bb.4: # %middle.block 543; CHECK-NEXT: beqz a4, .LBB10_7 544; CHECK-NEXT: .LBB10_5: # %for.body.preheader 545; CHECK-NEXT: slli a2, a2, 2 546; CHECK-NEXT: lui a3, 1 547; CHECK-NEXT: add a2, a0, a2 548; CHECK-NEXT: add a0, a0, a3 549; CHECK-NEXT: .LBB10_6: # %for.body 550; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 551; CHECK-NEXT: lw a3, 0(a2) 552; CHECK-NEXT: subw a3, a1, a3 553; CHECK-NEXT: sw a3, 0(a2) 554; CHECK-NEXT: addi a2, a2, 4 555; CHECK-NEXT: bne a2, a0, .LBB10_6 556; CHECK-NEXT: .LBB10_7: # %for.cond.cleanup 557; CHECK-NEXT: ret 558entry: 559 %0 = call i64 @llvm.vscale.i64() 560 %1 = shl i64 %0, 2 561 %min.iters.check = icmp ugt i64 %1, 1024 562 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph 563 564vector.ph: ; preds = %entry 565 %2 = call i64 @llvm.vscale.i64() 566 %3 = shl i64 %2, 2 567 %n.mod.vf = urem i64 1024, %3 568 %n.vec = sub nsw i64 1024, %n.mod.vf 569 %broadcast.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0 570 %broadcast.splat = shufflevector <vscale x 4 x i32> 
%broadcast.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer 571 %4 = call i64 @llvm.vscale.i64() 572 %5 = shl i64 %4, 2 573 br label %vector.body 574 575vector.body: ; preds = %vector.body, %vector.ph 576 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 577 %6 = getelementptr inbounds i32, ptr %a, i64 %index 578 %wide.load = load <vscale x 4 x i32>, ptr %6, align 4 579 %7 = sub <vscale x 4 x i32> %broadcast.splat, %wide.load 580 store <vscale x 4 x i32> %7, ptr %6, align 4 581 %index.next = add nuw i64 %index, %5 582 %8 = icmp eq i64 %index.next, %n.vec 583 br i1 %8, label %middle.block, label %vector.body 584 585middle.block: ; preds = %vector.body 586 %cmp.n = icmp eq i64 %n.mod.vf, 0 587 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader 588 589for.body.preheader: ; preds = %entry, %middle.block 590 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ] 591 br label %for.body 592 593for.cond.cleanup: ; preds = %for.body, %middle.block 594 ret void 595 596for.body: ; preds = %for.body.preheader, %for.body 597 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ] 598 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv 599 %9 = load i32, ptr %arrayidx, align 4 600 %add = sub i32 %x, %9 601 store i32 %add, ptr %arrayidx, align 4 602 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 603 %cmp.not = icmp eq i64 %indvars.iv.next, 1024 604 br i1 %cmp.not, label %for.cond.cleanup, label %for.body 605} 606 607define void @sink_splat_and_scalable(ptr nocapture %a, i32 signext %x) { 608; CHECK-LABEL: sink_splat_and_scalable: 609; CHECK: # %bb.0: # %entry 610; CHECK-NEXT: csrr a5, vlenb 611; CHECK-NEXT: srli a3, a5, 1 612; CHECK-NEXT: li a2, 1024 613; CHECK-NEXT: bgeu a2, a3, .LBB11_2 614; CHECK-NEXT: # %bb.1: 615; CHECK-NEXT: li a2, 0 616; CHECK-NEXT: j .LBB11_5 617; CHECK-NEXT: .LBB11_2: # %vector.ph 618; CHECK-NEXT: addi a2, a3, -1 619; CHECK-NEXT: andi 
a4, a2, 1024 620; CHECK-NEXT: xori a2, a4, 1024 621; CHECK-NEXT: slli a5, a5, 1 622; CHECK-NEXT: mv a6, a0 623; CHECK-NEXT: mv a7, a2 624; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma 625; CHECK-NEXT: .LBB11_3: # %vector.body 626; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 627; CHECK-NEXT: vl2re32.v v8, (a6) 628; CHECK-NEXT: sub a7, a7, a3 629; CHECK-NEXT: vand.vx v8, v8, a1 630; CHECK-NEXT: vs2r.v v8, (a6) 631; CHECK-NEXT: add a6, a6, a5 632; CHECK-NEXT: bnez a7, .LBB11_3 633; CHECK-NEXT: # %bb.4: # %middle.block 634; CHECK-NEXT: beqz a4, .LBB11_7 635; CHECK-NEXT: .LBB11_5: # %for.body.preheader 636; CHECK-NEXT: slli a2, a2, 2 637; CHECK-NEXT: lui a3, 1 638; CHECK-NEXT: add a2, a0, a2 639; CHECK-NEXT: add a0, a0, a3 640; CHECK-NEXT: .LBB11_6: # %for.body 641; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 642; CHECK-NEXT: lw a3, 0(a2) 643; CHECK-NEXT: and a3, a3, a1 644; CHECK-NEXT: sw a3, 0(a2) 645; CHECK-NEXT: addi a2, a2, 4 646; CHECK-NEXT: bne a2, a0, .LBB11_6 647; CHECK-NEXT: .LBB11_7: # %for.cond.cleanup 648; CHECK-NEXT: ret 649entry: 650 %0 = call i64 @llvm.vscale.i64() 651 %1 = shl i64 %0, 2 652 %min.iters.check = icmp ugt i64 %1, 1024 653 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph 654 655vector.ph: ; preds = %entry 656 %2 = call i64 @llvm.vscale.i64() 657 %3 = shl i64 %2, 2 658 %n.mod.vf = urem i64 1024, %3 659 %n.vec = sub nsw i64 1024, %n.mod.vf 660 %broadcast.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0 661 %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer 662 %4 = call i64 @llvm.vscale.i64() 663 %5 = shl i64 %4, 2 664 br label %vector.body 665 666vector.body: ; preds = %vector.body, %vector.ph 667 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 668 %6 = getelementptr inbounds i32, ptr %a, i64 %index 669 %wide.load = load <vscale x 4 x i32>, ptr %6, align 4 670 %7 = and <vscale x 4 x i32> 
%wide.load, %broadcast.splat 671 store <vscale x 4 x i32> %7, ptr %6, align 4 672 %index.next = add nuw i64 %index, %5 673 %8 = icmp eq i64 %index.next, %n.vec 674 br i1 %8, label %middle.block, label %vector.body 675 676middle.block: ; preds = %vector.body 677 %cmp.n = icmp eq i64 %n.mod.vf, 0 678 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader 679 680for.body.preheader: ; preds = %entry, %middle.block 681 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ] 682 br label %for.body 683 684for.cond.cleanup: ; preds = %for.body, %middle.block 685 ret void 686 687for.body: ; preds = %for.body.preheader, %for.body 688 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ] 689 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv 690 %9 = load i32, ptr %arrayidx, align 4 691 %and = and i32 %9, %x 692 store i32 %and, ptr %arrayidx, align 4 693 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 694 %cmp.not = icmp eq i64 %indvars.iv.next, 1024 695 br i1 %cmp.not, label %for.cond.cleanup, label %for.body 696} 697 698define void @sink_splat_or_scalable(ptr nocapture %a, i32 signext %x) { 699; CHECK-LABEL: sink_splat_or_scalable: 700; CHECK: # %bb.0: # %entry 701; CHECK-NEXT: csrr a5, vlenb 702; CHECK-NEXT: srli a3, a5, 1 703; CHECK-NEXT: li a2, 1024 704; CHECK-NEXT: bgeu a2, a3, .LBB12_2 705; CHECK-NEXT: # %bb.1: 706; CHECK-NEXT: li a2, 0 707; CHECK-NEXT: j .LBB12_5 708; CHECK-NEXT: .LBB12_2: # %vector.ph 709; CHECK-NEXT: addi a2, a3, -1 710; CHECK-NEXT: andi a4, a2, 1024 711; CHECK-NEXT: xori a2, a4, 1024 712; CHECK-NEXT: slli a5, a5, 1 713; CHECK-NEXT: mv a6, a0 714; CHECK-NEXT: mv a7, a2 715; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma 716; CHECK-NEXT: .LBB12_3: # %vector.body 717; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 718; CHECK-NEXT: vl2re32.v v8, (a6) 719; CHECK-NEXT: sub a7, a7, a3 720; CHECK-NEXT: vor.vx v8, v8, a1 721; CHECK-NEXT: vs2r.v v8, (a6) 722; CHECK-NEXT: add a6, a6, 
a5 723; CHECK-NEXT: bnez a7, .LBB12_3 724; CHECK-NEXT: # %bb.4: # %middle.block 725; CHECK-NEXT: beqz a4, .LBB12_7 726; CHECK-NEXT: .LBB12_5: # %for.body.preheader 727; CHECK-NEXT: slli a2, a2, 2 728; CHECK-NEXT: lui a3, 1 729; CHECK-NEXT: add a2, a0, a2 730; CHECK-NEXT: add a0, a0, a3 731; CHECK-NEXT: .LBB12_6: # %for.body 732; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 733; CHECK-NEXT: lw a3, 0(a2) 734; CHECK-NEXT: or a3, a3, a1 735; CHECK-NEXT: sw a3, 0(a2) 736; CHECK-NEXT: addi a2, a2, 4 737; CHECK-NEXT: bne a2, a0, .LBB12_6 738; CHECK-NEXT: .LBB12_7: # %for.cond.cleanup 739; CHECK-NEXT: ret 740entry: 741 %0 = call i64 @llvm.vscale.i64() 742 %1 = shl i64 %0, 2 743 %min.iters.check = icmp ugt i64 %1, 1024 744 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph 745 746vector.ph: ; preds = %entry 747 %2 = call i64 @llvm.vscale.i64() 748 %3 = shl i64 %2, 2 749 %n.mod.vf = urem i64 1024, %3 750 %n.vec = sub nsw i64 1024, %n.mod.vf 751 %broadcast.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0 752 %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer 753 %4 = call i64 @llvm.vscale.i64() 754 %5 = shl i64 %4, 2 755 br label %vector.body 756 757vector.body: ; preds = %vector.body, %vector.ph 758 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 759 %6 = getelementptr inbounds i32, ptr %a, i64 %index 760 %wide.load = load <vscale x 4 x i32>, ptr %6, align 4 761 %7 = or <vscale x 4 x i32> %wide.load, %broadcast.splat 762 store <vscale x 4 x i32> %7, ptr %6, align 4 763 %index.next = add nuw i64 %index, %5 764 %8 = icmp eq i64 %index.next, %n.vec 765 br i1 %8, label %middle.block, label %vector.body 766 767middle.block: ; preds = %vector.body 768 %cmp.n = icmp eq i64 %n.mod.vf, 0 769 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader 770 771for.body.preheader: ; preds = %entry, %middle.block 772 %indvars.iv.ph = phi i64 
[ 0, %entry ], [ %n.vec, %middle.block ] 773 br label %for.body 774 775for.cond.cleanup: ; preds = %for.body, %middle.block 776 ret void 777 778for.body: ; preds = %for.body.preheader, %for.body 779 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ] 780 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv 781 %9 = load i32, ptr %arrayidx, align 4 782 %or = or i32 %9, %x 783 store i32 %or, ptr %arrayidx, align 4 784 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 785 %cmp.not = icmp eq i64 %indvars.iv.next, 1024 786 br i1 %cmp.not, label %for.cond.cleanup, label %for.body 787} 788 789define void @sink_splat_xor_scalable(ptr nocapture %a, i32 signext %x) { 790; CHECK-LABEL: sink_splat_xor_scalable: 791; CHECK: # %bb.0: # %entry 792; CHECK-NEXT: csrr a5, vlenb 793; CHECK-NEXT: srli a3, a5, 1 794; CHECK-NEXT: li a2, 1024 795; CHECK-NEXT: bgeu a2, a3, .LBB13_2 796; CHECK-NEXT: # %bb.1: 797; CHECK-NEXT: li a2, 0 798; CHECK-NEXT: j .LBB13_5 799; CHECK-NEXT: .LBB13_2: # %vector.ph 800; CHECK-NEXT: addi a2, a3, -1 801; CHECK-NEXT: andi a4, a2, 1024 802; CHECK-NEXT: xori a2, a4, 1024 803; CHECK-NEXT: slli a5, a5, 1 804; CHECK-NEXT: mv a6, a0 805; CHECK-NEXT: mv a7, a2 806; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma 807; CHECK-NEXT: .LBB13_3: # %vector.body 808; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 809; CHECK-NEXT: vl2re32.v v8, (a6) 810; CHECK-NEXT: sub a7, a7, a3 811; CHECK-NEXT: vxor.vx v8, v8, a1 812; CHECK-NEXT: vs2r.v v8, (a6) 813; CHECK-NEXT: add a6, a6, a5 814; CHECK-NEXT: bnez a7, .LBB13_3 815; CHECK-NEXT: # %bb.4: # %middle.block 816; CHECK-NEXT: beqz a4, .LBB13_7 817; CHECK-NEXT: .LBB13_5: # %for.body.preheader 818; CHECK-NEXT: slli a2, a2, 2 819; CHECK-NEXT: lui a3, 1 820; CHECK-NEXT: add a2, a0, a2 821; CHECK-NEXT: add a0, a0, a3 822; CHECK-NEXT: .LBB13_6: # %for.body 823; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 824; CHECK-NEXT: lw a3, 0(a2) 825; CHECK-NEXT: xor a3, a3, a1 826; 
CHECK-NEXT: sw a3, 0(a2) 827; CHECK-NEXT: addi a2, a2, 4 828; CHECK-NEXT: bne a2, a0, .LBB13_6 829; CHECK-NEXT: .LBB13_7: # %for.cond.cleanup 830; CHECK-NEXT: ret 831entry: 832 %0 = call i64 @llvm.vscale.i64() 833 %1 = shl i64 %0, 2 834 %min.iters.check = icmp ugt i64 %1, 1024 835 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph 836 837vector.ph: ; preds = %entry 838 %2 = call i64 @llvm.vscale.i64() 839 %3 = shl i64 %2, 2 840 %n.mod.vf = urem i64 1024, %3 841 %n.vec = sub nsw i64 1024, %n.mod.vf 842 %broadcast.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0 843 %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer 844 %4 = call i64 @llvm.vscale.i64() 845 %5 = shl i64 %4, 2 846 br label %vector.body 847 848vector.body: ; preds = %vector.body, %vector.ph 849 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 850 %6 = getelementptr inbounds i32, ptr %a, i64 %index 851 %wide.load = load <vscale x 4 x i32>, ptr %6, align 4 852 %7 = xor <vscale x 4 x i32> %wide.load, %broadcast.splat 853 store <vscale x 4 x i32> %7, ptr %6, align 4 854 %index.next = add nuw i64 %index, %5 855 %8 = icmp eq i64 %index.next, %n.vec 856 br i1 %8, label %middle.block, label %vector.body 857 858middle.block: ; preds = %vector.body 859 %cmp.n = icmp eq i64 %n.mod.vf, 0 860 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader 861 862for.body.preheader: ; preds = %entry, %middle.block 863 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ] 864 br label %for.body 865 866for.cond.cleanup: ; preds = %for.body, %middle.block 867 ret void 868 869for.body: ; preds = %for.body.preheader, %for.body 870 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ] 871 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv 872 %9 = load i32, ptr %arrayidx, align 4 873 %xor = xor i32 %9, %x 874 store i32 
%xor, ptr %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %cmp.not = icmp eq i64 %indvars.iv.next, 1024
  br i1 %cmp.not, label %for.cond.cleanup, label %for.body
}

; Fixed-length <4 x i32> loop. %x is splatted once in %entry and used as the
; shift amount of the vector shl in %vector.body. The checks expect the loop to
; shift with vsll.vx (scalar amount in a1); no separate vector splat is
; expected anywhere in the output.
define void @sink_splat_shl(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_shl:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a2, 1
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:  .LBB14_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vsll.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    bne a0, a2, .LBB14_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i32, ptr %a, i64 %index
  %wide.load = load <4 x i32>, ptr %0, align 4
  %1 = shl <4 x i32> %wide.load, %broadcast.splat
  store <4 x i32> %1, ptr %0, align 4
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}

; Same loop shape as sink_splat_shl with a vector lshr; the checks expect
; vsrl.vx with the scalar shift amount in a1.
define void @sink_splat_lshr(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_lshr:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a2, 1
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:  .LBB15_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vsrl.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    bne a0, a2, .LBB15_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i32, ptr %a, i64 %index
  %wide.load = load <4 x i32>, ptr %0, align 4
  %1 = lshr <4 x i32> %wide.load, %broadcast.splat
  store <4 x i32> %1, ptr %0, align 4
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}

; Same loop shape as sink_splat_shl with a vector ashr; the checks expect
; vsra.vx with the scalar shift amount in a1.
define void @sink_splat_ashr(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_ashr:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a2, 1
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:  .LBB16_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vsra.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    bne a0, a2, .LBB16_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i32, ptr %a, i64 %index
  %wide.load = load <4 x i32>, ptr %0, align 4
  %1 = ashr <4 x i32> %wide.load, %broadcast.splat
  store <4 x i32> %1, ptr %0, align 4
  %index.next = add
nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}

; Scalable <vscale x 4 x i32> variant of sink_splat_shl: a vector loop over
; %n.vec elements followed by a scalar remainder loop (%for.body). The splat of
; %x is built in %vector.ph. The checks expect vsll.vx in the vector loop and
; sllw in the scalar loop, both taking the shift amount from a1.
define void @sink_splat_shl_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_shl_scalable:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    csrr a5, vlenb
; CHECK-NEXT:    srli a3, a5, 1
; CHECK-NEXT:    li a2, 1024
; CHECK-NEXT:    bgeu a2, a3, .LBB17_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    li a2, 0
; CHECK-NEXT:    j .LBB17_5
; CHECK-NEXT:  .LBB17_2: # %vector.ph
; CHECK-NEXT:    addi a2, a3, -1
; CHECK-NEXT:    andi a4, a2, 1024
; CHECK-NEXT:    xori a2, a4, 1024
; CHECK-NEXT:    slli a5, a5, 1
; CHECK-NEXT:    mv a6, a0
; CHECK-NEXT:    mv a7, a2
; CHECK-NEXT:    vsetvli t0, zero, e32, m2, ta, ma
; CHECK-NEXT:  .LBB17_3: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vl2re32.v v8, (a6)
; CHECK-NEXT:    sub a7, a7, a3
; CHECK-NEXT:    vsll.vx v8, v8, a1
; CHECK-NEXT:    vs2r.v v8, (a6)
; CHECK-NEXT:    add a6, a6, a5
; CHECK-NEXT:    bnez a7, .LBB17_3
; CHECK-NEXT:  # %bb.4: # %middle.block
; CHECK-NEXT:    beqz a4, .LBB17_7
; CHECK-NEXT:  .LBB17_5: # %for.body.preheader
; CHECK-NEXT:    slli a2, a2, 2
; CHECK-NEXT:    lui a3, 1
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    add a0, a0, a3
; CHECK-NEXT:  .LBB17_6: # %for.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    lw a3, 0(a2)
; CHECK-NEXT:    sllw a3, a3, a1
; CHECK-NEXT:    sw a3, 0(a2)
; CHECK-NEXT:    addi a2, a2, 4
; CHECK-NEXT:    bne a2, a0, .LBB17_6
; CHECK-NEXT:  .LBB17_7: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  %0 = call i64 @llvm.vscale.i64()
  %1 = shl i64 %0, 2
  %min.iters.check = icmp ugt i64 %1, 1024
  br i1 %min.iters.check, label %for.body.preheader, label %vector.ph

vector.ph:                                        ; preds = %entry
  %2 = call i64 @llvm.vscale.i64()
  %3 =
shl i64 %2, 2
  %n.mod.vf = urem i64 1024, %3
  %n.vec = sub nsw i64 1024, %n.mod.vf
  %broadcast.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
  %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
  %4 = call i64 @llvm.vscale.i64()
  %5 = shl i64 %4, 2
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %6 = getelementptr inbounds i32, ptr %a, i64 %index
  %wide.load = load <vscale x 4 x i32>, ptr %6, align 4
  %7 = shl <vscale x 4 x i32> %wide.load, %broadcast.splat
  store <vscale x 4 x i32> %7, ptr %6, align 4
  %index.next = add nuw i64 %index, %5
  %8 = icmp eq i64 %index.next, %n.vec
  br i1 %8, label %middle.block, label %vector.body

middle.block:                                     ; preds = %vector.body
  %cmp.n = icmp eq i64 %n.mod.vf, 0
  br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader

for.body.preheader:                               ; preds = %entry, %middle.block
  %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %middle.block
  ret void

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
  %9 = load i32, ptr %arrayidx, align 4
  %shl = shl i32 %9, %x
  store i32 %shl, ptr %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %cmp.not = icmp eq i64 %indvars.iv.next, 1024
  br i1 %cmp.not, label %for.cond.cleanup, label %for.body
}

; Scalable <vscale x 4 x i32> variant of sink_splat_lshr: vector loop plus
; scalar remainder loop. The checks expect vsrl.vx in the vector loop and srlw
; in the scalar loop, both taking the shift amount from a1.
define void @sink_splat_lshr_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_lshr_scalable:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    csrr a5, vlenb
; CHECK-NEXT:    srli a3, a5, 1
; CHECK-NEXT:    li a2, 1024
; CHECK-NEXT:    bgeu a2, a3, .LBB18_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    li a2, 0
; CHECK-NEXT:    j .LBB18_5
; CHECK-NEXT:  .LBB18_2: # %vector.ph
; CHECK-NEXT:    addi a2, a3, -1
; CHECK-NEXT:    andi a4, a2, 1024
; CHECK-NEXT:    xori a2, a4, 1024
; CHECK-NEXT:    slli a5, a5, 1
; CHECK-NEXT:    mv a6, a0
; CHECK-NEXT:    mv a7, a2
; CHECK-NEXT:    vsetvli t0, zero, e32, m2, ta, ma
; CHECK-NEXT:  .LBB18_3: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vl2re32.v v8, (a6)
; CHECK-NEXT:    sub a7, a7, a3
; CHECK-NEXT:    vsrl.vx v8, v8, a1
; CHECK-NEXT:    vs2r.v v8, (a6)
; CHECK-NEXT:    add a6, a6, a5
; CHECK-NEXT:    bnez a7, .LBB18_3
; CHECK-NEXT:  # %bb.4: # %middle.block
; CHECK-NEXT:    beqz a4, .LBB18_7
; CHECK-NEXT:  .LBB18_5: # %for.body.preheader
; CHECK-NEXT:    slli a2, a2, 2
; CHECK-NEXT:    lui a3, 1
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    add a0, a0, a3
; CHECK-NEXT:  .LBB18_6: # %for.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    lw a3, 0(a2)
; CHECK-NEXT:    srlw a3, a3, a1
; CHECK-NEXT:    sw a3, 0(a2)
; CHECK-NEXT:    addi a2, a2, 4
; CHECK-NEXT:    bne a2, a0, .LBB18_6
; CHECK-NEXT:  .LBB18_7: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  %0 = call i64 @llvm.vscale.i64()
  %1 = shl i64 %0, 2
  %min.iters.check = icmp ugt i64 %1, 1024
  br i1 %min.iters.check, label %for.body.preheader, label %vector.ph

vector.ph:                                        ; preds = %entry
  %2 = call i64 @llvm.vscale.i64()
  %3 = shl i64 %2, 2
  %n.mod.vf = urem i64 1024, %3
  %n.vec = sub nsw i64 1024, %n.mod.vf
  %broadcast.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
  %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
  %4 = call i64
@llvm.vscale.i64()
  %5 = shl i64 %4, 2
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %6 = getelementptr inbounds i32, ptr %a, i64 %index
  %wide.load = load <vscale x 4 x i32>, ptr %6, align 4
  %7 = lshr <vscale x 4 x i32> %wide.load, %broadcast.splat
  store <vscale x 4 x i32> %7, ptr %6, align 4
  %index.next = add nuw i64 %index, %5
  %8 = icmp eq i64 %index.next, %n.vec
  br i1 %8, label %middle.block, label %vector.body

middle.block:                                     ; preds = %vector.body
  %cmp.n = icmp eq i64 %n.mod.vf, 0
  br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader

for.body.preheader:                               ; preds = %entry, %middle.block
  %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %middle.block
  ret void

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
  %9 = load i32, ptr %arrayidx, align 4
  %lshr = lshr i32 %9, %x
  store i32 %lshr, ptr %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %cmp.not = icmp eq i64 %indvars.iv.next, 1024
  br i1 %cmp.not, label %for.cond.cleanup, label %for.body
}

; Scalable variant with a constant shift amount: the vector ashr uses
; splat (i32 2) directly, so the function takes only %a. The checks expect
; vsra.vi with immediate 2 in the vector loop. The scalar loop is checked as
; srli rather than an arithmetic shift: on RV64 lw sign-extends the loaded word
; and sw stores only the low 32 bits, so the stored value matches ashr i32 by 2.
define void @sink_splat_ashr_scalable(ptr nocapture %a) {
; CHECK-LABEL: sink_splat_ashr_scalable:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    csrr a4, vlenb
; CHECK-NEXT:    srli a2, a4, 1
; CHECK-NEXT:    li a1, 1024
; CHECK-NEXT:    bgeu a1, a2, .LBB19_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    li a1, 0
; CHECK-NEXT:    j .LBB19_5
; CHECK-NEXT:  .LBB19_2: # %vector.ph
; CHECK-NEXT:    addi a1, a2, -1
; CHECK-NEXT:    andi a3, a1, 1024
; CHECK-NEXT:    xori a1, a3, 1024
; CHECK-NEXT:    slli a4, a4, 1
; CHECK-NEXT:    mv a5, a0
; CHECK-NEXT:    mv a6, a1
; CHECK-NEXT:    vsetvli a7, zero, e32, m2, ta, ma
; CHECK-NEXT:  .LBB19_3: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vl2re32.v v8, (a5)
; CHECK-NEXT:    sub a6, a6, a2
; CHECK-NEXT:    vsra.vi v8, v8, 2
; CHECK-NEXT:    vs2r.v v8, (a5)
; CHECK-NEXT:    add a5, a5, a4
; CHECK-NEXT:    bnez a6, .LBB19_3
; CHECK-NEXT:  # %bb.4: # %middle.block
; CHECK-NEXT:    beqz a3, .LBB19_7
; CHECK-NEXT:  .LBB19_5: # %for.body.preheader
; CHECK-NEXT:    slli a1, a1, 2
; CHECK-NEXT:    lui a2, 1
; CHECK-NEXT:    add a1, a0, a1
; CHECK-NEXT:    add a0, a0, a2
; CHECK-NEXT:  .LBB19_6: # %for.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    lw a2, 0(a1)
; CHECK-NEXT:    srli a2, a2, 2
; CHECK-NEXT:    sw a2, 0(a1)
; CHECK-NEXT:    addi a1, a1, 4
; CHECK-NEXT:    bne a1, a0, .LBB19_6
; CHECK-NEXT:  .LBB19_7: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  %0 = call i64 @llvm.vscale.i64()
  %1 = shl i64 %0, 2
  %min.iters.check = icmp ugt i64 %1, 1024
  br i1 %min.iters.check, label %for.body.preheader, label %vector.ph

vector.ph:                                        ; preds = %entry
  %2 = call i64 @llvm.vscale.i64()
  %3 = shl i64 %2, 2
  %n.mod.vf = urem i64 1024, %3
  %n.vec = sub nsw i64 1024, %n.mod.vf
  %4 = call i64 @llvm.vscale.i64()
  %5 = shl i64 %4, 2
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %6 = getelementptr inbounds i32, ptr %a, i64 %index
  %wide.load = load <vscale x 4 x i32>, ptr %6, align 4
  %7 = ashr <vscale x 4 x i32> %wide.load, splat (i32 2)
  store <vscale x 4 x i32> %7, ptr %6, align 4
  %index.next = add nuw i64 %index, %5
  %8 = icmp eq i64 %index.next, %n.vec
  br i1 %8, label %middle.block, label %vector.body

middle.block:                                     ; preds = %vector.body
  %cmp.n = icmp eq i64 %n.mod.vf, 0
  br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader

for.body.preheader:                               ; preds = %entry, %middle.block
  %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %middle.block
  ret void

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
  %9 = load i32, ptr %arrayidx, align 4
  %ashr = ashr i32 %9, 2
  store i32 %ashr, ptr %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %cmp.not = icmp eq i64 %indvars.iv.next, 1024
  br i1 %cmp.not, label %for.cond.cleanup, label %for.body
}

; Fixed-length <4 x float> loop. float %x is splatted once in %entry and
; multiplied into each loaded vector in %vector.body. The checks expect
; vfmul.vf with the scalar operand in fa0 and no separate vector splat.
define void @sink_splat_fmul(ptr nocapture %a, float %x) {
; CHECK-LABEL: sink_splat_fmul:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a1, 1
; CHECK-NEXT:    add a1, a0, a1
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:  .LBB20_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfmul.vf v8, v8, fa0
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    bne a0, a1, .LBB20_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
  %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds float, ptr %a, i64 %index
  %wide.load = load <4 x float>, ptr %0, align 4
  %1 = fmul <4 x float> %wide.load, %broadcast.splat
store <4 x float> %1, ptr %0, align 4
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}

; Fixed-length <4 x float> loop dividing each loaded vector by the splat of %x.
; The checks expect vfdiv.vf with the scalar divisor in fa0.
define void @sink_splat_fdiv(ptr nocapture %a, float %x) {
; CHECK-LABEL: sink_splat_fdiv:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a1, 1
; CHECK-NEXT:    add a1, a0, a1
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:  .LBB21_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    bne a0, a1, .LBB21_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
  %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds float, ptr %a, i64 %index
  %wide.load = load <4 x float>, ptr %0, align 4
  %1 = fdiv <4 x float> %wide.load, %broadcast.splat
  store <4 x float> %1, ptr %0, align 4
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}

; Reversed-operand form of sink_splat_fdiv: the splat of %x is the dividend
; (first fdiv operand) and the loaded vector the divisor. The checks expect
; vfrdiv.vf with the scalar in fa0.
define void @sink_splat_frdiv(ptr nocapture %a, float %x) {
; CHECK-LABEL: sink_splat_frdiv:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a1, 1
; CHECK-NEXT:    add a1, a0, a1
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:  .LBB22_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    bne a0, a1, .LBB22_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
  %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds float, ptr %a, i64 %index
  %wide.load = load <4 x float>, ptr %0, align 4
  %1 = fdiv <4 x float> %broadcast.splat, %wide.load
  store <4 x float> %1, ptr %0, align 4
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}

; Fixed-length <4 x float> loop adding the splat of %x to each loaded vector.
; The checks expect vfadd.vf with the scalar operand in fa0.
define void @sink_splat_fadd(ptr nocapture %a, float %x) {
; CHECK-LABEL: sink_splat_fadd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a1, 1
; CHECK-NEXT:    add a1, a0, a1
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:  .LBB23_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfadd.vf v8, v8, fa0
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    bne a0, a1, .LBB23_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
  %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds float, ptr
%a, i64 %index
  %wide.load = load <4 x float>, ptr %0, align 4
  %1 = fadd <4 x float> %wide.load, %broadcast.splat
  store <4 x float> %1, ptr %0, align 4
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}

; Fixed-length <4 x float> loop subtracting the splat of %x from each loaded
; vector. The checks expect vfsub.vf with the scalar operand in fa0.
define void @sink_splat_fsub(ptr nocapture %a, float %x) {
; CHECK-LABEL: sink_splat_fsub:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a1, 1
; CHECK-NEXT:    add a1, a0, a1
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:  .LBB24_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfsub.vf v8, v8, fa0
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    bne a0, a1, .LBB24_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
  %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds float, ptr %a, i64 %index
  %wide.load = load <4 x float>, ptr %0, align 4
  %1 = fsub <4 x float> %wide.load, %broadcast.splat
  store <4 x float> %1, ptr %0, align 4
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}

; Reversed-operand form of sink_splat_fsub: the splat of %x is the minuend
; (first fsub operand). The checks expect vfrsub.vf with the scalar in fa0.
define void @sink_splat_frsub(ptr nocapture %a, float %x) {
; CHECK-LABEL: sink_splat_frsub:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a1, 1
; CHECK-NEXT:    add a1, a0, a1
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:  .LBB25_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    bne a0, a1, .LBB25_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
  %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds float, ptr %a, i64 %index
  %wide.load = load <4 x float>, ptr %0, align 4
  %1 = fsub <4 x float> %broadcast.splat, %wide.load
  store <4 x float> %1, ptr %0, align 4
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}

; Scalable <vscale x 2 x float> variant of sink_splat_fmul: a vector loop over
; %n.vec elements followed by a scalar remainder loop (%for.body). The splat of
; %x is built in %vector.ph. The checks expect vfmul.vf in the vector loop and
; fmul.s in the scalar loop, both reading the scalar from fa0.
define void @sink_splat_fmul_scalable(ptr nocapture %a, float %x) {
; CHECK-LABEL: sink_splat_fmul_scalable:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    srli a3, a1, 2
; CHECK-NEXT:    li a2, 1024
; CHECK-NEXT:    bgeu a2, a3, .LBB26_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    li a2, 0
; CHECK-NEXT:    j .LBB26_5
; CHECK-NEXT:  .LBB26_2: # %vector.ph
; CHECK-NEXT:    addi a2, a3, -1
; CHECK-NEXT:    andi a4, a2, 1024
; CHECK-NEXT:    xori a2, a4, 1024
; CHECK-NEXT:    mv a5, a0
; CHECK-NEXT:    mv a6, a2
; CHECK-NEXT:    vsetvli a7, zero, e32, m1, ta, ma
; CHECK-NEXT:  .LBB26_3: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vl1re32.v v8, (a5)
; CHECK-NEXT:    sub a6, a6, a3
; CHECK-NEXT:    vfmul.vf v8, v8, fa0
; CHECK-NEXT:    vs1r.v v8, (a5)
; CHECK-NEXT:    add a5, a5, a1
; CHECK-NEXT:    bnez a6, .LBB26_3
; CHECK-NEXT:  # %bb.4: # %middle.block
; CHECK-NEXT:    beqz a4, .LBB26_7
; CHECK-NEXT:  .LBB26_5: # %for.body.preheader
; CHECK-NEXT:    slli a1, a2, 2
; CHECK-NEXT:    lui a2, 1
; CHECK-NEXT:    add a1, a0, a1
; CHECK-NEXT:    add a0, a0, a2
; CHECK-NEXT:  .LBB26_6: # %for.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    flw fa5, 0(a1)
; CHECK-NEXT:    fmul.s fa5, fa5, fa0
; CHECK-NEXT:    fsw fa5, 0(a1)
; CHECK-NEXT:    addi a1, a1, 4
; CHECK-NEXT:    bne a1, a0, .LBB26_6
; CHECK-NEXT:  .LBB26_7: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  %0 = call i64 @llvm.vscale.i64()
  %1 = shl i64 %0, 1
  %min.iters.check = icmp ugt i64 %1, 1024
  br i1 %min.iters.check, label %for.body.preheader, label %vector.ph

vector.ph:                                        ; preds = %entry
  %2 = call i64 @llvm.vscale.i64()
  %3 = shl i64 %2, 1
  %n.mod.vf = urem i64 1024, %3
  %n.vec = sub nsw i64 1024, %n.mod.vf
  %broadcast.splatinsert = insertelement <vscale x 2 x float> poison, float %x, i32 0
  %broadcast.splat = shufflevector <vscale x 2 x float> %broadcast.splatinsert, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
  %4 = call i64 @llvm.vscale.i64()
  %5 = shl i64 %4, 1
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %6 = getelementptr inbounds float, ptr %a, i64 %index
  %wide.load = load <vscale x 2 x float>, ptr %6, align 4
  %7 = fmul <vscale x 2 x float> %wide.load, %broadcast.splat
  store <vscale x 2 x float> %7, ptr %6, align 4
  %index.next = add nuw i64 %index, %5
  %8 = icmp eq i64 %index.next, %n.vec
  br i1 %8, label %middle.block, label %vector.body

middle.block:                                     ; preds = %vector.body
  %cmp.n = icmp eq i64 %n.mod.vf, 0
  br i1 %cmp.n, label %for.cond.cleanup, label
%for.body.preheader

for.body.preheader:                               ; preds = %entry, %middle.block
  %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %middle.block
  ret void

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
  %9 = load float, ptr %arrayidx, align 4
  %mul = fmul float %9, %x
  store float %mul, ptr %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %cmp.not = icmp eq i64 %indvars.iv.next, 1024
  br i1 %cmp.not, label %for.cond.cleanup, label %for.body
}

; Scalable <vscale x 2 x float> variant of sink_splat_fdiv: vector loop plus
; scalar remainder loop. The checks expect vfdiv.vf in the vector loop and
; fdiv.s fa5, fa5, fa0 in the scalar loop (loaded value divided by %x).
define void @sink_splat_fdiv_scalable(ptr nocapture %a, float %x) {
; CHECK-LABEL: sink_splat_fdiv_scalable:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    srli a3, a1, 2
; CHECK-NEXT:    li a2, 1024
; CHECK-NEXT:    bgeu a2, a3, .LBB27_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    li a2, 0
; CHECK-NEXT:    j .LBB27_5
; CHECK-NEXT:  .LBB27_2: # %vector.ph
; CHECK-NEXT:    addi a2, a3, -1
; CHECK-NEXT:    andi a4, a2, 1024
; CHECK-NEXT:    xori a2, a4, 1024
; CHECK-NEXT:    mv a5, a0
; CHECK-NEXT:    mv a6, a2
; CHECK-NEXT:    vsetvli a7, zero, e32, m1, ta, ma
; CHECK-NEXT:  .LBB27_3: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vl1re32.v v8, (a5)
; CHECK-NEXT:    sub a6, a6, a3
; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
; CHECK-NEXT:    vs1r.v v8, (a5)
; CHECK-NEXT:    add a5, a5, a1
; CHECK-NEXT:    bnez a6, .LBB27_3
; CHECK-NEXT:  # %bb.4: # %middle.block
; CHECK-NEXT:    beqz a4, .LBB27_7
; CHECK-NEXT:  .LBB27_5: # %for.body.preheader
; CHECK-NEXT:    slli a1, a2, 2
; CHECK-NEXT:    lui a2, 1
; CHECK-NEXT:    add a1, a0, a1
; CHECK-NEXT:    add a0, a0, a2
; CHECK-NEXT:  .LBB27_6: # %for.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    flw fa5, 0(a1)
; CHECK-NEXT:    fdiv.s fa5, fa5, fa0
; CHECK-NEXT:    fsw fa5, 0(a1)
; CHECK-NEXT:    addi a1, a1, 4
; CHECK-NEXT:    bne a1, a0, .LBB27_6
; CHECK-NEXT:  .LBB27_7: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  %0 = call i64 @llvm.vscale.i64()
  %1 = shl i64 %0, 1
  %min.iters.check = icmp ugt i64 %1, 1024
  br i1 %min.iters.check, label %for.body.preheader, label %vector.ph

vector.ph:                                        ; preds = %entry
  %2 = call i64 @llvm.vscale.i64()
  %3 = shl i64 %2, 1
  %n.mod.vf = urem i64 1024, %3
  %n.vec = sub nsw i64 1024, %n.mod.vf
  %broadcast.splatinsert = insertelement <vscale x 2 x float> poison, float %x, i32 0
  %broadcast.splat = shufflevector <vscale x 2 x float> %broadcast.splatinsert, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
  %4 = call i64 @llvm.vscale.i64()
  %5 = shl i64 %4, 1
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %6 = getelementptr inbounds float, ptr %a, i64 %index
  %wide.load = load <vscale x 2 x float>, ptr %6, align 4
  %7 = fdiv <vscale x 2 x float> %wide.load, %broadcast.splat
  store <vscale x 2 x float> %7, ptr %6, align 4
  %index.next = add nuw i64 %index, %5
  %8 = icmp eq i64 %index.next, %n.vec
  br i1 %8, label %middle.block, label %vector.body

middle.block:                                     ; preds = %vector.body
  %cmp.n = icmp eq i64 %n.mod.vf, 0
  br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader

for.body.preheader:                               ; preds = %entry, %middle.block
  %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %middle.block
  ret void

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [
%indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
  %9 = load float, ptr %arrayidx, align 4
  %div = fdiv float %9, %x
  store float %div, ptr %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %cmp.not = icmp eq i64 %indvars.iv.next, 1024
  br i1 %cmp.not, label %for.cond.cleanup, label %for.body
}

; Scalable <vscale x 2 x float> variant of sink_splat_frdiv: the splat of %x is
; the dividend in the vector loop and %x is the dividend in the scalar
; remainder loop. The checks expect vfrdiv.vf in the vector loop and
; fdiv.s fa5, fa0, fa5 in the scalar loop.
define void @sink_splat_frdiv_scalable(ptr nocapture %a, float %x) {
; CHECK-LABEL: sink_splat_frdiv_scalable:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    srli a3, a1, 2
; CHECK-NEXT:    li a2, 1024
; CHECK-NEXT:    bgeu a2, a3, .LBB28_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    li a2, 0
; CHECK-NEXT:    j .LBB28_5
; CHECK-NEXT:  .LBB28_2: # %vector.ph
; CHECK-NEXT:    addi a2, a3, -1
; CHECK-NEXT:    andi a4, a2, 1024
; CHECK-NEXT:    xori a2, a4, 1024
; CHECK-NEXT:    mv a5, a0
; CHECK-NEXT:    mv a6, a2
; CHECK-NEXT:    vsetvli a7, zero, e32, m1, ta, ma
; CHECK-NEXT:  .LBB28_3: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vl1re32.v v8, (a5)
; CHECK-NEXT:    sub a6, a6, a3
; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
; CHECK-NEXT:    vs1r.v v8, (a5)
; CHECK-NEXT:    add a5, a5, a1
; CHECK-NEXT:    bnez a6, .LBB28_3
; CHECK-NEXT:  # %bb.4: # %middle.block
; CHECK-NEXT:    beqz a4, .LBB28_7
; CHECK-NEXT:  .LBB28_5: # %for.body.preheader
; CHECK-NEXT:    slli a1, a2, 2
; CHECK-NEXT:    lui a2, 1
; CHECK-NEXT:    add a1, a0, a1
; CHECK-NEXT:    add a0, a0, a2
; CHECK-NEXT:  .LBB28_6: # %for.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    flw fa5, 0(a1)
; CHECK-NEXT:    fdiv.s fa5, fa0, fa5
; CHECK-NEXT:    fsw fa5, 0(a1)
; CHECK-NEXT:    addi a1, a1, 4
; CHECK-NEXT:    bne a1, a0, .LBB28_6
; CHECK-NEXT:  .LBB28_7: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  %0 = call i64 @llvm.vscale.i64()
  %1 = shl i64 %0, 1
  %min.iters.check = icmp ugt i64 %1, 1024
  br i1 %min.iters.check, label %for.body.preheader, label %vector.ph

vector.ph:                                        ; preds = %entry
  %2 = call i64 @llvm.vscale.i64()
  %3 = shl i64 %2, 1
  %n.mod.vf = urem i64 1024, %3
  %n.vec = sub nsw i64 1024, %n.mod.vf
  %broadcast.splatinsert = insertelement <vscale x 2 x float> poison, float %x, i32 0
  %broadcast.splat = shufflevector <vscale x 2 x float> %broadcast.splatinsert, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
  %4 = call i64 @llvm.vscale.i64()
  %5 = shl i64 %4, 1
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %6 = getelementptr inbounds float, ptr %a, i64 %index
  %wide.load = load <vscale x 2 x float>, ptr %6, align 4
  %7 = fdiv <vscale x 2 x float> %broadcast.splat, %wide.load
  store <vscale x 2 x float> %7, ptr %6, align 4
  %index.next = add nuw i64 %index, %5
  %8 = icmp eq i64 %index.next, %n.vec
  br i1 %8, label %middle.block, label %vector.body

middle.block:                                     ; preds = %vector.body
  %cmp.n = icmp eq i64 %n.mod.vf, 0
  br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader

for.body.preheader:                               ; preds = %entry, %middle.block
  %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %middle.block
  ret void

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
  %9 = load float, ptr %arrayidx, align 4
  %div = fdiv float %x, %9
  store float %div, ptr %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %cmp.not = icmp eq i64
%indvars.iv.next, 1024 1724 br i1 %cmp.not, label %for.cond.cleanup, label %for.body 1725} 1726 1727define void @sink_splat_fadd_scalable(ptr nocapture %a, float %x) { 1728; CHECK-LABEL: sink_splat_fadd_scalable: 1729; CHECK: # %bb.0: # %entry 1730; CHECK-NEXT: csrr a1, vlenb 1731; CHECK-NEXT: srli a3, a1, 2 1732; CHECK-NEXT: li a2, 1024 1733; CHECK-NEXT: bgeu a2, a3, .LBB29_2 1734; CHECK-NEXT: # %bb.1: 1735; CHECK-NEXT: li a2, 0 1736; CHECK-NEXT: j .LBB29_5 1737; CHECK-NEXT: .LBB29_2: # %vector.ph 1738; CHECK-NEXT: addi a2, a3, -1 1739; CHECK-NEXT: andi a4, a2, 1024 1740; CHECK-NEXT: xori a2, a4, 1024 1741; CHECK-NEXT: mv a5, a0 1742; CHECK-NEXT: mv a6, a2 1743; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, ma 1744; CHECK-NEXT: .LBB29_3: # %vector.body 1745; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 1746; CHECK-NEXT: vl1re32.v v8, (a5) 1747; CHECK-NEXT: sub a6, a6, a3 1748; CHECK-NEXT: vfadd.vf v8, v8, fa0 1749; CHECK-NEXT: vs1r.v v8, (a5) 1750; CHECK-NEXT: add a5, a5, a1 1751; CHECK-NEXT: bnez a6, .LBB29_3 1752; CHECK-NEXT: # %bb.4: # %middle.block 1753; CHECK-NEXT: beqz a4, .LBB29_7 1754; CHECK-NEXT: .LBB29_5: # %for.body.preheader 1755; CHECK-NEXT: slli a1, a2, 2 1756; CHECK-NEXT: lui a2, 1 1757; CHECK-NEXT: add a1, a0, a1 1758; CHECK-NEXT: add a0, a0, a2 1759; CHECK-NEXT: .LBB29_6: # %for.body 1760; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 1761; CHECK-NEXT: flw fa5, 0(a1) 1762; CHECK-NEXT: fadd.s fa5, fa5, fa0 1763; CHECK-NEXT: fsw fa5, 0(a1) 1764; CHECK-NEXT: addi a1, a1, 4 1765; CHECK-NEXT: bne a1, a0, .LBB29_6 1766; CHECK-NEXT: .LBB29_7: # %for.cond.cleanup 1767; CHECK-NEXT: ret 1768entry: 1769 %0 = call i64 @llvm.vscale.i64() 1770 %1 = shl i64 %0, 1 1771 %min.iters.check = icmp ugt i64 %1, 1024 1772 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph 1773 1774vector.ph: ; preds = %entry 1775 %2 = call i64 @llvm.vscale.i64() 1776 %3 = shl i64 %2, 1 1777 %n.mod.vf = urem i64 1024, %3 1778 %n.vec = sub nsw i64 1024, %n.mod.vf 1779 
%broadcast.splatinsert = insertelement <vscale x 2 x float> poison, float %x, i32 0
  %broadcast.splat = shufflevector <vscale x 2 x float> %broadcast.splatinsert, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
  %4 = call i64 @llvm.vscale.i64()
  %5 = shl i64 %4, 1
  br label %vector.body

vector.body: ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %6 = getelementptr inbounds float, ptr %a, i64 %index
  %wide.load = load <vscale x 2 x float>, ptr %6, align 4
  ; The splat built in vector.ph is the right-hand operand of the fadd.
  %7 = fadd <vscale x 2 x float> %wide.load, %broadcast.splat
  store <vscale x 2 x float> %7, ptr %6, align 4
  %index.next = add nuw i64 %index, %5
  %8 = icmp eq i64 %index.next, %n.vec
  br i1 %8, label %middle.block, label %vector.body

middle.block: ; preds = %vector.body
  %cmp.n = icmp eq i64 %n.mod.vf, 0
  br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader

for.body.preheader: ; preds = %entry, %middle.block
  %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
  br label %for.body

for.cond.cleanup: ; preds = %for.body, %middle.block
  ret void

for.body: ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
  %9 = load float, ptr %arrayidx, align 4
  ; NOTE: the result is named %mul although the operation is an fadd.
  %mul = fadd float %9, %x
  store float %mul, ptr %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %cmp.not = icmp eq i64 %indvars.iv.next, 1024
  br i1 %cmp.not, label %for.cond.cleanup, label %for.body
}

; Scalable-vector loop that subtracts a splat of %x from each loaded vector
; (loaded - splat). The expected assembly uses vfsub.vf with fa0 inside the
; vector loop; the scalar remainder loop computes a[i] - %x with fsub.s.
define void @sink_splat_fsub_scalable(ptr nocapture %a, float %x) {
; CHECK-LABEL: sink_splat_fsub_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: srli a3, a1, 2
; CHECK-NEXT: li a2, 1024
; CHECK-NEXT: bgeu a2, a3, .LBB30_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: j .LBB30_5
; CHECK-NEXT: .LBB30_2: # %vector.ph
; CHECK-NEXT: addi a2, a3, -1
; CHECK-NEXT: andi a4, a2, 1024
; CHECK-NEXT: xori a2, a4, 1024
; CHECK-NEXT: mv a5, a0
; CHECK-NEXT: mv a6, a2
; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, ma
; CHECK-NEXT: .LBB30_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl1re32.v v8, (a5)
; CHECK-NEXT: sub a6, a6, a3
; CHECK-NEXT: vfsub.vf v8, v8, fa0
; CHECK-NEXT: vs1r.v v8, (a5)
; CHECK-NEXT: add a5, a5, a1
; CHECK-NEXT: bnez a6, .LBB30_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB30_7
; CHECK-NEXT: .LBB30_5: # %for.body.preheader
; CHECK-NEXT: slli a1, a2, 2
; CHECK-NEXT: lui a2, 1
; CHECK-NEXT: add a1, a0, a1
; CHECK-NEXT: add a0, a0, a2
; CHECK-NEXT: .LBB30_6: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: flw fa5, 0(a1)
; CHECK-NEXT: fsub.s fa5, fa5, fa0
; CHECK-NEXT: fsw fa5, 0(a1)
; CHECK-NEXT: addi a1, a1, 4
; CHECK-NEXT: bne a1, a0, .LBB30_6
; CHECK-NEXT: .LBB30_7: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
  ; Take the scalar loop directly when one vector step (vscale * 2 lanes)
  ; exceeds the 1024-element trip count.
  %0 = call i64 @llvm.vscale.i64()
  %1 = shl i64 %0, 1
  %min.iters.check = icmp ugt i64 %1, 1024
  br i1 %min.iters.check, label %for.body.preheader, label %vector.ph

vector.ph: ; preds = %entry
  ; %n.vec is 1024 rounded down to a multiple of the vector step.
  %2 = call i64 @llvm.vscale.i64()
  %3 = shl i64 %2, 1
  %n.mod.vf = urem i64 1024, %3
  %n.vec = sub nsw i64 1024, %n.mod.vf
  %broadcast.splatinsert = insertelement <vscale x 2 x float> poison, float %x, i32 0
  %broadcast.splat = shufflevector <vscale x 2 x float> %broadcast.splatinsert, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
  %4 = call i64 @llvm.vscale.i64()
  %5 = shl i64 %4, 1
  br label %vector.body

vector.body: ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %6 = getelementptr inbounds float, ptr %a, i64 %index
  %wide.load = load <vscale x 2 x float>, ptr %6, align 4
  %7 = fsub <vscale x 2 x float> %wide.load, %broadcast.splat
  store <vscale x 2 x float> %7, ptr %6, align 4
  %index.next = add nuw i64 %index, %5
  %8 = icmp eq i64 %index.next, %n.vec
  br i1 %8, label %middle.block, label %vector.body

middle.block: ; preds = %vector.body
  %cmp.n = icmp eq i64 %n.mod.vf, 0
  br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader

for.body.preheader: ; preds = %entry, %middle.block
  %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
  br label %for.body

for.cond.cleanup: ; preds = %for.body, %middle.block
  ret void

for.body: ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
  %9 = load float, ptr %arrayidx, align 4
  ; NOTE: the result is named %mul although the operation is an fsub.
  %mul = fsub float %9, %x
  store float %mul, ptr %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %cmp.not = icmp eq i64 %indvars.iv.next, 1024
  br i1 %cmp.not, label %for.cond.cleanup, label %for.body
}

; Scalable-vector loop in which the splat of %x is the left-hand operand of
; the vector fsub (splat - loaded). The expected assembly uses vfrsub.vf with
; fa0 inside the vector loop; the scalar remainder loop computes %x - a[i].
define void @sink_splat_frsub_scalable(ptr nocapture %a, float %x) {
; CHECK-LABEL: sink_splat_frsub_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: srli a3, a1, 2
; CHECK-NEXT: li a2, 1024
; CHECK-NEXT: bgeu a2, a3, .LBB31_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: j .LBB31_5
; CHECK-NEXT: .LBB31_2: # %vector.ph
; CHECK-NEXT: addi a2, a3, -1
; CHECK-NEXT: andi a4, a2, 1024
; CHECK-NEXT: xori a2, a4, 1024
; CHECK-NEXT: mv a5, a0
; CHECK-NEXT: mv a6, a2
; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, ma
; CHECK-NEXT: .LBB31_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl1re32.v v8, (a5)
; CHECK-NEXT: sub a6, a6, a3
; CHECK-NEXT: vfrsub.vf v8, v8, fa0
; CHECK-NEXT: vs1r.v v8, (a5)
; CHECK-NEXT: add a5, a5, a1
; CHECK-NEXT: bnez a6, .LBB31_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB31_7
; CHECK-NEXT: .LBB31_5: # %for.body.preheader
; CHECK-NEXT: slli a1, a2, 2
; CHECK-NEXT: lui a2, 1
; CHECK-NEXT: add a1, a0, a1
; CHECK-NEXT: add a0, a0, a2
; CHECK-NEXT: .LBB31_6: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: flw fa5, 0(a1)
; CHECK-NEXT: fsub.s fa5, fa0, fa5
; CHECK-NEXT: fsw fa5, 0(a1)
; CHECK-NEXT: addi a1, a1, 4
; CHECK-NEXT: bne a1, a0, .LBB31_6
; CHECK-NEXT: .LBB31_7: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
  ; Take the scalar loop directly when one vector step (vscale * 2 lanes)
  ; exceeds the 1024-element trip count.
  %0 = call i64 @llvm.vscale.i64()
  %1 = shl i64 %0, 1
  %min.iters.check = icmp ugt i64 %1, 1024
  br i1 %min.iters.check, label %for.body.preheader, label %vector.ph

vector.ph: ; preds = %entry
  %2 = call i64 @llvm.vscale.i64()
  %3 = shl i64 %2, 1
  %n.mod.vf = urem i64 1024, %3
  %n.vec = sub nsw i64 1024, %n.mod.vf
  %broadcast.splatinsert = insertelement <vscale x 2 x float> poison, float %x, i32 0
  %broadcast.splat = shufflevector <vscale x 2 x float> %broadcast.splatinsert, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
  %4 = call i64 @llvm.vscale.i64()
  %5 = shl i64 %4, 1
  br label %vector.body

vector.body: ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %6 = getelementptr inbounds float, ptr %a, i64 %index
  %wide.load = load <vscale x 2 x float>, ptr %6, align 4
  ; Reversed operand order: splat - loaded vector.
  %7 = fsub <vscale x 2 x float> %broadcast.splat, %wide.load
  store <vscale x 2 x float> %7, ptr %6, align 4
%index.next = add nuw i64 %index, %5
  %8 = icmp eq i64 %index.next, %n.vec
  br i1 %8, label %middle.block, label %vector.body

middle.block: ; preds = %vector.body
  ; No scalar remainder is needed when %n.mod.vf is zero.
  %cmp.n = icmp eq i64 %n.mod.vf, 0
  br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader

for.body.preheader: ; preds = %entry, %middle.block
  %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
  br label %for.body

for.cond.cleanup: ; preds = %for.body, %middle.block
  ret void

for.body: ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
  %9 = load float, ptr %arrayidx, align 4
  ; NOTE: the result is named %mul although the operation is an fsub.
  %mul = fsub float %x, %9
  store float %mul, ptr %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %cmp.not = icmp eq i64 %indvars.iv.next, 1024
  br i1 %cmp.not, label %for.cond.cleanup, label %for.body
}

; Fixed-width (<4 x float>) loop computing a[i] = fma(a[i], splat(%x), b[i])
; over 1024 elements, 4 per iteration. The splat is the second multiplicand.
; The expected assembly uses vfmacc.vf with fa0 inside the loop and contains
; no instruction that materializes the splat in a vector register.
define void @sink_splat_fma(ptr noalias nocapture %a, ptr nocapture readonly %b, float %x) {
; CHECK-LABEL: sink_splat_fma:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lui a2, 1
; CHECK-NEXT: add a2, a1, a2
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB32_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vle32.v v9, (a1)
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vfmacc.vf v9, fa0, v8
; CHECK-NEXT: vse32.v v9, (a0)
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: bne a1, a2, .LBB32_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
  ; The splat is built outside the loop; its only use is the fma call below.
  %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
  %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body: ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds float, ptr %a, i64 %index
  %wide.load = load <4 x float>, ptr %0, align 4
  %1 = getelementptr inbounds float, ptr %b, i64 %index
  %wide.load12 = load <4 x float>, ptr %1, align 4
  ; Operand order: (loaded a, splat, loaded b).
  %2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %wide.load, <4 x float> %broadcast.splat, <4 x float> %wide.load12)
  store <4 x float> %2, ptr %0, align 4
  %index.next = add nuw i64 %index, 4
  %3 = icmp eq i64 %index.next, 1024
  br i1 %3, label %for.cond.cleanup, label %vector.body

for.cond.cleanup: ; preds = %vector.body
  ret void
}

; Same loop as @sink_splat_fma but with the splat as the first multiplicand
; of the fma call. The expected loop body is the same vfmacc.vf sequence.
define void @sink_splat_fma_commute(ptr noalias nocapture %a, ptr nocapture readonly %b, float %x) {
; CHECK-LABEL: sink_splat_fma_commute:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lui a2, 1
; CHECK-NEXT: add a2, a1, a2
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB33_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vle32.v v9, (a1)
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vfmacc.vf v9, fa0, v8
; CHECK-NEXT: vse32.v v9, (a0)
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: bne a1, a2, .LBB33_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
  %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
  %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body: ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds float, ptr %a, i64 %index
  %wide.load = load <4 x float>, ptr %0, align 4
  %1 = getelementptr inbounds float, ptr %b, i64 %index
  %wide.load12 = load <4 x float>, ptr
%1, align 4
  ; Operand order: (splat, loaded a, loaded b).
  %2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %broadcast.splat, <4 x float> %wide.load, <4 x float> %wide.load12)
  store <4 x float> %2, ptr %0, align 4
  %index.next = add nuw i64 %index, 4
  %3 = icmp eq i64 %index.next, 1024
  br i1 %3, label %for.cond.cleanup, label %vector.body

for.cond.cleanup: ; preds = %vector.body
  ret void
}

; Scalable-vector loop computing a[i] = fma(a[i], splat(%x), b[i]); the splat
; is the second multiplicand. The expected assembly uses vfmacc.vf with fa0
; inside the vector loop; the scalar remainder loop uses fmadd.s with fa0 as
; its second source operand.
define void @sink_splat_fma_scalable(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, float %x) {
; CHECK-LABEL: sink_splat_fma_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: srli a4, a2, 2
; CHECK-NEXT: li a3, 1024
; CHECK-NEXT: bgeu a3, a4, .LBB34_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a3, 0
; CHECK-NEXT: j .LBB34_5
; CHECK-NEXT: .LBB34_2: # %vector.ph
; CHECK-NEXT: addi a3, a4, -1
; CHECK-NEXT: andi a5, a3, 1024
; CHECK-NEXT: xori a3, a5, 1024
; CHECK-NEXT: mv a6, a0
; CHECK-NEXT: mv a7, a1
; CHECK-NEXT: mv t0, a3
; CHECK-NEXT: vsetvli t1, zero, e32, m1, ta, ma
; CHECK-NEXT: .LBB34_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl1re32.v v8, (a6)
; CHECK-NEXT: vl1re32.v v9, (a7)
; CHECK-NEXT: sub t0, t0, a4
; CHECK-NEXT: add a7, a7, a2
; CHECK-NEXT: vfmacc.vf v9, fa0, v8
; CHECK-NEXT: vs1r.v v9, (a6)
; CHECK-NEXT: add a6, a6, a2
; CHECK-NEXT: bnez t0, .LBB34_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a5, .LBB34_7
; CHECK-NEXT: .LBB34_5: # %for.body.preheader
; CHECK-NEXT: slli a2, a3, 2
; CHECK-NEXT: lui a3, 1
; CHECK-NEXT: add a0, a0, a2
; CHECK-NEXT: add a2, a1, a2
; CHECK-NEXT: add a1, a1, a3
; CHECK-NEXT: .LBB34_6: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: flw fa5, 0(a0)
; CHECK-NEXT: flw fa4, 0(a2)
; CHECK-NEXT: addi a2, a2, 4
; CHECK-NEXT: fmadd.s fa5, fa5, fa0, fa4
; CHECK-NEXT: fsw fa5, 0(a0)
; CHECK-NEXT: addi a0, a0, 4
; CHECK-NEXT: bne a2, a1, .LBB34_6
; CHECK-NEXT: .LBB34_7: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
  ; Take the scalar loop directly when one vector step (vscale * 2 lanes)
  ; exceeds the 1024-element trip count.
  %0 = call i64 @llvm.vscale.i64()
  %1 = shl i64 %0, 1
  %min.iters.check = icmp ugt i64 %1, 1024
  br i1 %min.iters.check, label %for.body.preheader, label %vector.ph

vector.ph: ; preds = %entry
  ; %n.vec is 1024 rounded down to a multiple of the vector step; the
  ; remaining %n.mod.vf elements are left for for.body.
  %2 = call i64 @llvm.vscale.i64()
  %3 = shl i64 %2, 1
  %n.mod.vf = urem i64 1024, %3
  %n.vec = sub nsw i64 1024, %n.mod.vf
  %broadcast.splatinsert = insertelement <vscale x 2 x float> poison, float %x, i32 0
  %broadcast.splat = shufflevector <vscale x 2 x float> %broadcast.splatinsert, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
  %4 = call i64 @llvm.vscale.i64()
  %5 = shl i64 %4, 1
  br label %vector.body

vector.body: ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %6 = getelementptr inbounds float, ptr %a, i64 %index
  %wide.load = load <vscale x 2 x float>, ptr %6, align 4
  %7 = getelementptr inbounds float, ptr %b, i64 %index
  %wide.load12 = load <vscale x 2 x float>, ptr %7, align 4
  ; Operand order: (loaded a, splat, loaded b).
  %8 = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> %wide.load, <vscale x 2 x float> %broadcast.splat, <vscale x 2 x float> %wide.load12)
  store <vscale x 2 x float> %8, ptr %6, align 4
  %index.next = add nuw i64 %index, %5
  %9 = icmp eq i64 %index.next, %n.vec
  br i1 %9, label %middle.block, label %vector.body

middle.block: ; preds = %vector.body
  %cmp.n = icmp eq i64 %n.mod.vf, 0
  br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader

for.body.preheader: ; preds = %entry, %middle.block
  %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
  br label %for.body

for.cond.cleanup: ; preds = %for.body, %middle.block
  ret void

for.body: ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
  %10 = load float, ptr %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds float, ptr %b, i64 %indvars.iv
  %11 = load float, ptr %arrayidx2, align 4
  %12 = tail call float @llvm.fma.f32(float %10, float %x, float %11)
  store float %12, ptr %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %cmp.not = icmp eq i64 %indvars.iv.next, 1024
  br i1 %cmp.not, label %for.cond.cleanup, label %for.body
}

; Same loop as @sink_splat_fma_scalable but with the splat as the first
; multiplicand of the fma calls. The expected vector loop is the same
; vfmacc.vf sequence; the scalar remainder loop uses fmadd.s with fa0 as its
; first source operand.
define void @sink_splat_fma_commute_scalable(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, float %x) {
; CHECK-LABEL: sink_splat_fma_commute_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: srli a4, a2, 2
; CHECK-NEXT: li a3, 1024
; CHECK-NEXT: bgeu a3, a4, .LBB35_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a3, 0
; CHECK-NEXT: j .LBB35_5
; CHECK-NEXT: .LBB35_2: # %vector.ph
; CHECK-NEXT: addi a3, a4, -1
; CHECK-NEXT: andi a5, a3, 1024
; CHECK-NEXT: xori a3, a5, 1024
; CHECK-NEXT: mv a6, a0
; CHECK-NEXT: mv a7, a1
; CHECK-NEXT: mv t0, a3
; CHECK-NEXT: vsetvli t1, zero, e32, m1, ta, ma
; CHECK-NEXT: .LBB35_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl1re32.v v8, (a6)
; CHECK-NEXT: vl1re32.v v9, (a7)
; CHECK-NEXT: sub t0, t0, a4
; CHECK-NEXT: add a7, a7, a2
; CHECK-NEXT: vfmacc.vf v9, fa0, v8
; CHECK-NEXT: vs1r.v v9, (a6)
; CHECK-NEXT: add a6, a6, a2
; CHECK-NEXT: bnez t0, .LBB35_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a5, .LBB35_7
; CHECK-NEXT: .LBB35_5: # %for.body.preheader
; CHECK-NEXT: slli a2, a3, 2
; CHECK-NEXT: lui a3, 1
; CHECK-NEXT: add a0, a0, a2
; CHECK-NEXT: add a2, a1, a2
; CHECK-NEXT: add a1, a1, a3
; CHECK-NEXT: .LBB35_6: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: flw fa5, 0(a0)
; CHECK-NEXT: flw fa4, 0(a2)
; CHECK-NEXT: addi a2, a2, 4
; CHECK-NEXT: fmadd.s fa5, fa0, fa5, fa4
; CHECK-NEXT: fsw fa5, 0(a0)
; CHECK-NEXT: addi a0, a0, 4
; CHECK-NEXT: bne a2, a1, .LBB35_6
; CHECK-NEXT: .LBB35_7: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
  ; Take the scalar loop directly when one vector step (vscale * 2 lanes)
  ; exceeds the 1024-element trip count.
  %0 = call i64 @llvm.vscale.i64()
  %1 = shl i64 %0, 1
  %min.iters.check = icmp ugt i64 %1, 1024
  br i1 %min.iters.check, label %for.body.preheader, label %vector.ph

vector.ph: ; preds = %entry
  %2 = call i64 @llvm.vscale.i64()
  %3 = shl i64 %2, 1
  %n.mod.vf = urem i64 1024, %3
  %n.vec = sub nsw i64 1024, %n.mod.vf
  %broadcast.splatinsert = insertelement <vscale x 2 x float> poison, float %x, i32 0
  %broadcast.splat = shufflevector <vscale x 2 x float> %broadcast.splatinsert, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
  %4 = call i64 @llvm.vscale.i64()
  %5 = shl i64 %4, 1
  br label %vector.body

vector.body: ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %6 = getelementptr inbounds float, ptr %a, i64 %index
  %wide.load = load <vscale x 2 x float>, ptr %6, align 4
  %7 = getelementptr inbounds float, ptr %b, i64 %index
  %wide.load12 = load <vscale x 2 x float>, ptr %7, align 4
  ; Operand order: (splat, loaded a, loaded b).
  %8 = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> %broadcast.splat, <vscale x 2 x float> %wide.load, <vscale x 2 x float> %wide.load12)
  store <vscale x 2 x float> %8, ptr %6, align 4
  %index.next = add nuw i64 %index, %5
  %9 = icmp eq i64 %index.next, %n.vec
  br i1 %9, label %middle.block, label %vector.body

middle.block: ; preds = %vector.body
  %cmp.n = icmp eq i64 %n.mod.vf, 0
  br i1 %cmp.n, label %for.cond.cleanup,
label %for.body.preheader

for.body.preheader: ; preds = %entry, %middle.block
  %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
  br label %for.body

for.cond.cleanup: ; preds = %for.body, %middle.block
  ret void

for.body: ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
  %10 = load float, ptr %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds float, ptr %b, i64 %indvars.iv
  %11 = load float, ptr %arrayidx2, align 4
  ; Scalar counterpart of the vector fma, with %x as the first multiplicand.
  %12 = tail call float @llvm.fma.f32(float %x, float %10, float %11)
  store float %12, ptr %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %cmp.not = icmp eq i64 %indvars.iv.next, 1024
  br i1 %cmp.not, label %for.cond.cleanup, label %for.body
}

; Intrinsics called by the functions in this part of the file.
declare i64 @llvm.vscale.i64()
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
declare <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x float>)
declare float @llvm.fma.f32(float, float, float)

; Fixed-width (<4 x i32>) loop that compares each loaded vector for equality
; with a splat of %y and uses the result as the mask of a store of zeroes to
; the same address. The expected assembly sets up the zero vector once before
; the loop (vmv.v.i v8, 0) and, inside the loop, compares with vmseq.vx
; against a1 and stores with a masked vse32.v.
define void @sink_splat_icmp(ptr nocapture %x, i32 signext %y) {
; CHECK-LABEL: sink_splat_icmp:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lui a2, 1
; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: .LBB36_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v9, (a0)
; CHECK-NEXT: vmseq.vx v0, v9, a1
; CHECK-NEXT: vse32.v v8, (a0), v0.t
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: bne a0, a2, .LBB36_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
  ; The splat is built outside the loop; its only use is the icmp below.
  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %y, i32 0
  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body: ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i32, ptr %x, i64 %index
  %wide.load = load <4 x i32>, ptr %0, align 4
  %1 = icmp eq <4 x i32> %wide.load, %broadcast.splat
  ; Store zero only to the lanes where the loaded value equals %y.
  call void @llvm.masked.store.v4i32.p0(<4 x i32> zeroinitializer, ptr %0, i32 4, <4 x i1> %1)
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup: ; preds = %vector.body
  ret void
}
declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32, <4 x i1>)

; Floating-point counterpart of @sink_splat_icmp: each loaded <4 x float> is
; compared (fcmp fast oeq) with a splat of %y and the result masks a store of
; zeroes. The expected assembly compares with vmfeq.vf against fa0 inside the
; loop.
define void @sink_splat_fcmp(ptr nocapture %x, float %y) {
; CHECK-LABEL: sink_splat_fcmp:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lui a1, 1
; CHECK-NEXT: add a1, a0, a1
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: .LBB37_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v9, (a0)
; CHECK-NEXT: vmfeq.vf v0, v9, fa0
; CHECK-NEXT: vse32.v v8, (a0), v0.t
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: bne a0, a1, .LBB37_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
  %broadcast.splatinsert = insertelement <4 x float> poison, float %y, i32 0
  %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body: ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds float, ptr %x, i64 %index
  %wide.load = load <4 x float>, ptr %0, align 4
  %1 = fcmp fast oeq <4 x float> %wide.load, %broadcast.splat
  call void @llvm.masked.store.v4f32.p0(<4 x float>
zeroinitializer, ptr %0, i32 4, <4 x i1> %1)
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup: ; preds = %vector.body
  ret void
}
declare void @llvm.masked.store.v4f32.p0(<4 x float>, ptr, i32, <4 x i1>)

; Fixed-width (<4 x i32>) loop dividing each loaded vector by a splat of %x
; (unsigned), over 1024 elements, 4 per iteration. The expected assembly uses
; vdivu.vx with a1 inside the loop and contains no instruction that
; materializes the splat in a vector register.
define void @sink_splat_udiv(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_udiv:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lui a2, 1
; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB38_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vdivu.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: bne a0, a2, .LBB38_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
  ; The splat is built outside the loop; its only use is the udiv below.
  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body: ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i32, ptr %a, i64 %index
  %wide.load = load <4 x i32>, ptr %0, align 4
  %1 = udiv <4 x i32> %wide.load, %broadcast.splat
  store <4 x i32> %1, ptr %0, align 4
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup: ; preds = %vector.body
  ret void
}

; Signed-division variant of @sink_splat_udiv: the splat of %x is the divisor
; of an sdiv. The expected assembly uses vdiv.vx with a1 inside the loop.
define void @sink_splat_sdiv(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_sdiv:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lui a2, 1
; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB39_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vdiv.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: bne a0, a2, .LBB39_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body: ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i32, ptr %a, i64 %index
  %wide.load = load <4 x i32>, ptr %0, align 4
  %1 = sdiv <4 x i32> %wide.load, %broadcast.splat
  store <4 x i32> %1, ptr %0, align 4
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup: ; preds = %vector.body
  ret void
}

; Unsigned-remainder variant: the splat of %x is the divisor of a urem. The
; expected assembly uses vremu.vx with a1 inside the loop.
define void @sink_splat_urem(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_urem:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lui a2, 1
; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB40_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vremu.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: bne a0, a2, .LBB40_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body: ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next,
%vector.body ]
  %0 = getelementptr inbounds i32, ptr %a, i64 %index
  %wide.load = load <4 x i32>, ptr %0, align 4
  ; The splat of %x is the divisor of the urem.
  %1 = urem <4 x i32> %wide.load, %broadcast.splat
  store <4 x i32> %1, ptr %0, align 4
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup: ; preds = %vector.body
  ret void
}

; Fixed-width (<4 x i32>) loop taking the signed remainder of each loaded
; vector by a splat of %x, over 1024 elements, 4 per iteration. The expected
; assembly uses vrem.vx with a1 inside the loop and contains no instruction
; that materializes the splat in a vector register.
define void @sink_splat_srem(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_srem:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lui a2, 1
; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB41_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vrem.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: bne a0, a2, .LBB41_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
  ; The splat is built outside the loop; its only use is the srem below.
  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body: ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i32, ptr %a, i64 %index
  %wide.load = load <4 x i32>, ptr %0, align 4
  %1 = srem <4 x i32> %wide.load, %broadcast.splat
  store <4 x i32> %1, ptr %0, align 4
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup: ; preds = %vector.body
  ret void
}

; Scalable-vector (<vscale x 4 x i32>) loop dividing each loaded vector by a
; splat of %x (unsigned). The expected assembly uses vdivu.vx with a1 inside
; the vector loop (e32, m2); the scalar remainder loop uses divuw.
define void @sink_splat_udiv_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_udiv_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a5, vlenb
; CHECK-NEXT: srli a3, a5, 1
; CHECK-NEXT: li a2, 1024
; CHECK-NEXT: bgeu a2, a3, .LBB42_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: j .LBB42_5
; CHECK-NEXT: .LBB42_2: # %vector.ph
; CHECK-NEXT: addi a2, a3, -1
; CHECK-NEXT: andi a4, a2, 1024
; CHECK-NEXT: xori a2, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
; CHECK-NEXT: mv a6, a0
; CHECK-NEXT: mv a7, a2
; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
; CHECK-NEXT: .LBB42_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl2re32.v v8, (a6)
; CHECK-NEXT: sub a7, a7, a3
; CHECK-NEXT: vdivu.vx v8, v8, a1
; CHECK-NEXT: vs2r.v v8, (a6)
; CHECK-NEXT: add a6, a6, a5
; CHECK-NEXT: bnez a7, .LBB42_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB42_7
; CHECK-NEXT: .LBB42_5: # %for.body.preheader
; CHECK-NEXT: slli a2, a2, 2
; CHECK-NEXT: lui a3, 1
; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: add a0, a0, a3
; CHECK-NEXT: .LBB42_6: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: lw a3, 0(a2)
; CHECK-NEXT: divuw a3, a3, a1
; CHECK-NEXT: sw a3, 0(a2)
; CHECK-NEXT: addi a2, a2, 4
; CHECK-NEXT: bne a2, a0, .LBB42_6
; CHECK-NEXT: .LBB42_7: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
  ; %1 = vscale * 4, the number of i32 lanes in one <vscale x 4 x i32>.
  ; When that exceeds the 1024-element trip count, go straight to the scalar
  ; loop starting at index 0.
  %0 = call i64 @llvm.vscale.i64()
  %1 = shl i64 %0, 2
  %min.iters.check = icmp ugt i64 %1, 1024
  br i1 %min.iters.check, label %for.body.preheader, label %vector.ph

vector.ph: ; preds = %entry
  ; %n.vec is 1024 rounded down to a multiple of the vector step; the
  ; remaining %n.mod.vf elements are left for for.body.
  %2 = call i64 @llvm.vscale.i64()
  %3 = shl i64 %2, 2
  %n.mod.vf = urem i64 1024, %3
  %n.vec = sub nsw i64 1024, %n.mod.vf
  %broadcast.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
  %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
  %4 = call i64 @llvm.vscale.i64()
  %5 =
shl i64 %4, 2 2543 br label %vector.body 2544 2545vector.body: ; preds = %vector.body, %vector.ph 2546 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 2547 %6 = getelementptr inbounds i32, ptr %a, i64 %index 2548 %wide.load = load <vscale x 4 x i32>, ptr %6, align 4 2549 %7 = udiv <vscale x 4 x i32> %wide.load, %broadcast.splat 2550 store <vscale x 4 x i32> %7, ptr %6, align 4 2551 %index.next = add nuw i64 %index, %5 2552 %8 = icmp eq i64 %index.next, %n.vec 2553 br i1 %8, label %middle.block, label %vector.body 2554 2555middle.block: ; preds = %vector.body 2556 %cmp.n = icmp eq i64 %n.mod.vf, 0 2557 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader 2558 2559for.body.preheader: ; preds = %entry, %middle.block 2560 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ] 2561 br label %for.body 2562 2563for.cond.cleanup: ; preds = %for.body, %middle.block 2564 ret void 2565 2566for.body: ; preds = %for.body.preheader, %for.body 2567 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ] 2568 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv 2569 %9 = load i32, ptr %arrayidx, align 4 2570 %div = udiv i32 %9, %x 2571 store i32 %div, ptr %arrayidx, align 4 2572 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 2573 %cmp.not = icmp eq i64 %indvars.iv.next, 1024 2574 br i1 %cmp.not, label %for.cond.cleanup, label %for.body 2575} 2576 2577define void @sink_splat_sdiv_scalable(ptr nocapture %a, i32 signext %x) { 2578; CHECK-LABEL: sink_splat_sdiv_scalable: 2579; CHECK: # %bb.0: # %entry 2580; CHECK-NEXT: csrr a5, vlenb 2581; CHECK-NEXT: srli a3, a5, 1 2582; CHECK-NEXT: li a2, 1024 2583; CHECK-NEXT: bgeu a2, a3, .LBB43_2 2584; CHECK-NEXT: # %bb.1: 2585; CHECK-NEXT: li a2, 0 2586; CHECK-NEXT: j .LBB43_5 2587; CHECK-NEXT: .LBB43_2: # %vector.ph 2588; CHECK-NEXT: addi a2, a3, -1 2589; CHECK-NEXT: andi a4, a2, 1024 2590; CHECK-NEXT: xori a2, a4, 1024 2591; CHECK-NEXT: slli a5, a5, 1 
2592; CHECK-NEXT: mv a6, a0 2593; CHECK-NEXT: mv a7, a2 2594; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma 2595; CHECK-NEXT: .LBB43_3: # %vector.body 2596; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 2597; CHECK-NEXT: vl2re32.v v8, (a6) 2598; CHECK-NEXT: sub a7, a7, a3 2599; CHECK-NEXT: vdiv.vx v8, v8, a1 2600; CHECK-NEXT: vs2r.v v8, (a6) 2601; CHECK-NEXT: add a6, a6, a5 2602; CHECK-NEXT: bnez a7, .LBB43_3 2603; CHECK-NEXT: # %bb.4: # %middle.block 2604; CHECK-NEXT: beqz a4, .LBB43_7 2605; CHECK-NEXT: .LBB43_5: # %for.body.preheader 2606; CHECK-NEXT: slli a2, a2, 2 2607; CHECK-NEXT: lui a3, 1 2608; CHECK-NEXT: add a2, a0, a2 2609; CHECK-NEXT: add a0, a0, a3 2610; CHECK-NEXT: .LBB43_6: # %for.body 2611; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 2612; CHECK-NEXT: lw a3, 0(a2) 2613; CHECK-NEXT: divw a3, a3, a1 2614; CHECK-NEXT: sw a3, 0(a2) 2615; CHECK-NEXT: addi a2, a2, 4 2616; CHECK-NEXT: bne a2, a0, .LBB43_6 2617; CHECK-NEXT: .LBB43_7: # %for.cond.cleanup 2618; CHECK-NEXT: ret 2619entry: 2620 %0 = call i64 @llvm.vscale.i64() 2621 %1 = shl i64 %0, 2 2622 %min.iters.check = icmp ugt i64 %1, 1024 2623 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph 2624 2625vector.ph: ; preds = %entry 2626 %2 = call i64 @llvm.vscale.i64() 2627 %3 = shl i64 %2, 2 2628 %n.mod.vf = urem i64 1024, %3 2629 %n.vec = sub nsw i64 1024, %n.mod.vf 2630 %broadcast.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0 2631 %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer 2632 %4 = call i64 @llvm.vscale.i64() 2633 %5 = shl i64 %4, 2 2634 br label %vector.body 2635 2636vector.body: ; preds = %vector.body, %vector.ph 2637 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 2638 %6 = getelementptr inbounds i32, ptr %a, i64 %index 2639 %wide.load = load <vscale x 4 x i32>, ptr %6, align 4 2640 %7 = sdiv <vscale x 4 x i32> %wide.load, %broadcast.splat 2641 
store <vscale x 4 x i32> %7, ptr %6, align 4 2642 %index.next = add nuw i64 %index, %5 2643 %8 = icmp eq i64 %index.next, %n.vec 2644 br i1 %8, label %middle.block, label %vector.body 2645 2646middle.block: ; preds = %vector.body 2647 %cmp.n = icmp eq i64 %n.mod.vf, 0 2648 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader 2649 2650for.body.preheader: ; preds = %entry, %middle.block 2651 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ] 2652 br label %for.body 2653 2654for.cond.cleanup: ; preds = %for.body, %middle.block 2655 ret void 2656 2657for.body: ; preds = %for.body.preheader, %for.body 2658 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ] 2659 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv 2660 %9 = load i32, ptr %arrayidx, align 4 2661 %div = sdiv i32 %9, %x 2662 store i32 %div, ptr %arrayidx, align 4 2663 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 2664 %cmp.not = icmp eq i64 %indvars.iv.next, 1024 2665 br i1 %cmp.not, label %for.cond.cleanup, label %for.body 2666} 2667 2668define void @sink_splat_urem_scalable(ptr nocapture %a, i32 signext %x) { 2669; CHECK-LABEL: sink_splat_urem_scalable: 2670; CHECK: # %bb.0: # %entry 2671; CHECK-NEXT: csrr a5, vlenb 2672; CHECK-NEXT: srli a3, a5, 1 2673; CHECK-NEXT: li a2, 1024 2674; CHECK-NEXT: bgeu a2, a3, .LBB44_2 2675; CHECK-NEXT: # %bb.1: 2676; CHECK-NEXT: li a2, 0 2677; CHECK-NEXT: j .LBB44_5 2678; CHECK-NEXT: .LBB44_2: # %vector.ph 2679; CHECK-NEXT: addi a2, a3, -1 2680; CHECK-NEXT: andi a4, a2, 1024 2681; CHECK-NEXT: xori a2, a4, 1024 2682; CHECK-NEXT: slli a5, a5, 1 2683; CHECK-NEXT: mv a6, a0 2684; CHECK-NEXT: mv a7, a2 2685; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma 2686; CHECK-NEXT: .LBB44_3: # %vector.body 2687; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 2688; CHECK-NEXT: vl2re32.v v8, (a6) 2689; CHECK-NEXT: sub a7, a7, a3 2690; CHECK-NEXT: vremu.vx v8, v8, a1 2691; CHECK-NEXT: vs2r.v v8, (a6) 2692; 
CHECK-NEXT: add a6, a6, a5 2693; CHECK-NEXT: bnez a7, .LBB44_3 2694; CHECK-NEXT: # %bb.4: # %middle.block 2695; CHECK-NEXT: beqz a4, .LBB44_7 2696; CHECK-NEXT: .LBB44_5: # %for.body.preheader 2697; CHECK-NEXT: slli a2, a2, 2 2698; CHECK-NEXT: lui a3, 1 2699; CHECK-NEXT: add a2, a0, a2 2700; CHECK-NEXT: add a0, a0, a3 2701; CHECK-NEXT: .LBB44_6: # %for.body 2702; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 2703; CHECK-NEXT: lw a3, 0(a2) 2704; CHECK-NEXT: remuw a3, a3, a1 2705; CHECK-NEXT: sw a3, 0(a2) 2706; CHECK-NEXT: addi a2, a2, 4 2707; CHECK-NEXT: bne a2, a0, .LBB44_6 2708; CHECK-NEXT: .LBB44_7: # %for.cond.cleanup 2709; CHECK-NEXT: ret 2710entry: 2711 %0 = call i64 @llvm.vscale.i64() 2712 %1 = shl i64 %0, 2 2713 %min.iters.check = icmp ugt i64 %1, 1024 2714 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph 2715 2716vector.ph: ; preds = %entry 2717 %2 = call i64 @llvm.vscale.i64() 2718 %3 = shl i64 %2, 2 2719 %n.mod.vf = urem i64 1024, %3 2720 %n.vec = sub nsw i64 1024, %n.mod.vf 2721 %broadcast.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0 2722 %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer 2723 %4 = call i64 @llvm.vscale.i64() 2724 %5 = shl i64 %4, 2 2725 br label %vector.body 2726 2727vector.body: ; preds = %vector.body, %vector.ph 2728 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 2729 %6 = getelementptr inbounds i32, ptr %a, i64 %index 2730 %wide.load = load <vscale x 4 x i32>, ptr %6, align 4 2731 %7 = urem <vscale x 4 x i32> %wide.load, %broadcast.splat 2732 store <vscale x 4 x i32> %7, ptr %6, align 4 2733 %index.next = add nuw i64 %index, %5 2734 %8 = icmp eq i64 %index.next, %n.vec 2735 br i1 %8, label %middle.block, label %vector.body 2736 2737middle.block: ; preds = %vector.body 2738 %cmp.n = icmp eq i64 %n.mod.vf, 0 2739 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader 2740 
2741for.body.preheader: ; preds = %entry, %middle.block 2742 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ] 2743 br label %for.body 2744 2745for.cond.cleanup: ; preds = %for.body, %middle.block 2746 ret void 2747 2748for.body: ; preds = %for.body.preheader, %for.body 2749 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ] 2750 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv 2751 %9 = load i32, ptr %arrayidx, align 4 2752 %rem = urem i32 %9, %x 2753 store i32 %rem, ptr %arrayidx, align 4 2754 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 2755 %cmp.not = icmp eq i64 %indvars.iv.next, 1024 2756 br i1 %cmp.not, label %for.cond.cleanup, label %for.body 2757} 2758 2759define void @sink_splat_srem_scalable(ptr nocapture %a, i32 signext %x) { 2760; CHECK-LABEL: sink_splat_srem_scalable: 2761; CHECK: # %bb.0: # %entry 2762; CHECK-NEXT: csrr a5, vlenb 2763; CHECK-NEXT: srli a3, a5, 1 2764; CHECK-NEXT: li a2, 1024 2765; CHECK-NEXT: bgeu a2, a3, .LBB45_2 2766; CHECK-NEXT: # %bb.1: 2767; CHECK-NEXT: li a2, 0 2768; CHECK-NEXT: j .LBB45_5 2769; CHECK-NEXT: .LBB45_2: # %vector.ph 2770; CHECK-NEXT: addi a2, a3, -1 2771; CHECK-NEXT: andi a4, a2, 1024 2772; CHECK-NEXT: xori a2, a4, 1024 2773; CHECK-NEXT: slli a5, a5, 1 2774; CHECK-NEXT: mv a6, a0 2775; CHECK-NEXT: mv a7, a2 2776; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma 2777; CHECK-NEXT: .LBB45_3: # %vector.body 2778; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 2779; CHECK-NEXT: vl2re32.v v8, (a6) 2780; CHECK-NEXT: sub a7, a7, a3 2781; CHECK-NEXT: vrem.vx v8, v8, a1 2782; CHECK-NEXT: vs2r.v v8, (a6) 2783; CHECK-NEXT: add a6, a6, a5 2784; CHECK-NEXT: bnez a7, .LBB45_3 2785; CHECK-NEXT: # %bb.4: # %middle.block 2786; CHECK-NEXT: beqz a4, .LBB45_7 2787; CHECK-NEXT: .LBB45_5: # %for.body.preheader 2788; CHECK-NEXT: slli a2, a2, 2 2789; CHECK-NEXT: lui a3, 1 2790; CHECK-NEXT: add a2, a0, a2 2791; CHECK-NEXT: add a0, a0, a3 2792; CHECK-NEXT: 
.LBB45_6: # %for.body 2793; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 2794; CHECK-NEXT: lw a3, 0(a2) 2795; CHECK-NEXT: remw a3, a3, a1 2796; CHECK-NEXT: sw a3, 0(a2) 2797; CHECK-NEXT: addi a2, a2, 4 2798; CHECK-NEXT: bne a2, a0, .LBB45_6 2799; CHECK-NEXT: .LBB45_7: # %for.cond.cleanup 2800; CHECK-NEXT: ret 2801entry: 2802 %0 = call i64 @llvm.vscale.i64() 2803 %1 = shl i64 %0, 2 2804 %min.iters.check = icmp ugt i64 %1, 1024 2805 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph 2806 2807vector.ph: ; preds = %entry 2808 %2 = call i64 @llvm.vscale.i64() 2809 %3 = shl i64 %2, 2 2810 %n.mod.vf = urem i64 1024, %3 2811 %n.vec = sub nsw i64 1024, %n.mod.vf 2812 %broadcast.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0 2813 %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer 2814 %4 = call i64 @llvm.vscale.i64() 2815 %5 = shl i64 %4, 2 2816 br label %vector.body 2817 2818vector.body: ; preds = %vector.body, %vector.ph 2819 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 2820 %6 = getelementptr inbounds i32, ptr %a, i64 %index 2821 %wide.load = load <vscale x 4 x i32>, ptr %6, align 4 2822 %7 = srem <vscale x 4 x i32> %wide.load, %broadcast.splat 2823 store <vscale x 4 x i32> %7, ptr %6, align 4 2824 %index.next = add nuw i64 %index, %5 2825 %8 = icmp eq i64 %index.next, %n.vec 2826 br i1 %8, label %middle.block, label %vector.body 2827 2828middle.block: ; preds = %vector.body 2829 %cmp.n = icmp eq i64 %n.mod.vf, 0 2830 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader 2831 2832for.body.preheader: ; preds = %entry, %middle.block 2833 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ] 2834 br label %for.body 2835 2836for.cond.cleanup: ; preds = %for.body, %middle.block 2837 ret void 2838 2839for.body: ; preds = %for.body.preheader, %for.body 2840 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body 
], [ %indvars.iv.ph, %for.body.preheader ] 2841 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv 2842 %9 = load i32, ptr %arrayidx, align 4 2843 %rem = srem i32 %9, %x 2844 store i32 %rem, ptr %arrayidx, align 4 2845 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 2846 %cmp.not = icmp eq i64 %indvars.iv.next, 1024 2847 br i1 %cmp.not, label %for.cond.cleanup, label %for.body 2848} 2849 2850declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>) 2851 2852define void @sink_splat_min(ptr nocapture %a, i32 signext %x) { 2853; CHECK-LABEL: sink_splat_min: 2854; CHECK: # %bb.0: # %entry 2855; CHECK-NEXT: li a2, 1024 2856; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 2857; CHECK-NEXT: .LBB46_1: # %vector.body 2858; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 2859; CHECK-NEXT: vle32.v v8, (a0) 2860; CHECK-NEXT: addi a2, a2, 4 2861; CHECK-NEXT: vmin.vx v8, v8, a1 2862; CHECK-NEXT: vse32.v v8, (a0) 2863; CHECK-NEXT: addi a0, a0, -16 2864; CHECK-NEXT: bnez a2, .LBB46_1 2865; CHECK-NEXT: # %bb.2: # %for.cond.cleanup 2866; CHECK-NEXT: ret 2867entry: 2868 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0 2869 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer 2870 br label %vector.body 2871 2872vector.body: ; preds = %vector.body, %entry 2873 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 2874 %0 = getelementptr inbounds i32, ptr %a, i64 %index 2875 %wide.load = load <4 x i32>, ptr %0, align 4 2876 %1 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat) 2877 store <4 x i32> %1, ptr %0, align 4 2878 %index.next = sub nuw i64 %index, 4 2879 %2 = icmp eq i64 %index.next, 1024 2880 br i1 %2, label %for.cond.cleanup, label %vector.body 2881 2882for.cond.cleanup: ; preds = %vector.body 2883 ret void 2884} 2885 2886define void @sink_splat_min_commute(ptr nocapture %a, i32 signext %x) { 2887; CHECK-LABEL: sink_splat_min_commute: 2888; 
CHECK: # %bb.0: # %entry 2889; CHECK-NEXT: li a2, 1024 2890; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 2891; CHECK-NEXT: .LBB47_1: # %vector.body 2892; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 2893; CHECK-NEXT: vle32.v v8, (a0) 2894; CHECK-NEXT: addi a2, a2, 4 2895; CHECK-NEXT: vmin.vx v8, v8, a1 2896; CHECK-NEXT: vse32.v v8, (a0) 2897; CHECK-NEXT: addi a0, a0, -16 2898; CHECK-NEXT: bnez a2, .LBB47_1 2899; CHECK-NEXT: # %bb.2: # %for.cond.cleanup 2900; CHECK-NEXT: ret 2901entry: 2902 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0 2903 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer 2904 br label %vector.body 2905 2906vector.body: ; preds = %vector.body, %entry 2907 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 2908 %0 = getelementptr inbounds i32, ptr %a, i64 %index 2909 %wide.load = load <4 x i32>, ptr %0, align 4 2910 %1 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %broadcast.splat, <4 x i32> %wide.load) 2911 store <4 x i32> %1, ptr %0, align 4 2912 %index.next = sub nuw i64 %index, 4 2913 %2 = icmp eq i64 %index.next, 1024 2914 br i1 %2, label %for.cond.cleanup, label %vector.body 2915 2916for.cond.cleanup: ; preds = %vector.body 2917 ret void 2918} 2919 2920declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>) 2921 2922define void @sink_splat_max(ptr nocapture %a, i32 signext %x) { 2923; CHECK-LABEL: sink_splat_max: 2924; CHECK: # %bb.0: # %entry 2925; CHECK-NEXT: li a2, 1024 2926; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 2927; CHECK-NEXT: .LBB48_1: # %vector.body 2928; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 2929; CHECK-NEXT: vle32.v v8, (a0) 2930; CHECK-NEXT: addi a2, a2, 4 2931; CHECK-NEXT: vmax.vx v8, v8, a1 2932; CHECK-NEXT: vse32.v v8, (a0) 2933; CHECK-NEXT: addi a0, a0, -16 2934; CHECK-NEXT: bnez a2, .LBB48_1 2935; CHECK-NEXT: # %bb.2: # %for.cond.cleanup 2936; CHECK-NEXT: ret 2937entry: 2938 %broadcast.splatinsert = 
insertelement <4 x i32> poison, i32 %x, i32 0 2939 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer 2940 br label %vector.body 2941 2942vector.body: ; preds = %vector.body, %entry 2943 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 2944 %0 = getelementptr inbounds i32, ptr %a, i64 %index 2945 %wide.load = load <4 x i32>, ptr %0, align 4 2946 %1 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat) 2947 store <4 x i32> %1, ptr %0, align 4 2948 %index.next = sub nuw i64 %index, 4 2949 %2 = icmp eq i64 %index.next, 1024 2950 br i1 %2, label %for.cond.cleanup, label %vector.body 2951 2952for.cond.cleanup: ; preds = %vector.body 2953 ret void 2954} 2955 2956define void @sink_splat_max_commute(ptr nocapture %a, i32 signext %x) { 2957; CHECK-LABEL: sink_splat_max_commute: 2958; CHECK: # %bb.0: # %entry 2959; CHECK-NEXT: li a2, 1024 2960; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 2961; CHECK-NEXT: .LBB49_1: # %vector.body 2962; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 2963; CHECK-NEXT: vle32.v v8, (a0) 2964; CHECK-NEXT: addi a2, a2, 4 2965; CHECK-NEXT: vmax.vx v8, v8, a1 2966; CHECK-NEXT: vse32.v v8, (a0) 2967; CHECK-NEXT: addi a0, a0, -16 2968; CHECK-NEXT: bnez a2, .LBB49_1 2969; CHECK-NEXT: # %bb.2: # %for.cond.cleanup 2970; CHECK-NEXT: ret 2971entry: 2972 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0 2973 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer 2974 br label %vector.body 2975 2976vector.body: ; preds = %vector.body, %entry 2977 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 2978 %0 = getelementptr inbounds i32, ptr %a, i64 %index 2979 %wide.load = load <4 x i32>, ptr %0, align 4 2980 %1 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %broadcast.splat, <4 x i32> %wide.load) 2981 store <4 x i32> %1, ptr %0, align 4 2982 %index.next = sub nuw i64 
%index, 4 2983 %2 = icmp eq i64 %index.next, 1024 2984 br i1 %2, label %for.cond.cleanup, label %vector.body 2985 2986for.cond.cleanup: ; preds = %vector.body 2987 ret void 2988} 2989 2990declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>) 2991 2992define void @sink_splat_umin(ptr nocapture %a, i32 signext %x) { 2993; CHECK-LABEL: sink_splat_umin: 2994; CHECK: # %bb.0: # %entry 2995; CHECK-NEXT: li a2, 1024 2996; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 2997; CHECK-NEXT: .LBB50_1: # %vector.body 2998; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 2999; CHECK-NEXT: vle32.v v8, (a0) 3000; CHECK-NEXT: addi a2, a2, 4 3001; CHECK-NEXT: vminu.vx v8, v8, a1 3002; CHECK-NEXT: vse32.v v8, (a0) 3003; CHECK-NEXT: addi a0, a0, -16 3004; CHECK-NEXT: bnez a2, .LBB50_1 3005; CHECK-NEXT: # %bb.2: # %for.cond.cleanup 3006; CHECK-NEXT: ret 3007entry: 3008 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0 3009 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer 3010 br label %vector.body 3011 3012vector.body: ; preds = %vector.body, %entry 3013 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 3014 %0 = getelementptr inbounds i32, ptr %a, i64 %index 3015 %wide.load = load <4 x i32>, ptr %0, align 4 3016 %1 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat) 3017 store <4 x i32> %1, ptr %0, align 4 3018 %index.next = sub nuw i64 %index, 4 3019 %2 = icmp eq i64 %index.next, 1024 3020 br i1 %2, label %for.cond.cleanup, label %vector.body 3021 3022for.cond.cleanup: ; preds = %vector.body 3023 ret void 3024} 3025 3026define void @sink_splat_umin_commute(ptr nocapture %a, i32 signext %x) { 3027; CHECK-LABEL: sink_splat_umin_commute: 3028; CHECK: # %bb.0: # %entry 3029; CHECK-NEXT: li a2, 1024 3030; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 3031; CHECK-NEXT: .LBB51_1: # %vector.body 3032; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 3033; CHECK-NEXT: 
vle32.v v8, (a0) 3034; CHECK-NEXT: addi a2, a2, 4 3035; CHECK-NEXT: vminu.vx v8, v8, a1 3036; CHECK-NEXT: vse32.v v8, (a0) 3037; CHECK-NEXT: addi a0, a0, -16 3038; CHECK-NEXT: bnez a2, .LBB51_1 3039; CHECK-NEXT: # %bb.2: # %for.cond.cleanup 3040; CHECK-NEXT: ret 3041entry: 3042 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0 3043 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer 3044 br label %vector.body 3045 3046vector.body: ; preds = %vector.body, %entry 3047 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 3048 %0 = getelementptr inbounds i32, ptr %a, i64 %index 3049 %wide.load = load <4 x i32>, ptr %0, align 4 3050 %1 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %broadcast.splat, <4 x i32> %wide.load) 3051 store <4 x i32> %1, ptr %0, align 4 3052 %index.next = sub nuw i64 %index, 4 3053 %2 = icmp eq i64 %index.next, 1024 3054 br i1 %2, label %for.cond.cleanup, label %vector.body 3055 3056for.cond.cleanup: ; preds = %vector.body 3057 ret void 3058} 3059 3060declare <4 x i32> @llvm.umax.v4i32(<4 x i32>, <4 x i32>) 3061 3062define void @sink_splat_umax(ptr nocapture %a, i32 signext %x) { 3063; CHECK-LABEL: sink_splat_umax: 3064; CHECK: # %bb.0: # %entry 3065; CHECK-NEXT: li a2, 1024 3066; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 3067; CHECK-NEXT: .LBB52_1: # %vector.body 3068; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 3069; CHECK-NEXT: vle32.v v8, (a0) 3070; CHECK-NEXT: addi a2, a2, 4 3071; CHECK-NEXT: vmaxu.vx v8, v8, a1 3072; CHECK-NEXT: vse32.v v8, (a0) 3073; CHECK-NEXT: addi a0, a0, -16 3074; CHECK-NEXT: bnez a2, .LBB52_1 3075; CHECK-NEXT: # %bb.2: # %for.cond.cleanup 3076; CHECK-NEXT: ret 3077entry: 3078 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0 3079 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer 3080 br label %vector.body 3081 3082vector.body: ; preds = 
%vector.body, %entry 3083 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 3084 %0 = getelementptr inbounds i32, ptr %a, i64 %index 3085 %wide.load = load <4 x i32>, ptr %0, align 4 3086 %1 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat) 3087 store <4 x i32> %1, ptr %0, align 4 3088 %index.next = sub nuw i64 %index, 4 3089 %2 = icmp eq i64 %index.next, 1024 3090 br i1 %2, label %for.cond.cleanup, label %vector.body 3091 3092for.cond.cleanup: ; preds = %vector.body 3093 ret void 3094} 3095 3096define void @sink_splat_umax_commute(ptr nocapture %a, i32 signext %x) { 3097; CHECK-LABEL: sink_splat_umax_commute: 3098; CHECK: # %bb.0: # %entry 3099; CHECK-NEXT: li a2, 1024 3100; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 3101; CHECK-NEXT: .LBB53_1: # %vector.body 3102; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 3103; CHECK-NEXT: vle32.v v8, (a0) 3104; CHECK-NEXT: addi a2, a2, 4 3105; CHECK-NEXT: vmaxu.vx v8, v8, a1 3106; CHECK-NEXT: vse32.v v8, (a0) 3107; CHECK-NEXT: addi a0, a0, -16 3108; CHECK-NEXT: bnez a2, .LBB53_1 3109; CHECK-NEXT: # %bb.2: # %for.cond.cleanup 3110; CHECK-NEXT: ret 3111entry: 3112 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0 3113 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer 3114 br label %vector.body 3115 3116vector.body: ; preds = %vector.body, %entry 3117 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 3118 %0 = getelementptr inbounds i32, ptr %a, i64 %index 3119 %wide.load = load <4 x i32>, ptr %0, align 4 3120 %1 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %broadcast.splat, <4 x i32> %wide.load) 3121 store <4 x i32> %1, ptr %0, align 4 3122 %index.next = sub nuw i64 %index, 4 3123 %2 = icmp eq i64 %index.next, 1024 3124 br i1 %2, label %for.cond.cleanup, label %vector.body 3125 3126for.cond.cleanup: ; preds = %vector.body 3127 ret void 3128} 3129 3130declare <4 x i32> 
@llvm.sadd.sat.v4i32(<4 x i32>, <4 x i32>) 3131 3132define void @sink_splat_sadd_sat(ptr nocapture %a, i32 signext %x) { 3133; CHECK-LABEL: sink_splat_sadd_sat: 3134; CHECK: # %bb.0: # %entry 3135; CHECK-NEXT: lui a2, 1 3136; CHECK-NEXT: add a2, a0, a2 3137; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 3138; CHECK-NEXT: .LBB54_1: # %vector.body 3139; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 3140; CHECK-NEXT: vle32.v v8, (a0) 3141; CHECK-NEXT: vsadd.vx v8, v8, a1 3142; CHECK-NEXT: vse32.v v8, (a0) 3143; CHECK-NEXT: addi a0, a0, 16 3144; CHECK-NEXT: bne a0, a2, .LBB54_1 3145; CHECK-NEXT: # %bb.2: # %for.cond.cleanup 3146; CHECK-NEXT: ret 3147entry: 3148 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0 3149 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer 3150 br label %vector.body 3151 3152vector.body: ; preds = %vector.body, %entry 3153 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 3154 %0 = getelementptr inbounds i32, ptr %a, i64 %index 3155 %wide.load = load <4 x i32>, ptr %0, align 4 3156 %1 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat) 3157 store <4 x i32> %1, ptr %0, align 4 3158 %index.next = add nuw i64 %index, 4 3159 %2 = icmp eq i64 %index.next, 1024 3160 br i1 %2, label %for.cond.cleanup, label %vector.body 3161 3162for.cond.cleanup: ; preds = %vector.body 3163 ret void 3164} 3165 3166define void @sink_splat_sadd_sat_commute(ptr nocapture %a, i32 signext %x) { 3167; CHECK-LABEL: sink_splat_sadd_sat_commute: 3168; CHECK: # %bb.0: # %entry 3169; CHECK-NEXT: lui a2, 1 3170; CHECK-NEXT: add a2, a0, a2 3171; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 3172; CHECK-NEXT: .LBB55_1: # %vector.body 3173; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 3174; CHECK-NEXT: vle32.v v8, (a0) 3175; CHECK-NEXT: vsadd.vx v8, v8, a1 3176; CHECK-NEXT: vse32.v v8, (a0) 3177; CHECK-NEXT: addi a0, a0, 16 3178; CHECK-NEXT: bne a0, 
a2, .LBB55_1 3179; CHECK-NEXT: # %bb.2: # %for.cond.cleanup 3180; CHECK-NEXT: ret 3181entry: 3182 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0 3183 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer 3184 br label %vector.body 3185 3186vector.body: ; preds = %vector.body, %entry 3187 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 3188 %0 = getelementptr inbounds i32, ptr %a, i64 %index 3189 %wide.load = load <4 x i32>, ptr %0, align 4 3190 %1 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %broadcast.splat, <4 x i32> %wide.load) 3191 store <4 x i32> %1, ptr %0, align 4 3192 %index.next = add nuw i64 %index, 4 3193 %2 = icmp eq i64 %index.next, 1024 3194 br i1 %2, label %for.cond.cleanup, label %vector.body 3195 3196for.cond.cleanup: ; preds = %vector.body 3197 ret void 3198} 3199 3200declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>) 3201 3202define void @sink_splat_ssub_sat(ptr nocapture %a, i32 signext %x) { 3203; CHECK-LABEL: sink_splat_ssub_sat: 3204; CHECK: # %bb.0: # %entry 3205; CHECK-NEXT: li a2, 1024 3206; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 3207; CHECK-NEXT: .LBB56_1: # %vector.body 3208; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 3209; CHECK-NEXT: vle32.v v8, (a0) 3210; CHECK-NEXT: addi a2, a2, 4 3211; CHECK-NEXT: vssub.vx v8, v8, a1 3212; CHECK-NEXT: vse32.v v8, (a0) 3213; CHECK-NEXT: addi a0, a0, -16 3214; CHECK-NEXT: bnez a2, .LBB56_1 3215; CHECK-NEXT: # %bb.2: # %for.cond.cleanup 3216; CHECK-NEXT: ret 3217entry: 3218 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0 3219 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer 3220 br label %vector.body 3221 3222vector.body: ; preds = %vector.body, %entry 3223 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 3224 %0 = getelementptr inbounds i32, ptr %a, i64 %index 3225 %wide.load = load <4 x 
i32>, ptr %0, align 4 3226 %1 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat) 3227 store <4 x i32> %1, ptr %0, align 4 3228 %index.next = sub nuw i64 %index, 4 3229 %2 = icmp eq i64 %index.next, 1024 3230 br i1 %2, label %for.cond.cleanup, label %vector.body 3231 3232for.cond.cleanup: ; preds = %vector.body 3233 ret void 3234} 3235 3236declare <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32>, <4 x i32>) 3237 3238define void @sink_splat_uadd_sat(ptr nocapture %a, i32 signext %x) { 3239; CHECK-LABEL: sink_splat_uadd_sat: 3240; CHECK: # %bb.0: # %entry 3241; CHECK-NEXT: lui a2, 1 3242; CHECK-NEXT: add a2, a0, a2 3243; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 3244; CHECK-NEXT: .LBB57_1: # %vector.body 3245; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 3246; CHECK-NEXT: vle32.v v8, (a0) 3247; CHECK-NEXT: vsaddu.vx v8, v8, a1 3248; CHECK-NEXT: vse32.v v8, (a0) 3249; CHECK-NEXT: addi a0, a0, 16 3250; CHECK-NEXT: bne a0, a2, .LBB57_1 3251; CHECK-NEXT: # %bb.2: # %for.cond.cleanup 3252; CHECK-NEXT: ret 3253entry: 3254 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0 3255 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer 3256 br label %vector.body 3257 3258vector.body: ; preds = %vector.body, %entry 3259 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 3260 %0 = getelementptr inbounds i32, ptr %a, i64 %index 3261 %wide.load = load <4 x i32>, ptr %0, align 4 3262 %1 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat) 3263 store <4 x i32> %1, ptr %0, align 4 3264 %index.next = add nuw i64 %index, 4 3265 %2 = icmp eq i64 %index.next, 1024 3266 br i1 %2, label %for.cond.cleanup, label %vector.body 3267 3268for.cond.cleanup: ; preds = %vector.body 3269 ret void 3270} 3271 3272define void @sink_splat_uadd_sat_commute(ptr nocapture %a, i32 signext %x) { 3273; CHECK-LABEL: sink_splat_uadd_sat_commute: 3274; CHECK: 
# %bb.0: # %entry 3275; CHECK-NEXT: lui a2, 1 3276; CHECK-NEXT: add a2, a0, a2 3277; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 3278; CHECK-NEXT: .LBB58_1: # %vector.body 3279; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 3280; CHECK-NEXT: vle32.v v8, (a0) 3281; CHECK-NEXT: vsaddu.vx v8, v8, a1 3282; CHECK-NEXT: vse32.v v8, (a0) 3283; CHECK-NEXT: addi a0, a0, 16 3284; CHECK-NEXT: bne a0, a2, .LBB58_1 3285; CHECK-NEXT: # %bb.2: # %for.cond.cleanup 3286; CHECK-NEXT: ret 3287entry: 3288 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0 3289 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer 3290 br label %vector.body 3291 3292vector.body: ; preds = %vector.body, %entry 3293 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 3294 %0 = getelementptr inbounds i32, ptr %a, i64 %index 3295 %wide.load = load <4 x i32>, ptr %0, align 4 3296 %1 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %broadcast.splat, <4 x i32> %wide.load) 3297 store <4 x i32> %1, ptr %0, align 4 3298 %index.next = add nuw i64 %index, 4 3299 %2 = icmp eq i64 %index.next, 1024 3300 br i1 %2, label %for.cond.cleanup, label %vector.body 3301 3302for.cond.cleanup: ; preds = %vector.body 3303 ret void 3304} 3305 3306declare <4 x i32> @llvm.usub.sat.v4i32(<4 x i32>, <4 x i32>) 3307 3308define void @sink_splat_usub_sat(ptr nocapture %a, i32 signext %x) { 3309; CHECK-LABEL: sink_splat_usub_sat: 3310; CHECK: # %bb.0: # %entry 3311; CHECK-NEXT: li a2, 1024 3312; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 3313; CHECK-NEXT: .LBB59_1: # %vector.body 3314; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 3315; CHECK-NEXT: vle32.v v8, (a0) 3316; CHECK-NEXT: addi a2, a2, 4 3317; CHECK-NEXT: vssubu.vx v8, v8, a1 3318; CHECK-NEXT: vse32.v v8, (a0) 3319; CHECK-NEXT: addi a0, a0, -16 3320; CHECK-NEXT: bnez a2, .LBB59_1 3321; CHECK-NEXT: # %bb.2: # %for.cond.cleanup 3322; CHECK-NEXT: ret 3323entry: 3324 
; Splat %x into all four i32 lanes before entering the loop.
  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body: ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i32, ptr %a, i64 %index
  %wide.load = load <4 x i32>, ptr %0, align 4
  %1 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat)
  store <4 x i32> %1, ptr %0, align 4
  ; NOTE(review): %index starts at 0, so this `sub nuw` wraps on the first
  ; iteration and, per the LangRef nuw rule, %index.next is poison. The other
  ; loops visible in this file step with `add nuw`. If this is changed,
  ; regenerate the assertions with utils/update_llc_test_checks.py rather than
  ; editing them by hand.
  %index.next = sub nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup: ; preds = %vector.body
  ret void
}

declare <4 x i32> @llvm.vp.mul.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)

; llvm.vp.mul (mask %m, vector length %vl) of each <4 x i32> chunk of %a by a
; splat of %x. The expected loop body uses vmul.vx with the scalar in a1 under
; mask v0. The VP operation runs under a vsetvli taking its length from a2,
; while the load and store use the fixed length of 4.
define void @sink_splat_vp_mul(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_mul:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lui a3, 1
; CHECK-NEXT: add a3, a0, a3
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB60_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
; CHECK-NEXT: vmul.vx v8, v8, a1, v0.t
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: bne a0, a3, .LBB60_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body: ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i32, ptr %a, i64 %index
  %wide.load = load <4 x i32>, ptr %0, align 4
  %1 = call <4 x i32> @llvm.vp.mul.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
  store <4 x i32> %1, ptr %0, align 4
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup: ; preds = %vector.body
  ret void
}

declare <4 x i32> @llvm.vp.add.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)

; llvm.vp.add of each <4 x i32> chunk of %a and a splat of %x (splat as the
; second operand). The expected loop body uses vadd.vx with the scalar in a1.
define void @sink_splat_vp_add(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_add:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lui a3, 1
; CHECK-NEXT: add a3, a0, a3
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB61_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
; CHECK-NEXT: vadd.vx v8, v8, a1, v0.t
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: bne a0, a3, .LBB61_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body: ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i32, ptr %a, i64 %index
  %wide.load = load <4 x i32>, ptr %0, align 4
  %1 = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
  store <4 x i32> %1, ptr %0, align 4
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup: ; preds = %vector.body
  ret void
}

; Same loop as sink_splat_vp_add but with the splat passed as the first
; operand of llvm.vp.add. The expected assembly is still vadd.vx with the
; scalar in a1.
define void @sink_splat_vp_add_commute(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_add_commute:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lui a3, 1
; CHECK-NEXT: add a3, a0, a3
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB62_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
; CHECK-NEXT: vadd.vx v8, v8, a1, v0.t
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: bne a0, a3, .LBB62_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body: ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i32, ptr %a, i64 %index
  %wide.load = load <4 x i32>, ptr %0, align 4
  %1 = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %broadcast.splat, <4 x i32> %wide.load, <4 x i1> %m, i32 %vl)
  store <4 x i32> %1, ptr %0, align 4
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup: ; preds = %vector.body
  ret void
}

declare <4 x i32> @llvm.vp.sub.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)

; llvm.vp.sub of a splat of %x from each <4 x i32> chunk of %a (splat as the
; second operand). The expected loop body uses vsub.vx with the scalar in a1.
define void @sink_splat_vp_sub(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_sub:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lui a3, 1
; CHECK-NEXT: add a3, a0, a3
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB63_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
; CHECK-NEXT: vsub.vx v8, v8, a1, v0.t
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: bne a0, a3, .LBB63_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body: ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i32, ptr %a, i64 %index
  %wide.load = load <4 x i32>, ptr %0, align 4
  %1 = call <4 x i32> @llvm.vp.sub.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
  store <4 x i32> %1, ptr %0, align 4
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup: ; preds = %vector.body
  ret void
}

; llvm.vp.sub with the splat of %x as the first operand (splat minus loaded
; chunk). The expected loop body uses the reversed form vrsub.vx with the
; scalar in a1.
define void @sink_splat_vp_rsub(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_rsub:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lui a3, 1
; CHECK-NEXT: add a3, a0, a3
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB64_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
; CHECK-NEXT: vrsub.vx v8, v8, a1, v0.t
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: bne a0, a3, .LBB64_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body: ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i32, ptr %a, i64 %index
  %wide.load = load <4 x i32>, ptr %0, align 4
  %1 = call <4 x i32> @llvm.vp.sub.v4i32(<4 x i32> %broadcast.splat, <4 x i32> %wide.load, <4 x i1> %m, i32 %vl)
  store <4 x i32> %1, ptr %0, align 4
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup: ; preds = %vector.body
  ret void
}

declare <4 x i32> @llvm.vp.shl.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)

; llvm.vp.shl of each <4 x i32> chunk of %a by a splat of %x (splat as the
; shift amount). The expected loop body uses vsll.vx with the scalar in a1.
define void @sink_splat_vp_shl(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_shl:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lui a3, 1
; CHECK-NEXT: add a3, a0, a3
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB65_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
; CHECK-NEXT: vsll.vx v8, v8, a1, v0.t
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: bne a0, a3, .LBB65_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body: ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i32, ptr %a, i64 %index
  %wide.load = load <4 x i32>, ptr %0, align 4
  %1 = call <4 x i32> @llvm.vp.shl.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
  store <4 x i32> %1, ptr %0, align 4
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup: ; preds = %vector.body
  ret void
}

declare <4 x i32> @llvm.vp.lshr.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)

; llvm.vp.lshr of each <4 x i32> chunk of %a by a splat of %x (splat as the
; shift amount). The expected loop body uses vsrl.vx with the scalar in a1.
define void @sink_splat_vp_lshr(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_lshr:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lui a3, 1
; CHECK-NEXT: add a3, a0, a3
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB66_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
; CHECK-NEXT: vsrl.vx v8, v8, a1, v0.t
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: bne a0, a3, .LBB66_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body: ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i32, ptr %a, i64 %index
  %wide.load = load <4 x i32>, ptr %0, align 4
  %1 = call <4 x i32> @llvm.vp.lshr.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
  store <4 x i32> %1, ptr %0, align 4
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup: ; preds = %vector.body
  ret void
}

declare <4 x i32> @llvm.vp.ashr.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)

; llvm.vp.ashr of each <4 x i32> chunk of %a by a splat of %x (splat as the
; shift amount). The expected loop body uses vsra.vx with the scalar in a1.
define void @sink_splat_vp_ashr(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_ashr:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lui a3, 1
; CHECK-NEXT: add a3, a0, a3
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB67_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
; CHECK-NEXT: vsra.vx v8, v8, a1, v0.t
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: bne a0, a3, .LBB67_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body: ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i32, ptr %a, i64 %index
  %wide.load = load <4 x i32>, ptr %0, align 4
  %1 = call <4 x i32> @llvm.vp.ashr.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
  store <4 x i32> %1, ptr %0, align 4
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup: ; preds = %vector.body
  ret void
}

; NOTE(review): this declaration and llvm.vp.fdiv further down carry a v4i32
; name suffix although their operands are <4 x float>. Presumably the IR
; parser remangles the name; confirm before renaming.
declare <4 x float> @llvm.vp.fmul.v4i32(<4 x float>, <4 x float>, <4 x i1>, i32)

; Floating-point variant: llvm.vp.fmul of each <4 x float> chunk of %a by a
; splat of %x. The expected loop body uses vfmul.vf with the scalar in fa0.
; Here the VP operation's vsetvli takes its length from a1.
define void @sink_splat_vp_fmul(ptr nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_fmul:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lui a2, 1
; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB68_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vfmul.vf v8, v8, fa0, v0.t
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: bne a0, a2, .LBB68_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
  %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
  %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body: ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds float, ptr %a, i64 %index
  %wide.load = load <4 x float>, ptr %0, align 4
  %1 = call <4 x float> @llvm.vp.fmul.v4i32(<4 x float> %wide.load, <4 x float> %broadcast.splat, <4 x i1> %m, i32 %vl)
  store <4 x float> %1, ptr %0, align 4
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup: ; preds = %vector.body
  ret void
}

declare <4 x float> @llvm.vp.fdiv.v4i32(<4 x float>, <4 x float>, <4 x i1>, i32)

; llvm.vp.fdiv of each <4 x float> chunk of %a by a splat of %x (splat as the
; divisor). The expected loop body uses vfdiv.vf with the scalar in fa0.
define void @sink_splat_vp_fdiv(ptr nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_fdiv:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lui a2, 1
; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB69_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vfdiv.vf v8, v8, fa0, v0.t
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: bne a0, a2, .LBB69_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
  %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
  %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body: ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds float, ptr %a, i64 %index
  %wide.load = load <4 x float>, ptr %0, align 4
  %1 = call <4 x float> @llvm.vp.fdiv.v4i32(<4 x float> %wide.load, <4 x float> %broadcast.splat, <4 x i1> %m, i32 %vl)
  store <4 x float> %1, ptr %0, align 4
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup: ; preds = %vector.body
  ret void
}

; llvm.vp.fdiv with the splat of %x as the dividend (first operand). The
; expected loop body uses the reversed form vfrdiv.vf with the scalar in fa0.
define void @sink_splat_vp_frdiv(ptr nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_frdiv:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lui a2, 1
; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB70_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vfrdiv.vf v8, v8, fa0, v0.t
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: bne a0, a2, .LBB70_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
  ; Splat %x into all four float lanes before entering the loop.
  %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
  %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body: ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds float, ptr %a, i64 %index
  %wide.load = load <4 x float>, ptr %0, align 4
  ; The splat is the first operand here, i.e. the dividend.
  %1 = call <4 x float> @llvm.vp.fdiv.v4i32(<4 x float> %broadcast.splat, <4 x float> %wide.load, <4 x i1> %m, i32 %vl)
  store <4 x float> %1, ptr %0, align 4
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup: ; preds = %vector.body
  ret void
}

; NOTE(review): this declaration and llvm.vp.fsub further down carry a v4i32
; name suffix although their operands are <4 x float>. Presumably the IR
; parser remangles the name; confirm before renaming.
declare <4 x float> @llvm.vp.fadd.v4i32(<4 x float>, <4 x float>, <4 x i1>, i32)

; llvm.vp.fadd (mask %m, vector length %vl) of each <4 x float> chunk of %a
; and a splat of %x. The expected loop body uses vfadd.vf with the scalar in
; fa0 under mask v0, and no vector splat is materialized.
define void @sink_splat_vp_fadd(ptr nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_fadd:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lui a2, 1
; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB71_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vfadd.vf v8, v8, fa0, v0.t
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: bne a0, a2, .LBB71_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
  %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
  %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body: ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds float, ptr %a, i64 %index
  %wide.load = load <4 x float>, ptr %0, align 4
  %1 = call <4 x float> @llvm.vp.fadd.v4i32(<4 x float> %wide.load, <4 x float> %broadcast.splat, <4 x i1> %m, i32 %vl)
  store <4 x float> %1, ptr %0, align 4
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup: ; preds = %vector.body
  ret void
}

declare <4 x float> @llvm.vp.fsub.v4i32(<4 x float>, <4 x float>, <4 x i1>, i32)

; llvm.vp.fsub of a splat of %x from each <4 x float> chunk of %a (splat as
; the second operand). The expected loop body uses vfsub.vf with the scalar in
; fa0.
define void @sink_splat_vp_fsub(ptr nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_fsub:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lui a2, 1
; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB72_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vfsub.vf v8, v8, fa0, v0.t
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: bne a0, a2, .LBB72_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
  %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
  %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body: ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds float, ptr %a, i64 %index
  %wide.load = load <4 x float>, ptr %0, align 4
  %1 = call <4 x float> @llvm.vp.fsub.v4i32(<4 x float> %wide.load, <4 x float> %broadcast.splat, <4 x i1> %m, i32 %vl)
  store <4 x float> %1, ptr %0, align 4
; Step the element index by 4 and leave the loop once it reaches 1024.
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup: ; preds = %vector.body
  ret void
}

; NOTE(review): nothing in the visible part of this file calls
; @llvm.vp.frsub.v4i32; sink_splat_vp_frsub below calls llvm.vp.fsub with the
; splat as its first operand instead. Presumably this declaration is unused;
; verify against the rest of the file before removing it.
declare <4 x float> @llvm.vp.frsub.v4i32(<4 x float>, <4 x float>, <4 x i1>, i32)

; llvm.vp.fsub with the splat of %x as the first operand (splat minus loaded
; chunk). The expected loop body uses the reversed form vfrsub.vf with the
; scalar in fa0.
define void @sink_splat_vp_frsub(ptr nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_frsub:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lui a2, 1
; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB73_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vfrsub.vf v8, v8, fa0, v0.t
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: bne a0, a2, .LBB73_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
  %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
  %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body: ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds float, ptr %a, i64 %index
  %wide.load = load <4 x float>, ptr %0, align 4
  %1 = call <4 x float> @llvm.vp.fsub.v4i32(<4 x float> %broadcast.splat, <4 x float> %wide.load, <4 x i1> %m, i32 %vl)
  store <4 x float> %1, ptr %0, align 4
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup: ; preds = %vector.body
  ret void
}

declare <4 x i32> @llvm.vp.udiv.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)

; llvm.vp.udiv (mask %m, vector length %vl) of each <4 x i32> chunk of %a by a
; splat of %x (splat as the divisor). The expected loop body uses vdivu.vx
; with the scalar in a1.
define void @sink_splat_vp_udiv(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_udiv:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lui a3, 1
; CHECK-NEXT: add a3, a0, a3
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB74_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
; CHECK-NEXT: vdivu.vx v8, v8, a1, v0.t
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: bne a0, a3, .LBB74_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body: ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i32, ptr %a, i64 %index
  %wide.load = load <4 x i32>, ptr %0, align 4
  %1 = call <4 x i32> @llvm.vp.udiv.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
  store <4 x i32> %1, ptr %0, align 4
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup: ; preds = %vector.body
  ret void
}

declare <4 x i32> @llvm.vp.sdiv.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)

; llvm.vp.sdiv of each <4 x i32> chunk of %a by a splat of %x (splat as the
; divisor). The expected loop body uses vdiv.vx with the scalar in a1.
define void @sink_splat_vp_sdiv(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_sdiv:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lui a3, 1
; CHECK-NEXT: add a3, a0, a3
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB75_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
; CHECK-NEXT: vdiv.vx v8, v8, a1, v0.t
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: bne a0, a3, .LBB75_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body: ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i32, ptr %a, i64 %index
  %wide.load = load <4 x i32>, ptr %0, align 4
  %1 = call <4 x i32> @llvm.vp.sdiv.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
  store <4 x i32> %1, ptr %0, align 4
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup: ; preds = %vector.body
  ret void
}

declare <4 x i32> @llvm.vp.urem.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)

; llvm.vp.urem of each <4 x i32> chunk of %a by a splat of %x (splat as the
; divisor). The expected loop body uses vremu.vx with the scalar in a1.
define void @sink_splat_vp_urem(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_urem:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lui a3, 1
; CHECK-NEXT: add a3, a0, a3
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB76_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
; CHECK-NEXT: vremu.vx v8, v8, a1, v0.t
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: bne a0, a3, .LBB76_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body: ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i32, ptr %a, i64 %index
  %wide.load = load <4 x i32>, ptr %0, align 4
  %1 = call <4 x i32> @llvm.vp.urem.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
  store <4 x i32> %1, ptr %0, align 4
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup: ; preds = %vector.body
  ret void
}

declare <4 x i32> @llvm.vp.srem.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)

; llvm.vp.srem of each <4 x i32> chunk of %a by a splat of %x (splat as the
; divisor). The expected loop body uses vrem.vx with the scalar in a1.
define void @sink_splat_vp_srem(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_srem:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lui a3, 1
; CHECK-NEXT: add a3, a0, a3
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB77_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
; CHECK-NEXT: vrem.vx v8, v8, a1, v0.t
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: bne a0, a3, .LBB77_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body: ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i32, ptr %a, i64 %index
  %wide.load = load <4 x i32>, ptr %0, align 4
  %1 = call <4 x i32> @llvm.vp.srem.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
  store <4 x i32> %1, ptr %0, align 4
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup: ; preds = %vector.body
  ret void
}

; Check that we don't sink a splat operand that has no chance of being folded.
; Here the splat is the dividend (first operand) of llvm.vp.srem, and the
; expected assembly keeps the vmv.v.x splat ahead of the loop and uses the
; vector-vector form vrem.vv inside it.

define void @sink_splat_vp_srem_commute(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_srem_commute:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.x v8, a1
; CHECK-NEXT: lui a1, 1
; CHECK-NEXT: add a1, a0, a1
; CHECK-NEXT: .LBB78_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v9, (a0)
; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
; CHECK-NEXT: vrem.vv v9, v8, v9, v0.t
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v9, (a0)
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: bne a0, a1, .LBB78_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body: ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i32, ptr %a, i64 %index
  %wide.load = load <4 x i32>, ptr %0, align 4
  %1 = call <4 x i32> @llvm.vp.srem.v4i32(<4 x i32> %broadcast.splat, <4 x i32> %wide.load, <4 x i1> %m, i32 %vl)
  store <4 x i32> %1, ptr %0, align 4
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup: ; preds = %vector.body
  ret void
}

declare <4 x float> @llvm.vp.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, <4 x i1>, i32)

; llvm.vp.fma computing (chunk of %a) * splat(%x) + (chunk of %b), stored back
; to %a. The expected loop body uses vfmadd.vf with the scalar in fa0, and the
; loop bound is formed from the second pointer argument (a1 + 4096).
define void @sink_splat_vp_fma(ptr noalias nocapture %a, ptr nocapture readonly %b, float %x, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_fma:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lui a3, 1
; CHECK-NEXT: add a3, a1, a3
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB79_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vle32.v v9, (a1)
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: bne a1, a3, .LBB79_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
  %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
  %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body: ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds float, ptr %a, i64 %index
  %wide.load = load <4 x float>, ptr %0, align 4
  %1 = getelementptr inbounds float, ptr %b, i64 %index
  %wide.load12 = load <4 x float>, ptr %1, align 4
  %2 = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %wide.load, <4 x float> %broadcast.splat, <4 x float> %wide.load12, <4 x i1> %m, i32 %vl)
  store <4 x float> %2, ptr %0, align 4
  %index.next = add nuw i64 %index, 4
; Leave the loop once the element index reaches 1024.
  %3 = icmp eq i64 %index.next, 1024
  br i1 %3, label %for.cond.cleanup, label %vector.body

for.cond.cleanup: ; preds = %vector.body
  ret void
}

; Same loop as sink_splat_vp_fma but with the splat of %x passed as the first
; multiplicand of llvm.vp.fma. The expected loop body is still vfmadd.vf with
; the scalar in fa0.
define void @sink_splat_vp_fma_commute(ptr noalias nocapture %a, ptr nocapture readonly %b, float %x, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_fma_commute:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lui a3, 1
; CHECK-NEXT: add a3, a1, a3
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB80_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vle32.v v9, (a1)
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: bne a1, a3, .LBB80_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
  %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
  %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body: ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds float, ptr %a, i64 %index
  %wide.load = load <4 x float>, ptr %0, align 4
  %1 = getelementptr inbounds float, ptr %b, i64 %index
  %wide.load12 = load <4 x float>, ptr %1, align 4
  %2 = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %broadcast.splat, <4 x float> %wide.load, <4 x float> %wide.load12, <4 x i1> %m, i32 %vl)
  store <4 x float> %2, ptr %0, align 4
  %index.next = add nuw i64 %index, 4
  %3 = icmp eq i64 %index.next, 1024
  br i1 %3, label %for.cond.cleanup, label %vector.body

for.cond.cleanup: ; preds = %vector.body
ret void
}


; <4 x i64> mul by a splat of %x that is defined in %entry. The expected loop
; uses vmul.vx with the scalar in a1 at e64/m2; no vector splat is materialized.
define void @sink_splat_mul_lmul2(ptr nocapture %a, i64 signext %x) {
; CHECK-LABEL: sink_splat_mul_lmul2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a2, 2
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:  .LBB81_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vmul.vx v8, v8, a1
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 32
; CHECK-NEXT:    bne a0, a2, .LBB81_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  %broadcast.splatinsert = insertelement <4 x i64> poison, i64 %x, i64 0
  %broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i64, ptr %a, i64 %index
  %wide.load = load <4 x i64>, ptr %0, align 8
  %1 = mul <4 x i64> %wide.load, %broadcast.splat
  store <4 x i64> %1, ptr %0, align 8
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}

; <4 x i64> add of a splat of %x defined in %entry. The expected loop uses
; vadd.vx with the scalar in a1 at e64/m2.
define void @sink_splat_add_lmul2(ptr nocapture %a, i64 signext %x) {
; CHECK-LABEL: sink_splat_add_lmul2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a2, 2
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:  .LBB82_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vadd.vx v8, v8, a1
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 32
; CHECK-NEXT:    bne a0, a2, .LBB82_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  %broadcast.splatinsert = insertelement <4 x i64> poison, i64 %x, i64 0
  %broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i64, ptr %a, i64 %index
  %wide.load = load <4 x i64>, ptr %0, align 8
  %1 = add <4 x i64> %wide.load, %broadcast.splat
  store <4 x i64> %1, ptr %0, align 8
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}

; <4 x i64> sub with the splat of %x as the subtrahend (load - splat). The
; expected loop uses vsub.vx with the scalar in a1 at e64/m2.
define void @sink_splat_sub_lmul2(ptr nocapture %a, i64 signext %x) {
; CHECK-LABEL: sink_splat_sub_lmul2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a2, 2
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:  .LBB83_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vsub.vx v8, v8, a1
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 32
; CHECK-NEXT:    bne a0, a2, .LBB83_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  %broadcast.splatinsert = insertelement <4 x i64> poison, i64 %x, i64 0
  %broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i64, ptr %a, i64 %index
  %wide.load = load <4 x i64>, ptr %0, align 8
  %1 = sub <4 x i64> %wide.load, %broadcast.splat
  store <4 x i64> %1, ptr %0, align 8
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}

; <4 x i64> sub with the splat of %x as the minuend (splat - load). The
; expected loop uses the reversed form vrsub.vx with the scalar in a1 at e64/m2.
define void @sink_splat_rsub_lmul2(ptr nocapture %a, i64 signext %x) {
; CHECK-LABEL: sink_splat_rsub_lmul2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a2, 2
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:  .LBB84_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vrsub.vx v8, v8, a1
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 32
; CHECK-NEXT:    bne a0, a2, .LBB84_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  %broadcast.splatinsert = insertelement <4 x i64> poison, i64 %x, i64 0
  %broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i64, ptr %a, i64 %index
  %wide.load = load <4 x i64>, ptr %0, align 8
  %1 = sub <4 x i64> %broadcast.splat, %wide.load
  store <4 x i64> %1, ptr %0, align 8
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}

; <4 x i64> and with a splat of %x defined in %entry. The expected loop uses
; vand.vx with the scalar in a1 at e64/m2.
define void @sink_splat_and_lmul2(ptr nocapture %a, i64 signext %x) {
; CHECK-LABEL: sink_splat_and_lmul2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a2, 2
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:  .LBB85_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vand.vx v8, v8, a1
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 32
; CHECK-NEXT:    bne a0, a2, .LBB85_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  %broadcast.splatinsert = insertelement <4 x i64> poison, i64 %x, i64 0
  %broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i64, ptr %a, i64 %index
  %wide.load = load <4 x i64>, ptr %0, align 8
  %1 = and <4 x i64> %wide.load, %broadcast.splat
  store <4 x i64> %1, ptr %0, align 8
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}

; <4 x i64> or with a splat of %x defined in %entry. The expected loop uses
; vor.vx with the scalar in a1 at e64/m2.
define void @sink_splat_or_lmul2(ptr nocapture %a, i64 signext %x) {
; CHECK-LABEL: sink_splat_or_lmul2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a2, 2
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:  .LBB86_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vor.vx v8, v8, a1
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 32
; CHECK-NEXT:    bne a0, a2, .LBB86_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  %broadcast.splatinsert = insertelement <4 x i64> poison, i64 %x, i64 0
  %broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i64, ptr %a, i64 %index
  %wide.load = load <4 x i64>, ptr %0, align 8
  %1 = or <4 x i64> %wide.load, %broadcast.splat
  store <4 x i64> %1, ptr %0, align 8
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}

; <4 x i64> xor with a splat of %x defined in %entry. The expected loop uses
; vxor.vx with the scalar in a1 at e64/m2.
define void @sink_splat_xor_lmul2(ptr nocapture %a, i64 signext %x) {
; CHECK-LABEL: sink_splat_xor_lmul2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a2, 2
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:  .LBB87_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vxor.vx v8, v8, a1
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 32
; CHECK-NEXT:    bne a0, a2, .LBB87_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  %broadcast.splatinsert = insertelement <4 x i64> poison, i64 %x, i64 0
  %broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i64, ptr %a, i64 %index
  %wide.load = load <4 x i64>, ptr %0, align 8
  %1 = xor <4 x i64> %wide.load, %broadcast.splat
  store <4 x i64> %1, ptr %0, align 8
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}

; <32 x i32> mul by a splat of %x defined in %entry. The expected loop uses
; vmul.vx with the scalar in a1 at e32/m8, with VL=32 loaded into a3.
; NOTE(review): each access is 32 elements wide but %index advances by only 4
; (expected pointer bump: 16 bytes), so successive iterations overlap. This
; looks incidental to what is being checked; confirm before changing it, since
; the assertions would have to be regenerated.
define void @sink_splat_mul_lmul8(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_mul_lmul8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a2, 1
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    li a3, 32
; CHECK-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
; CHECK-NEXT:  .LBB88_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vmul.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    bne a0, a2, .LBB88_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  %broadcast.splatinsert = insertelement <32 x i32> poison, i32 %x, i32 0
  %broadcast.splat = shufflevector <32 x i32> %broadcast.splatinsert, <32 x i32> poison, <32 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i32, ptr %a, i64 %index
  %wide.load = load <32 x i32>, ptr %0, align 4
  %1 = mul <32 x i32> %wide.load, %broadcast.splat
  store <32 x i32> %1, ptr %0, align 4
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}

; <32 x i32> add of a splat of %x defined in %entry. The expected loop uses
; vadd.vx with the scalar in a1 at e32/m8, with VL=32 loaded into a3.
; NOTE(review): accesses are 32 elements wide but %index steps by 4, so
; iterations overlap -- presumably incidental to the test; confirm.
define void @sink_splat_add_lmul8(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_add_lmul8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a2, 1
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    li a3, 32
; CHECK-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
; CHECK-NEXT:  .LBB89_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vadd.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    bne a0, a2, .LBB89_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  %broadcast.splatinsert = insertelement <32 x i32> poison, i32 %x, i32 0
  %broadcast.splat = shufflevector <32 x i32> %broadcast.splatinsert, <32 x i32> poison, <32 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i32, ptr %a, i64 %index
  %wide.load = load <32 x i32>, ptr %0, align 4
  %1 = add <32 x i32> %wide.load, %broadcast.splat
  store <32 x i32> %1, ptr %0, align 4
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}

; <32 x i32> sub with the splat of %x as the subtrahend (load - splat). The
; expected loop uses vsub.vx with the scalar in a1 at e32/m8.
; NOTE(review): accesses are 32 elements wide but %index steps by 4, so
; iterations overlap -- presumably incidental to the test; confirm.
define void @sink_splat_sub_lmul8(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_sub_lmul8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a2, 1
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    li a3, 32
; CHECK-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
; CHECK-NEXT:  .LBB90_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vsub.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    bne a0, a2, .LBB90_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  %broadcast.splatinsert = insertelement <32 x i32> poison, i32 %x, i32 0
  %broadcast.splat = shufflevector <32 x i32> %broadcast.splatinsert, <32 x i32> poison, <32 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i32, ptr %a, i64 %index
  %wide.load = load <32 x i32>, ptr %0, align 4
  %1 = sub <32 x i32> %wide.load, %broadcast.splat
  store <32 x i32> %1, ptr %0, align 4
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}

; <32 x i32> sub with the splat of %x as the minuend (splat - load). The
; expected loop uses the reversed form vrsub.vx with the scalar in a1 at e32/m8.
; NOTE(review): accesses are 32 elements wide but %index steps by 4, so
; iterations overlap -- presumably incidental to the test; confirm.
define void @sink_splat_rsub_lmul8(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_rsub_lmul8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a2, 1
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    li a3, 32
; CHECK-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
; CHECK-NEXT:  .LBB91_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vrsub.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    bne a0, a2, .LBB91_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  %broadcast.splatinsert = insertelement <32 x i32> poison, i32 %x, i32 0
  %broadcast.splat = shufflevector <32 x i32> %broadcast.splatinsert, <32 x i32> poison, <32 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i32, ptr %a, i64 %index
  %wide.load = load <32 x i32>, ptr %0, align 4
  %1 = sub <32 x i32> %broadcast.splat, %wide.load
  store <32 x i32> %1, ptr %0, align 4
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}

; <32 x i32> and with a splat of %x defined in %entry. The expected loop uses
; vand.vx with the scalar in a1 at e32/m8, with VL=32 loaded into a3.
; NOTE(review): accesses are 32 elements wide but %index steps by 4, so
; iterations overlap -- presumably incidental to the test; confirm.
define void @sink_splat_and_lmul8(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_and_lmul8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a2, 1
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    li a3, 32
; CHECK-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
; CHECK-NEXT:  .LBB92_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vand.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    bne a0, a2, .LBB92_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  %broadcast.splatinsert = insertelement <32 x i32> poison, i32 %x, i32 0
%broadcast.splat = shufflevector <32 x i32> %broadcast.splatinsert, <32 x i32> poison, <32 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i32, ptr %a, i64 %index
  %wide.load = load <32 x i32>, ptr %0, align 4
  %1 = and <32 x i32> %wide.load, %broadcast.splat
  store <32 x i32> %1, ptr %0, align 4
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}

; <32 x i32> or with a splat of %x defined in %entry. The expected loop uses
; vor.vx with the scalar in a1 at e32/m8, with VL=32 loaded into a3.
; NOTE(review): accesses are 32 elements wide but %index steps by 4, so
; iterations overlap -- presumably incidental to the test; confirm.
define void @sink_splat_or_lmul8(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_or_lmul8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a2, 1
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    li a3, 32
; CHECK-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
; CHECK-NEXT:  .LBB93_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vor.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    bne a0, a2, .LBB93_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  %broadcast.splatinsert = insertelement <32 x i32> poison, i32 %x, i32 0
  %broadcast.splat = shufflevector <32 x i32> %broadcast.splatinsert, <32 x i32> poison, <32 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i32, ptr %a, i64 %index
  %wide.load = load <32 x i32>, ptr %0, align 4
  %1 = or <32 x i32> %wide.load, %broadcast.splat
  store <32 x i32> %1, ptr %0, align 4
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}

; <32 x i32> xor with a splat of %x defined in %entry. The expected loop uses
; vxor.vx with the scalar in a1 at e32/m8, with VL=32 loaded into a3.
; NOTE(review): accesses are 32 elements wide but %index steps by 4, so
; iterations overlap -- presumably incidental to the test; confirm.
define void @sink_splat_xor_lmul8(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_xor_lmul8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a2, 1
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    li a3, 32
; CHECK-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
; CHECK-NEXT:  .LBB94_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vxor.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    bne a0, a2, .LBB94_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  %broadcast.splatinsert = insertelement <32 x i32> poison, i32 %x, i32 0
  %broadcast.splat = shufflevector <32 x i32> %broadcast.splatinsert, <32 x i32> poison, <32 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i32, ptr %a, i64 %index
  %wide.load = load <32 x i32>, ptr %0, align 4
  %1 = xor <32 x i32> %wide.load, %broadcast.splat
  store <32 x i32> %1, ptr %0, align 4
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}

; <2 x i32> mul by a splat of %x defined in %entry. The expected loop uses
; vmul.vx with the scalar in a1 at e32/mf2 (VL=2).
; The address comes from an i64-typed GEP whose index steps by 4, so the
; expected pointer bump is 32 bytes per 8-byte access.
define void @sink_splat_mul_lmulmf2(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_mul_lmulmf2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a2, 2
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:  .LBB95_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vmul.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 32
; CHECK-NEXT:    bne a0, a2, .LBB95_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  %broadcast.splatinsert = insertelement <2 x i32> poison, i32 %x, i64 0
  %broadcast.splat = shufflevector <2 x i32> %broadcast.splatinsert, <2 x i32> poison, <2 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i64, ptr %a, i64 %index
  %wide.load = load <2 x i32>, ptr %0, align 8
  %1 = mul <2 x i32> %wide.load, %broadcast.splat
  store <2 x i32> %1, ptr %0, align 8
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}

; <2 x i32> add of a splat of %x defined in %entry. The expected loop uses
; vadd.vx with the scalar in a1 at e32/mf2 (VL=2).
; The address comes from an i64-typed GEP whose index steps by 4, so the
; expected pointer bump is 32 bytes per 8-byte access.
define void @sink_splat_add_lmulmf2(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_add_lmulmf2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a2, 2
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:  .LBB96_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vadd.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 32
; CHECK-NEXT:    bne a0, a2, .LBB96_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  %broadcast.splatinsert = insertelement <2 x i32> poison, i32 %x, i64 0
  %broadcast.splat = shufflevector <2 x i32> %broadcast.splatinsert, <2 x i32> poison, <2 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i64, ptr %a, i64 %index
  %wide.load = load <2 x i32>, ptr %0, align 8
  %1 = add <2 x i32> %wide.load, %broadcast.splat
store <2 x i32> %1, ptr %0, align 8
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}

; <2 x i32> sub with the splat of %x as the subtrahend (load - splat). The
; expected loop uses vsub.vx with the scalar in a1 at e32/mf2 (VL=2).
; The address comes from an i64-typed GEP whose index steps by 4, so the
; expected pointer bump is 32 bytes per 8-byte access.
define void @sink_splat_sub_lmulmf2(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_sub_lmulmf2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a2, 2
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:  .LBB97_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vsub.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 32
; CHECK-NEXT:    bne a0, a2, .LBB97_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  %broadcast.splatinsert = insertelement <2 x i32> poison, i32 %x, i64 0
  %broadcast.splat = shufflevector <2 x i32> %broadcast.splatinsert, <2 x i32> poison, <2 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i64, ptr %a, i64 %index
  %wide.load = load <2 x i32>, ptr %0, align 8
  %1 = sub <2 x i32> %wide.load, %broadcast.splat
  store <2 x i32> %1, ptr %0, align 8
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}

; <2 x i32> sub with the splat of %x as the minuend (splat - load). The
; expected loop uses the reversed form vrsub.vx with the scalar in a1 at
; e32/mf2 (VL=2). The i64-typed GEP stepping by 4 gives a 32-byte pointer bump.
define void @sink_splat_rsub_lmulmf2(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_rsub_lmulmf2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a2, 2
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:  .LBB98_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vrsub.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 32
; CHECK-NEXT:    bne a0, a2, .LBB98_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  %broadcast.splatinsert = insertelement <2 x i32> poison, i32 %x, i64 0
  %broadcast.splat = shufflevector <2 x i32> %broadcast.splatinsert, <2 x i32> poison, <2 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i64, ptr %a, i64 %index
  %wide.load = load <2 x i32>, ptr %0, align 8
  %1 = sub <2 x i32> %broadcast.splat, %wide.load
  store <2 x i32> %1, ptr %0, align 8
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}

; <2 x i32> and with a splat of %x defined in %entry. The expected loop uses
; vand.vx with the scalar in a1 at e32/mf2 (VL=2). The i64-typed GEP stepping
; by 4 gives a 32-byte pointer bump.
define void @sink_splat_and_lmulmf2(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_and_lmulmf2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a2, 2
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:  .LBB99_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vand.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 32
; CHECK-NEXT:    bne a0, a2, .LBB99_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  %broadcast.splatinsert = insertelement <2 x i32> poison, i32 %x, i64 0
  %broadcast.splat = shufflevector <2 x i32> %broadcast.splatinsert, <2 x i32> poison, <2 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i64, ptr %a, i64 %index
  %wide.load = load <2 x i32>, ptr %0, align 8
  %1 = and <2 x i32> %wide.load, %broadcast.splat
  store <2 x i32> %1, ptr %0, align 8
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}

; <2 x i32> or with a splat of %x defined in %entry. The expected loop uses
; vor.vx with the scalar in a1 at e32/mf2 (VL=2). The i64-typed GEP stepping
; by 4 gives a 32-byte pointer bump.
define void @sink_splat_or_lmulmf2(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_or_lmulmf2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a2, 2
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:  .LBB100_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vor.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 32
; CHECK-NEXT:    bne a0, a2, .LBB100_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  %broadcast.splatinsert = insertelement <2 x i32> poison, i32 %x, i64 0
  %broadcast.splat = shufflevector <2 x i32> %broadcast.splatinsert, <2 x i32> poison, <2 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i64, ptr %a, i64 %index
  %wide.load = load <2 x i32>, ptr %0, align 8
  %1 = or <2 x i32> %wide.load, %broadcast.splat
  store <2 x i32> %1, ptr %0, align 8
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}

; <2 x i32> xor with a splat of %x defined in %entry. The expected loop uses
; vxor.vx with the scalar in a1 at e32/mf2 (VL=2). The i64-typed GEP stepping
; by 4 gives a 32-byte pointer bump.
define void @sink_splat_xor_lmulmf2(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_xor_lmulmf2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a2, 2
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:  .LBB101_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vxor.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 32
; CHECK-NEXT:    bne a0, a2, .LBB101_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  %broadcast.splatinsert = insertelement <2 x i32> poison, i32 %x, i64 0
  %broadcast.splat = shufflevector <2 x i32> %broadcast.splatinsert, <2 x i32> poison, <2 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i64, ptr %a, i64 %index
  %wide.load = load <2 x i32>, ptr %0, align 8
  %1 = xor <2 x i32> %wide.load, %broadcast.splat
  store <2 x i32> %1, ptr %0, align 8
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}

declare <4 x i1> @llvm.vp.icmp.v4i32(<4 x i32>, <4 x i32>, metadata, <4 x i1>, i32)

; llvm.vp.icmp "eq" of the loaded vector against a splat of %y; the result is
; the mask of a masked store of zeroes. The expected loop compares with the
; scalar form vmseq.vx (a1) under v0.t at VL=%vl (a2). The incoming mask %m is
; saved to v8 before the loop and copied back into v0 on each iteration, since
; the compare overwrites v0 with the store mask.
define void @sink_splat_vp_icmp(ptr nocapture %x, i32 signext %y, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_icmp:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv1r.v v8, v0
; CHECK-NEXT:    lui a3, 1
; CHECK-NEXT:    add a3, a0, a3
; CHECK-NEXT:    vmv.v.i v9, 0
; CHECK-NEXT:  .LBB102_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vmv1r.v v0, v8
; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
; CHECK-NEXT:    vmseq.vx v0, v10, a1, v0.t
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vse32.v v9, (a0), v0.t
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    bne a0, a3, .LBB102_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %y, i32 0
  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i32, ptr %x, i64 %index
  %wide.load = load <4 x i32>, ptr %0, align 4
  %1 = call <4 x i1> @llvm.vp.icmp.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, metadata !"eq", <4 x i1> %m, i32 %vl)
  call void @llvm.masked.store.v4i32.p0(<4 x i32> zeroinitializer, ptr %0, i32 4, <4 x i1> %1)
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}

declare <4 x i1> @llvm.vp.fcmp.v4f32(<4 x float>, <4 x float>, metadata, <4 x i1>, i32)

; llvm.vp.fcmp "oeq" of the loaded vector against a splat of %y; the result is
; the mask of a masked store of zeroes. The expected loop compares with the
; scalar form vmfeq.vf (fa0) under v0.t at VL=%vl (a1). The incoming mask %m is
; saved to v8 before the loop and copied back into v0 on each iteration.
define void @sink_splat_vp_fcmp(ptr nocapture %x, float %y, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_fcmp:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv1r.v v8, v0
; CHECK-NEXT:    lui a2, 1
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    vmv.v.i v9, 0
; CHECK-NEXT:  .LBB103_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vmv1r.v v0, v8
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT:    vmfeq.vf v0, v10, fa0, v0.t
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vse32.v v9, (a0), v0.t
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    bne a0, a2, .LBB103_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  %broadcast.splatinsert = insertelement <4 x float> poison, float %y, i32 0
  %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds float, ptr %x, i64 %index
  %wide.load = load <4 x float>, ptr %0, align 4
  %1 = call <4 x i1> @llvm.vp.fcmp.v4f32(<4 x float> %wide.load, <4 x float> %broadcast.splat, metadata !"oeq", <4 x i1> %m, i32 %vl)
  call void @llvm.masked.store.v4f32.p0(<4 x float> zeroinitializer, ptr %0, i32 4, <4 x i1> %1)
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}

declare <4 x i32> @llvm.vp.smin.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)

; llvm.vp.smin of the loaded vector with a splat of %x as the second operand.
; The expected loop uses the scalar form vmin.vx (a1) under v0.t, switching VL
; to %vl (a2) for the min and back to 4 for the unpredicated store.
define void @sink_splat_vp_min(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_min:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a3, 1
; CHECK-NEXT:    add a3, a0, a3
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:  .LBB104_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
; CHECK-NEXT:    vmin.vx v8, v8, a1, v0.t
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    bne a0, a3, .LBB104_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i32, ptr %a, i64 %index
  %wide.load = load <4 x i32>, ptr %0, align 4
  %1 = call <4 x i32> @llvm.vp.smin.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
  store <4 x i32> %1, ptr %0, align 4
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}

define void @sink_splat_vp_min_commute(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_min_commute:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a3, 1
; CHECK-NEXT:    add a3, a0, a3
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:  .LBB105_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
; CHECK-NEXT:    vmin.vx v8, v8, a1, v0.t
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    bne a0, a3, .LBB105_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i32, ptr %a, i64 %index
  %wide.load = load <4 x i32>, ptr %0, align 4
  %1 = call <4 x i32> @llvm.vp.smin.v4i32(<4 x i32> %broadcast.splat, <4 x i32> %wide.load, <4 x i1> %m, i32 %vl)
  store <4 x i32> %1, ptr %0, align 4
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1
%2, label %for.cond.cleanup, label %vector.body 5014 5015for.cond.cleanup: ; preds = %vector.body 5016 ret void 5017} 5018 5019declare <4 x i32> @llvm.vp.smax.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32) 5020 5021define void @sink_splat_vp_max(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) { 5022; CHECK-LABEL: sink_splat_vp_max: 5023; CHECK: # %bb.0: # %entry 5024; CHECK-NEXT: lui a3, 1 5025; CHECK-NEXT: add a3, a0, a3 5026; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 5027; CHECK-NEXT: .LBB106_1: # %vector.body 5028; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 5029; CHECK-NEXT: vle32.v v8, (a0) 5030; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma 5031; CHECK-NEXT: vmax.vx v8, v8, a1, v0.t 5032; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 5033; CHECK-NEXT: vse32.v v8, (a0) 5034; CHECK-NEXT: addi a0, a0, 16 5035; CHECK-NEXT: bne a0, a3, .LBB106_1 5036; CHECK-NEXT: # %bb.2: # %for.cond.cleanup 5037; CHECK-NEXT: ret 5038entry: 5039 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0 5040 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer 5041 br label %vector.body 5042 5043vector.body: ; preds = %vector.body, %entry 5044 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 5045 %0 = getelementptr inbounds i32, ptr %a, i64 %index 5046 %wide.load = load <4 x i32>, ptr %0, align 4 5047 %1 = call <4 x i32> @llvm.vp.smax.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl) 5048 store <4 x i32> %1, ptr %0, align 4 5049 %index.next = add nuw i64 %index, 4 5050 %2 = icmp eq i64 %index.next, 1024 5051 br i1 %2, label %for.cond.cleanup, label %vector.body 5052 5053for.cond.cleanup: ; preds = %vector.body 5054 ret void 5055} 5056 5057define void @sink_splat_vp_max_commute(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) { 5058; CHECK-LABEL: sink_splat_vp_max_commute: 5059; CHECK: # %bb.0: # %entry 5060; CHECK-NEXT: lui a3, 1 5061; 
CHECK-NEXT: add a3, a0, a3 5062; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 5063; CHECK-NEXT: .LBB107_1: # %vector.body 5064; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 5065; CHECK-NEXT: vle32.v v8, (a0) 5066; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma 5067; CHECK-NEXT: vmax.vx v8, v8, a1, v0.t 5068; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 5069; CHECK-NEXT: vse32.v v8, (a0) 5070; CHECK-NEXT: addi a0, a0, 16 5071; CHECK-NEXT: bne a0, a3, .LBB107_1 5072; CHECK-NEXT: # %bb.2: # %for.cond.cleanup 5073; CHECK-NEXT: ret 5074entry: 5075 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0 5076 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer 5077 br label %vector.body 5078 5079vector.body: ; preds = %vector.body, %entry 5080 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 5081 %0 = getelementptr inbounds i32, ptr %a, i64 %index 5082 %wide.load = load <4 x i32>, ptr %0, align 4 5083 %1 = call <4 x i32> @llvm.vp.smax.v4i32(<4 x i32> %broadcast.splat, <4 x i32> %wide.load, <4 x i1> %m, i32 %vl) 5084 store <4 x i32> %1, ptr %0, align 4 5085 %index.next = add nuw i64 %index, 4 5086 %2 = icmp eq i64 %index.next, 1024 5087 br i1 %2, label %for.cond.cleanup, label %vector.body 5088 5089for.cond.cleanup: ; preds = %vector.body 5090 ret void 5091} 5092 5093define void @sink_splat_vp_umin_commute(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) { 5094; CHECK-LABEL: sink_splat_vp_umin_commute: 5095; CHECK: # %bb.0: # %entry 5096; CHECK-NEXT: lui a3, 1 5097; CHECK-NEXT: add a3, a0, a3 5098; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 5099; CHECK-NEXT: .LBB108_1: # %vector.body 5100; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 5101; CHECK-NEXT: vle32.v v8, (a0) 5102; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma 5103; CHECK-NEXT: vminu.vx v8, v8, a1, v0.t 5104; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 5105; CHECK-NEXT: vse32.v v8, (a0) 5106; 
CHECK-NEXT: addi a0, a0, 16 5107; CHECK-NEXT: bne a0, a3, .LBB108_1 5108; CHECK-NEXT: # %bb.2: # %for.cond.cleanup 5109; CHECK-NEXT: ret 5110entry: 5111 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0 5112 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer 5113 br label %vector.body 5114 5115vector.body: ; preds = %vector.body, %entry 5116 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 5117 %0 = getelementptr inbounds i32, ptr %a, i64 %index 5118 %wide.load = load <4 x i32>, ptr %0, align 4 5119 %1 = call <4 x i32> @llvm.vp.umin.v4i32(<4 x i32> %broadcast.splat, <4 x i32> %wide.load, <4 x i1> %m, i32 %vl) 5120 store <4 x i32> %1, ptr %0, align 4 5121 %index.next = add nuw i64 %index, 4 5122 %2 = icmp eq i64 %index.next, 1024 5123 br i1 %2, label %for.cond.cleanup, label %vector.body 5124 5125for.cond.cleanup: ; preds = %vector.body 5126 ret void 5127} 5128 5129declare <4 x i32> @llvm.vp.umax.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32) 5130 5131define void @sink_splat_vp_umax(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) { 5132; CHECK-LABEL: sink_splat_vp_umax: 5133; CHECK: # %bb.0: # %entry 5134; CHECK-NEXT: lui a3, 1 5135; CHECK-NEXT: add a3, a0, a3 5136; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 5137; CHECK-NEXT: .LBB109_1: # %vector.body 5138; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 5139; CHECK-NEXT: vle32.v v8, (a0) 5140; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma 5141; CHECK-NEXT: vmaxu.vx v8, v8, a1, v0.t 5142; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 5143; CHECK-NEXT: vse32.v v8, (a0) 5144; CHECK-NEXT: addi a0, a0, 16 5145; CHECK-NEXT: bne a0, a3, .LBB109_1 5146; CHECK-NEXT: # %bb.2: # %for.cond.cleanup 5147; CHECK-NEXT: ret 5148entry: 5149 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0 5150 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer 
5151 br label %vector.body 5152 5153vector.body: ; preds = %vector.body, %entry 5154 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 5155 %0 = getelementptr inbounds i32, ptr %a, i64 %index 5156 %wide.load = load <4 x i32>, ptr %0, align 4 5157 %1 = call <4 x i32> @llvm.vp.umax.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl) 5158 store <4 x i32> %1, ptr %0, align 4 5159 %index.next = add nuw i64 %index, 4 5160 %2 = icmp eq i64 %index.next, 1024 5161 br i1 %2, label %for.cond.cleanup, label %vector.body 5162 5163for.cond.cleanup: ; preds = %vector.body 5164 ret void 5165} 5166 5167define void @sink_splat_vp_umax_commute(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) { 5168; CHECK-LABEL: sink_splat_vp_umax_commute: 5169; CHECK: # %bb.0: # %entry 5170; CHECK-NEXT: lui a3, 1 5171; CHECK-NEXT: add a3, a0, a3 5172; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 5173; CHECK-NEXT: .LBB110_1: # %vector.body 5174; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 5175; CHECK-NEXT: vle32.v v8, (a0) 5176; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma 5177; CHECK-NEXT: vmaxu.vx v8, v8, a1, v0.t 5178; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 5179; CHECK-NEXT: vse32.v v8, (a0) 5180; CHECK-NEXT: addi a0, a0, 16 5181; CHECK-NEXT: bne a0, a3, .LBB110_1 5182; CHECK-NEXT: # %bb.2: # %for.cond.cleanup 5183; CHECK-NEXT: ret 5184entry: 5185 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0 5186 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer 5187 br label %vector.body 5188 5189vector.body: ; preds = %vector.body, %entry 5190 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 5191 %0 = getelementptr inbounds i32, ptr %a, i64 %index 5192 %wide.load = load <4 x i32>, ptr %0, align 4 5193 %1 = call <4 x i32> @llvm.vp.umax.v4i32(<4 x i32> %broadcast.splat, <4 x i32> %wide.load, <4 x i1> %m, i32 %vl) 5194 store <4 x i32> %1, ptr %0, align 
4 5195 %index.next = add nuw i64 %index, 4 5196 %2 = icmp eq i64 %index.next, 1024 5197 br i1 %2, label %for.cond.cleanup, label %vector.body 5198 5199for.cond.cleanup: ; preds = %vector.body 5200 ret void 5201} 5202 5203declare <4 x i32> @llvm.vp.sadd.sat.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32) 5204 5205define void @sink_splat_vp_sadd_sat(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) { 5206; CHECK-LABEL: sink_splat_vp_sadd_sat: 5207; CHECK: # %bb.0: # %entry 5208; CHECK-NEXT: lui a3, 1 5209; CHECK-NEXT: add a3, a0, a3 5210; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 5211; CHECK-NEXT: .LBB111_1: # %vector.body 5212; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 5213; CHECK-NEXT: vle32.v v8, (a0) 5214; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma 5215; CHECK-NEXT: vsadd.vx v8, v8, a1, v0.t 5216; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 5217; CHECK-NEXT: vse32.v v8, (a0) 5218; CHECK-NEXT: addi a0, a0, 16 5219; CHECK-NEXT: bne a0, a3, .LBB111_1 5220; CHECK-NEXT: # %bb.2: # %for.cond.cleanup 5221; CHECK-NEXT: ret 5222entry: 5223 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0 5224 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer 5225 br label %vector.body 5226 5227vector.body: ; preds = %vector.body, %entry 5228 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 5229 %0 = getelementptr inbounds i32, ptr %a, i64 %index 5230 %wide.load = load <4 x i32>, ptr %0, align 4 5231 %1 = call <4 x i32> @llvm.vp.sadd.sat.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl) 5232 store <4 x i32> %1, ptr %0, align 4 5233 %index.next = add nuw i64 %index, 4 5234 %2 = icmp eq i64 %index.next, 1024 5235 br i1 %2, label %for.cond.cleanup, label %vector.body 5236 5237for.cond.cleanup: ; preds = %vector.body 5238 ret void 5239} 5240 5241define void @sink_splat_vp_sadd_sat_commute(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext 
%vl) { 5242; CHECK-LABEL: sink_splat_vp_sadd_sat_commute: 5243; CHECK: # %bb.0: # %entry 5244; CHECK-NEXT: lui a3, 1 5245; CHECK-NEXT: add a3, a0, a3 5246; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 5247; CHECK-NEXT: .LBB112_1: # %vector.body 5248; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 5249; CHECK-NEXT: vle32.v v8, (a0) 5250; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma 5251; CHECK-NEXT: vsadd.vx v8, v8, a1, v0.t 5252; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 5253; CHECK-NEXT: vse32.v v8, (a0) 5254; CHECK-NEXT: addi a0, a0, 16 5255; CHECK-NEXT: bne a0, a3, .LBB112_1 5256; CHECK-NEXT: # %bb.2: # %for.cond.cleanup 5257; CHECK-NEXT: ret 5258entry: 5259 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0 5260 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer 5261 br label %vector.body 5262 5263vector.body: ; preds = %vector.body, %entry 5264 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 5265 %0 = getelementptr inbounds i32, ptr %a, i64 %index 5266 %wide.load = load <4 x i32>, ptr %0, align 4 5267 %1 = call <4 x i32> @llvm.vp.sadd.sat.v4i32(<4 x i32> %broadcast.splat, <4 x i32> %wide.load, <4 x i1> %m, i32 %vl) 5268 store <4 x i32> %1, ptr %0, align 4 5269 %index.next = add nuw i64 %index, 4 5270 %2 = icmp eq i64 %index.next, 1024 5271 br i1 %2, label %for.cond.cleanup, label %vector.body 5272 5273for.cond.cleanup: ; preds = %vector.body 5274 ret void 5275} 5276 5277declare <4 x i32> @llvm.vp.ssub.sat.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32) 5278 5279define void @sink_splat_vp_ssub_sat(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) { 5280; CHECK-LABEL: sink_splat_vp_ssub_sat: 5281; CHECK: # %bb.0: # %entry 5282; CHECK-NEXT: li a3, 1024 5283; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 5284; CHECK-NEXT: .LBB113_1: # %vector.body 5285; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 5286; CHECK-NEXT: vle32.v v8, (a0) 5287; CHECK-NEXT: 
addi a3, a3, 4 5288; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma 5289; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t 5290; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 5291; CHECK-NEXT: vse32.v v8, (a0) 5292; CHECK-NEXT: addi a0, a0, -16 5293; CHECK-NEXT: bnez a3, .LBB113_1 5294; CHECK-NEXT: # %bb.2: # %for.cond.cleanup 5295; CHECK-NEXT: ret 5296entry: 5297 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0 5298 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer 5299 br label %vector.body 5300 5301vector.body: ; preds = %vector.body, %entry 5302 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 5303 %0 = getelementptr inbounds i32, ptr %a, i64 %index 5304 %wide.load = load <4 x i32>, ptr %0, align 4 5305 %1 = call <4 x i32> @llvm.vp.ssub.sat.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl) 5306 store <4 x i32> %1, ptr %0, align 4 5307 %index.next = sub nuw i64 %index, 4 5308 %2 = icmp eq i64 %index.next, 1024 5309 br i1 %2, label %for.cond.cleanup, label %vector.body 5310 5311for.cond.cleanup: ; preds = %vector.body 5312 ret void 5313} 5314 5315declare <4 x i32> @llvm.vp.uadd.sat.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32) 5316 5317define void @sink_splat_vp_uadd_sat(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) { 5318; CHECK-LABEL: sink_splat_vp_uadd_sat: 5319; CHECK: # %bb.0: # %entry 5320; CHECK-NEXT: lui a3, 1 5321; CHECK-NEXT: add a3, a0, a3 5322; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 5323; CHECK-NEXT: .LBB114_1: # %vector.body 5324; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 5325; CHECK-NEXT: vle32.v v8, (a0) 5326; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma 5327; CHECK-NEXT: vsaddu.vx v8, v8, a1, v0.t 5328; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 5329; CHECK-NEXT: vse32.v v8, (a0) 5330; CHECK-NEXT: addi a0, a0, 16 5331; CHECK-NEXT: bne a0, a3, .LBB114_1 5332; CHECK-NEXT: # %bb.2: # %for.cond.cleanup 5333; 
CHECK-NEXT: ret 5334entry: 5335 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0 5336 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer 5337 br label %vector.body 5338 5339vector.body: ; preds = %vector.body, %entry 5340 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 5341 %0 = getelementptr inbounds i32, ptr %a, i64 %index 5342 %wide.load = load <4 x i32>, ptr %0, align 4 5343 %1 = call <4 x i32> @llvm.vp.uadd.sat.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl) 5344 store <4 x i32> %1, ptr %0, align 4 5345 %index.next = add nuw i64 %index, 4 5346 %2 = icmp eq i64 %index.next, 1024 5347 br i1 %2, label %for.cond.cleanup, label %vector.body 5348 5349for.cond.cleanup: ; preds = %vector.body 5350 ret void 5351} 5352 5353define void @sink_splat_vp_uadd_sat_commute(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) { 5354; CHECK-LABEL: sink_splat_vp_uadd_sat_commute: 5355; CHECK: # %bb.0: # %entry 5356; CHECK-NEXT: lui a3, 1 5357; CHECK-NEXT: add a3, a0, a3 5358; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 5359; CHECK-NEXT: .LBB115_1: # %vector.body 5360; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 5361; CHECK-NEXT: vle32.v v8, (a0) 5362; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma 5363; CHECK-NEXT: vsaddu.vx v8, v8, a1, v0.t 5364; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 5365; CHECK-NEXT: vse32.v v8, (a0) 5366; CHECK-NEXT: addi a0, a0, 16 5367; CHECK-NEXT: bne a0, a3, .LBB115_1 5368; CHECK-NEXT: # %bb.2: # %for.cond.cleanup 5369; CHECK-NEXT: ret 5370entry: 5371 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0 5372 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer 5373 br label %vector.body 5374 5375vector.body: ; preds = %vector.body, %entry 5376 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 5377 %0 = getelementptr 
inbounds i32, ptr %a, i64 %index 5378 %wide.load = load <4 x i32>, ptr %0, align 4 5379 %1 = call <4 x i32> @llvm.vp.uadd.sat.v4i32(<4 x i32> %broadcast.splat, <4 x i32> %wide.load, <4 x i1> %m, i32 %vl) 5380 store <4 x i32> %1, ptr %0, align 4 5381 %index.next = add nuw i64 %index, 4 5382 %2 = icmp eq i64 %index.next, 1024 5383 br i1 %2, label %for.cond.cleanup, label %vector.body 5384 5385for.cond.cleanup: ; preds = %vector.body 5386 ret void 5387} 5388 5389declare <4 x i32> @llvm.vp.usub.sat.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32) 5390 5391define void @sink_splat_vp_usub_sat(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) { 5392; CHECK-LABEL: sink_splat_vp_usub_sat: 5393; CHECK: # %bb.0: # %entry 5394; CHECK-NEXT: li a3, 1024 5395; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 5396; CHECK-NEXT: .LBB116_1: # %vector.body 5397; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 5398; CHECK-NEXT: vle32.v v8, (a0) 5399; CHECK-NEXT: addi a3, a3, 4 5400; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma 5401; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t 5402; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 5403; CHECK-NEXT: vse32.v v8, (a0) 5404; CHECK-NEXT: addi a0, a0, -16 5405; CHECK-NEXT: bnez a3, .LBB116_1 5406; CHECK-NEXT: # %bb.2: # %for.cond.cleanup 5407; CHECK-NEXT: ret 5408entry: 5409 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0 5410 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer 5411 br label %vector.body 5412 5413vector.body: ; preds = %vector.body, %entry 5414 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 5415 %0 = getelementptr inbounds i32, ptr %a, i64 %index 5416 %wide.load = load <4 x i32>, ptr %0, align 4 5417 %1 = call <4 x i32> @llvm.vp.usub.sat.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl) 5418 store <4 x i32> %1, ptr %0, align 4 5419 %index.next = sub nuw i64 %index, 4 5420 %2 = icmp eq i64 %index.next, 
1024 5421 br i1 %2, label %for.cond.cleanup, label %vector.body 5422 5423for.cond.cleanup: ; preds = %vector.body 5424 ret void 5425} 5426 5427define void @sink_splat_select_op1(ptr nocapture %a, i32 signext %x) { 5428; CHECK-LABEL: sink_splat_select_op1: 5429; CHECK: # %bb.0: # %entry 5430; CHECK-NEXT: lui a2, 1 5431; CHECK-NEXT: add a2, a0, a2 5432; CHECK-NEXT: li a3, 42 5433; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 5434; CHECK-NEXT: .LBB117_1: # %vector.body 5435; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 5436; CHECK-NEXT: vle32.v v8, (a0) 5437; CHECK-NEXT: vmseq.vx v0, v8, a3 5438; CHECK-NEXT: vmerge.vxm v8, v8, a1, v0 5439; CHECK-NEXT: vse32.v v8, (a0) 5440; CHECK-NEXT: addi a0, a0, 16 5441; CHECK-NEXT: bne a0, a2, .LBB117_1 5442; CHECK-NEXT: # %bb.2: # %for.cond.cleanup 5443; CHECK-NEXT: ret 5444entry: 5445 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0 5446 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer 5447 br label %vector.body 5448 5449vector.body: ; preds = %vector.body, %entry 5450 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 5451 %0 = getelementptr inbounds i32, ptr %a, i64 %index 5452 %load = load <4 x i32>, ptr %0, align 4 5453 %cond = icmp eq <4 x i32> %load, splat (i32 42) 5454 %1 = select <4 x i1> %cond, <4 x i32> %broadcast.splat, <4 x i32> %load 5455 store <4 x i32> %1, ptr %0, align 4 5456 %index.next = add nuw i64 %index, 4 5457 %2 = icmp eq i64 %index.next, 1024 5458 br i1 %2, label %for.cond.cleanup, label %vector.body 5459 5460for.cond.cleanup: ; preds = %vector.body 5461 ret void 5462} 5463 5464define void @sink_splat_select_op2(ptr nocapture %a, i32 signext %x) { 5465; CHECK-LABEL: sink_splat_select_op2: 5466; CHECK: # %bb.0: # %entry 5467; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 5468; CHECK-NEXT: vmv.v.x v8, a1 5469; CHECK-NEXT: lui a1, 1 5470; CHECK-NEXT: add a1, a0, a1 5471; CHECK-NEXT: li a2, 42 5472; CHECK-NEXT: 
.LBB118_1: # %vector.body 5473; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 5474; CHECK-NEXT: vle32.v v9, (a0) 5475; CHECK-NEXT: vmseq.vx v0, v9, a2 5476; CHECK-NEXT: vmerge.vvm v9, v8, v9, v0 5477; CHECK-NEXT: vse32.v v9, (a0) 5478; CHECK-NEXT: addi a0, a0, 16 5479; CHECK-NEXT: bne a0, a1, .LBB118_1 5480; CHECK-NEXT: # %bb.2: # %for.cond.cleanup 5481; CHECK-NEXT: ret 5482entry: 5483 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0 5484 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer 5485 br label %vector.body 5486 5487vector.body: ; preds = %vector.body, %entry 5488 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 5489 %0 = getelementptr inbounds i32, ptr %a, i64 %index 5490 %load = load <4 x i32>, ptr %0, align 4 5491 %cond = icmp eq <4 x i32> %load, splat (i32 42) 5492 %1 = select <4 x i1> %cond, <4 x i32> %load, <4 x i32> %broadcast.splat 5493 store <4 x i32> %1, ptr %0, align 4 5494 %index.next = add nuw i64 %index, 4 5495 %2 = icmp eq i64 %index.next, 1024 5496 br i1 %2, label %for.cond.cleanup, label %vector.body 5497 5498for.cond.cleanup: ; preds = %vector.body 5499 ret void 5500} 5501 5502define void @sink_splat_vp_select_op1(ptr %a, i32 %x, i32 %vl) { 5503; CHECK-LABEL: sink_splat_vp_select_op1: 5504; CHECK: # %bb.0: # %entry 5505; CHECK-NEXT: lui a4, 1 5506; CHECK-NEXT: li a3, 42 5507; CHECK-NEXT: slli a5, a2, 32 5508; CHECK-NEXT: add a2, a0, a4 5509; CHECK-NEXT: srli a4, a5, 32 5510; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 5511; CHECK-NEXT: .LBB119_1: # %vector.body 5512; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 5513; CHECK-NEXT: vle32.v v8, (a0) 5514; CHECK-NEXT: vmseq.vx v0, v8, a3 5515; CHECK-NEXT: vsetvli zero, a4, e32, m1, ta, ma 5516; CHECK-NEXT: vmerge.vxm v8, v8, a1, v0 5517; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 5518; CHECK-NEXT: vse32.v v8, (a0) 5519; CHECK-NEXT: addi a0, a0, 16 5520; CHECK-NEXT: bne a0, a2, .LBB119_1 
5521; CHECK-NEXT: # %bb.2: # %for.cond.cleanup 5522; CHECK-NEXT: ret 5523entry: 5524 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0 5525 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer 5526 br label %vector.body 5527 5528vector.body: 5529 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 5530 %0 = getelementptr inbounds i32, ptr %a, i64 %index 5531 %load = load <4 x i32>, ptr %0, align 4 5532 %cond = icmp eq <4 x i32> %load, splat (i32 42) 5533 %1 = call <4 x i32> @llvm.vp.select(<4 x i1> %cond, <4 x i32> %broadcast.splat, <4 x i32> %load, i32 %vl) 5534 store <4 x i32> %1, ptr %0, align 4 5535 %index.next = add nuw i64 %index, 4 5536 %2 = icmp eq i64 %index.next, 1024 5537 br i1 %2, label %for.cond.cleanup, label %vector.body 5538 5539for.cond.cleanup: 5540 ret void 5541} 5542 5543define void @sink_splat_vp_select_op2(ptr %a, i32 %x, i32 %vl) { 5544; CHECK-LABEL: sink_splat_vp_select_op2: 5545; CHECK: # %bb.0: # %entry 5546; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 5547; CHECK-NEXT: vmv.v.x v8, a1 5548; CHECK-NEXT: lui a3, 1 5549; CHECK-NEXT: li a1, 42 5550; CHECK-NEXT: slli a4, a2, 32 5551; CHECK-NEXT: add a2, a0, a3 5552; CHECK-NEXT: srli a3, a4, 32 5553; CHECK-NEXT: .LBB120_1: # %vector.body 5554; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 5555; CHECK-NEXT: vle32.v v9, (a0) 5556; CHECK-NEXT: vmseq.vx v0, v9, a1 5557; CHECK-NEXT: vsetvli zero, a3, e32, m1, ta, ma 5558; CHECK-NEXT: vmerge.vvm v9, v8, v9, v0 5559; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 5560; CHECK-NEXT: vse32.v v9, (a0) 5561; CHECK-NEXT: addi a0, a0, 16 5562; CHECK-NEXT: bne a0, a2, .LBB120_1 5563; CHECK-NEXT: # %bb.2: # %for.cond.cleanup 5564; CHECK-NEXT: ret 5565entry: 5566 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0 5567 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer 5568 br label 
%vector.body 5569 5570vector.body: 5571 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 5572 %0 = getelementptr inbounds i32, ptr %a, i64 %index 5573 %load = load <4 x i32>, ptr %0, align 4 5574 %cond = icmp eq <4 x i32> %load, splat (i32 42) 5575 %1 = call <4 x i32> @llvm.vp.select(<4 x i1> %cond, <4 x i32> %load, <4 x i32> %broadcast.splat, i32 %vl) 5576 store <4 x i32> %1, ptr %0, align 4 5577 %index.next = add nuw i64 %index, 4 5578 %2 = icmp eq i64 %index.next, 1024 5579 br i1 %2, label %for.cond.cleanup, label %vector.body 5580 5581for.cond.cleanup: 5582 ret void 5583} 5584 5585define void @sink_splat_fmuladd(ptr %a, ptr %b, float %x) { 5586; CHECK-LABEL: sink_splat_fmuladd: 5587; CHECK: # %bb.0: # %entry 5588; CHECK-NEXT: lui a2, 1 5589; CHECK-NEXT: add a2, a1, a2 5590; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 5591; CHECK-NEXT: .LBB121_1: # %vector.body 5592; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 5593; CHECK-NEXT: vle32.v v8, (a0) 5594; CHECK-NEXT: vle32.v v9, (a1) 5595; CHECK-NEXT: addi a1, a1, 16 5596; CHECK-NEXT: vfmacc.vf v9, fa0, v8 5597; CHECK-NEXT: vse32.v v9, (a0) 5598; CHECK-NEXT: addi a0, a0, 16 5599; CHECK-NEXT: bne a1, a2, .LBB121_1 5600; CHECK-NEXT: # %bb.2: # %for.cond.cleanup 5601; CHECK-NEXT: ret 5602entry: 5603 %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0 5604 %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer 5605 br label %vector.body 5606 5607vector.body: 5608 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 5609 %0 = getelementptr inbounds float, ptr %a, i64 %index 5610 %wide.load = load <4 x float>, ptr %0, align 4 5611 %1 = getelementptr inbounds float, ptr %b, i64 %index 5612 %wide.load12 = load <4 x float>, ptr %1, align 4 5613 %2 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %wide.load, <4 x float> %broadcast.splat, <4 x float> %wide.load12) 5614 store <4 x float> %2, ptr %0, align 4 5615 
%index.next = add nuw i64 %index, 4 5616 %3 = icmp eq i64 %index.next, 1024 5617 br i1 %3, label %for.cond.cleanup, label %vector.body 5618 5619for.cond.cleanup: 5620 ret void 5621} 5622 5623define void @sink_splat_fmuladd_commute(ptr %a, ptr %b, float %x) { 5624; CHECK-LABEL: sink_splat_fmuladd_commute: 5625; CHECK: # %bb.0: # %entry 5626; CHECK-NEXT: lui a2, 1 5627; CHECK-NEXT: add a2, a1, a2 5628; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 5629; CHECK-NEXT: .LBB122_1: # %vector.body 5630; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 5631; CHECK-NEXT: vle32.v v8, (a0) 5632; CHECK-NEXT: vle32.v v9, (a1) 5633; CHECK-NEXT: addi a1, a1, 16 5634; CHECK-NEXT: vfmacc.vf v9, fa0, v8 5635; CHECK-NEXT: vse32.v v9, (a0) 5636; CHECK-NEXT: addi a0, a0, 16 5637; CHECK-NEXT: bne a1, a2, .LBB122_1 5638; CHECK-NEXT: # %bb.2: # %for.cond.cleanup 5639; CHECK-NEXT: ret 5640entry: 5641 %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0 5642 %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer 5643 br label %vector.body 5644 5645vector.body: 5646 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 5647 %0 = getelementptr inbounds float, ptr %a, i64 %index 5648 %wide.load = load <4 x float>, ptr %0, align 4 5649 %1 = getelementptr inbounds float, ptr %b, i64 %index 5650 %wide.load12 = load <4 x float>, ptr %1, align 4 5651 %2 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %broadcast.splat, <4 x float> %wide.load, <4 x float> %wide.load12) 5652 store <4 x float> %2, ptr %0, align 4 5653 %index.next = add nuw i64 %index, 4 5654 %3 = icmp eq i64 %index.next, 1024 5655 br i1 %3, label %for.cond.cleanup, label %vector.body 5656 5657for.cond.cleanup: 5658 ret void 5659} 5660 5661define void @sink_splat_vp_fmuladd(ptr %a, ptr %b, float %x, <4 x i1> %m, i32 %vl) { 5662; CHECK-LABEL: sink_splat_vp_fmuladd: 5663; CHECK: # %bb.0: # %entry 5664; CHECK-NEXT: lui a3, 1 5665; CHECK-NEXT: slli a4, 
a2, 32 5666; CHECK-NEXT: add a2, a1, a3 5667; CHECK-NEXT: srli a3, a4, 32 5668; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 5669; CHECK-NEXT: .LBB123_1: # %vector.body 5670; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 5671; CHECK-NEXT: vle32.v v8, (a0) 5672; CHECK-NEXT: vle32.v v9, (a1) 5673; CHECK-NEXT: addi a1, a1, 16 5674; CHECK-NEXT: vsetvli zero, a3, e32, m1, ta, ma 5675; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t 5676; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 5677; CHECK-NEXT: vse32.v v8, (a0) 5678; CHECK-NEXT: addi a0, a0, 16 5679; CHECK-NEXT: bne a1, a2, .LBB123_1 5680; CHECK-NEXT: # %bb.2: # %for.cond.cleanup 5681; CHECK-NEXT: ret 5682entry: 5683 %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0 5684 %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer 5685 br label %vector.body 5686 5687vector.body: 5688 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 5689 %0 = getelementptr inbounds float, ptr %a, i64 %index 5690 %wide.load = load <4 x float>, ptr %0, align 4 5691 %1 = getelementptr inbounds float, ptr %b, i64 %index 5692 %wide.load12 = load <4 x float>, ptr %1, align 4 5693 %2 = call <4 x float> @llvm.vp.fmuladd.v4f32(<4 x float> %wide.load, <4 x float> %broadcast.splat, <4 x float> %wide.load12, <4 x i1> %m, i32 %vl) 5694 store <4 x float> %2, ptr %0, align 4 5695 %index.next = add nuw i64 %index, 4 5696 %3 = icmp eq i64 %index.next, 1024 5697 br i1 %3, label %for.cond.cleanup, label %vector.body 5698 5699for.cond.cleanup: 5700 ret void 5701} 5702 5703define void @sink_splat_vp_fmuladd_commute(ptr %a, ptr %b, float %x, <4 x i1> %m, i32 %vl) { 5704; CHECK-LABEL: sink_splat_vp_fmuladd_commute: 5705; CHECK: # %bb.0: # %entry 5706; CHECK-NEXT: lui a3, 1 5707; CHECK-NEXT: slli a4, a2, 32 5708; CHECK-NEXT: add a2, a1, a3 5709; CHECK-NEXT: srli a3, a4, 32 5710; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 5711; CHECK-NEXT: .LBB124_1: # %vector.body 
5712; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 5713; CHECK-NEXT: vle32.v v8, (a0) 5714; CHECK-NEXT: vle32.v v9, (a1) 5715; CHECK-NEXT: addi a1, a1, 16 5716; CHECK-NEXT: vsetvli zero, a3, e32, m1, ta, ma 5717; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t 5718; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 5719; CHECK-NEXT: vse32.v v8, (a0) 5720; CHECK-NEXT: addi a0, a0, 16 5721; CHECK-NEXT: bne a1, a2, .LBB124_1 5722; CHECK-NEXT: # %bb.2: # %for.cond.cleanup 5723; CHECK-NEXT: ret 5724entry: 5725 %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0 5726 %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer 5727 br label %vector.body 5728 5729vector.body: 5730 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 5731 %0 = getelementptr inbounds float, ptr %a, i64 %index 5732 %wide.load = load <4 x float>, ptr %0, align 4 5733 %1 = getelementptr inbounds float, ptr %b, i64 %index 5734 %wide.load12 = load <4 x float>, ptr %1, align 4 5735 %2 = call <4 x float> @llvm.vp.fmuladd.v4f32(<4 x float> %broadcast.splat, <4 x float> %wide.load, <4 x float> %wide.load12, <4 x i1> %m, i32 %vl) 5736 store <4 x float> %2, ptr %0, align 4 5737 %index.next = add nuw i64 %index, 4 5738 %3 = icmp eq i64 %index.next, 1024 5739 br i1 %3, label %for.cond.cleanup, label %vector.body 5740 5741for.cond.cleanup: 5742 ret void 5743} 5744