; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2

; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
; RUN:   -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
; RUN:   -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64

declare <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
  <vscale x 1 x i8>,
  <vscale x 1 x i8>,
  <vscale x 1 x i8>,
  iXLen, iXLen);
declare <vscale x 1 x i8> @llvm.riscv.vasub.nxv1i8.nxv1i8(
  <vscale x 1 x i8>,
  <vscale x 1 x i8>,
  <vscale x 1 x i8>,
  iXLen, iXLen);

; Test same rounding mode in one block.
define <vscale x 1 x i8> @test1(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
; CHECK-LABEL: test1:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vaadd.vv v8, v8, v9
; CHECK-NEXT:    vaadd.vv v8, v8, v10
; CHECK-NEXT:    ret
entry:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  %b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %a,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)

  ret <vscale x 1 x i8> %b
}

; Test different rounding mode.
define <vscale x 1 x i8> @test2(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
; CHECK-LABEL: test2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vaadd.vv v8, v8, v9
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    vaadd.vv v8, v8, v10
; CHECK-NEXT:    ret
entry:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 2, iXLen %3)
  %b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %a,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)

  ret <vscale x 1 x i8> %b
}

declare <vscale x 1 x i8> @foo(<vscale x 1 x i8>)

; Test same vxrm with call in between which may invalidate vxrm.
define <vscale x 1 x i8> @test3(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
; RV32-LABEL: test3:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    addi sp, sp, -32
; RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    sub sp, sp, a1
; RV32-NEXT:    mv s0, a0
; RV32-NEXT:    addi a1, sp, 16
; RV32-NEXT:    vs1r.v v10, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    csrwi vxrm, 0
; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT:    vaadd.vv v8, v8, v9
; RV32-NEXT:    call foo
; RV32-NEXT:    csrwi vxrm, 0
; RV32-NEXT:    addi a0, sp, 16
; RV32-NEXT:    vl1r.v v9, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vsetvli zero, s0, e8, mf8, ta, ma
; RV32-NEXT:    vaadd.vv v8, v8, v9
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 32
; RV32-NEXT:    ret
;
; RV64-LABEL: test3:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    addi sp, sp, -32
; RV64-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    sub sp, sp, a1
; RV64-NEXT:    mv s0, a0
; RV64-NEXT:    addi a1, sp, 16
; RV64-NEXT:    vs1r.v v10, (a1) # Unknown-size Folded Spill
; RV64-NEXT:    csrwi vxrm, 0
; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT:    vaadd.vv v8, v8, v9
; RV64-NEXT:    call foo
; RV64-NEXT:    csrwi vxrm, 0
; RV64-NEXT:    addi a0, sp, 16
; RV64-NEXT:    vl1r.v v9, (a0) # Unknown-size Folded Reload
; RV64-NEXT:    vsetvli zero, s0, e8, mf8, ta, ma
; RV64-NEXT:    vaadd.vv v8, v8, v9
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    add sp, sp, a0
; RV64-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 32
; RV64-NEXT:    ret
entry:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  %b = call <vscale x 1 x i8> @foo(<vscale x 1 x i8> %a)
  %c = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %b,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)

  ret <vscale x 1 x i8> %c
}

; Test same vxrm with asm in between which may invalidate vxrm.
define <vscale x 1 x i8> @test4(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
; CHECK-LABEL: test4:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vaadd.vv v8, v8, v9
; CHECK-NEXT:    #APP
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vaadd.vv v8, v8, v10
; CHECK-NEXT:    ret
entry:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  %b = call <vscale x 1 x i8> asm "", "=^vr,0"(<vscale x 1 x i8> %a)
  %c = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %b,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)

  ret <vscale x 1 x i8> %c
}

; Test same rounding mode in triangle.
define <vscale x 1 x i8> @test5(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3, i1 %cond) nounwind {
; CHECK-LABEL: test5:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    andi a1, a1, 1
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vaadd.vv v8, v8, v9
; CHECK-NEXT:    beqz a1, .LBB4_2
; CHECK-NEXT:  # %bb.1: # %condblock
; CHECK-NEXT:    vaadd.vv v8, v8, v10
; CHECK-NEXT:  .LBB4_2: # %mergeblock
; CHECK-NEXT:    ret
entry:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  br i1 %cond, label %condblock, label %mergeblock

condblock:
  %b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %a,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)
  br label %mergeblock

mergeblock:
  %c = phi <vscale x 1 x i8> [%a, %entry], [%b, %condblock]

  ret <vscale x 1 x i8> %c
}

; Test same rounding mode in diamond with no dominating vxrm.
define <vscale x 1 x i8> @test6(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3, i1 %cond) nounwind {
; CHECK-LABEL: test6:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    andi a1, a1, 1
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    beqz a1, .LBB5_2
; CHECK-NEXT:  # %bb.1: # %trueblock
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vaadd.vv v8, v8, v9
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB5_2: # %falseblock
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vaadd.vv v8, v8, v10
; CHECK-NEXT:    ret
entry:
  br i1 %cond, label %trueblock, label %falseblock

trueblock:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  br label %mergeblock

falseblock:
  %b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)
  br label %mergeblock

mergeblock:
  %c = phi <vscale x 1 x i8> [%a, %trueblock], [%b, %falseblock]

  ret <vscale x 1 x i8> %c
}

; Test same rounding mode in diamond with same dominating vxrm.
define <vscale x 1 x i8> @test7(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3, i1 %cond) nounwind {
; CHECK-LABEL: test7:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    andi a1, a1, 1
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vaadd.vv v8, v8, v9
; CHECK-NEXT:    beqz a1, .LBB6_2
; CHECK-NEXT:  # %bb.1: # %trueblock
; CHECK-NEXT:    vaadd.vv v8, v8, v10
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB6_2: # %falseblock
; CHECK-NEXT:    vasub.vv v8, v8, v10
; CHECK-NEXT:    ret
entry:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  br i1 %cond, label %trueblock, label %falseblock

trueblock:
  %b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %a,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)
  br label %mergeblock

falseblock:
  %c = call <vscale x 1 x i8> @llvm.riscv.vasub.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %a,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)
  br label %mergeblock

mergeblock:
  %d = phi <vscale x 1 x i8> [%b, %trueblock], [%c, %falseblock]

  ret <vscale x 1 x i8> %d
}

; Test same rounding mode in diamond with same vxrm at merge.
define <vscale x 1 x i8> @test8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3, i1 %cond) nounwind {
; CHECK-LABEL: test8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    andi a1, a1, 1
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    beqz a1, .LBB7_2
; CHECK-NEXT:  # %bb.1: # %trueblock
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vaadd.vv v8, v8, v9
; CHECK-NEXT:    vaadd.vv v8, v8, v10
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB7_2: # %falseblock
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vasub.vv v8, v8, v9
; CHECK-NEXT:    vaadd.vv v8, v8, v10
; CHECK-NEXT:    ret
entry:
  br i1 %cond, label %trueblock, label %falseblock

trueblock:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  br label %mergeblock

falseblock:
  %b = call <vscale x 1 x i8> @llvm.riscv.vasub.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  br label %mergeblock

mergeblock:
  %c = phi <vscale x 1 x i8> [%a, %trueblock], [%b, %falseblock]
  %d = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %c,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)

  ret <vscale x 1 x i8> %d
}

; Test same rounding mode in diamond with different vxrm at merge.
define <vscale x 1 x i8> @test9(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3, i1 %cond) nounwind {
; CHECK-LABEL: test9:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    andi a1, a1, 1
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    beqz a1, .LBB8_2
; CHECK-NEXT:  # %bb.1: # %trueblock
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vaadd.vv v8, v8, v9
; CHECK-NEXT:    j .LBB8_3
; CHECK-NEXT:  .LBB8_2: # %falseblock
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vasub.vv v8, v8, v9
; CHECK-NEXT:  .LBB8_3: # %mergeblock
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vaadd.vv v8, v8, v10
; CHECK-NEXT:    ret
entry:
  br i1 %cond, label %trueblock, label %falseblock

trueblock:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  br label %mergeblock

falseblock:
  %b = call <vscale x 1 x i8> @llvm.riscv.vasub.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  br label %mergeblock

mergeblock:
  %c = phi <vscale x 1 x i8> [%a, %trueblock], [%b, %falseblock]
  %d = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %c,
    <vscale x 1 x i8> %2,
    iXLen 2, iXLen %3)

  ret <vscale x 1 x i8> %d
}

; Test loop with no dominating vxrm write.
define void @test10(ptr nocapture %ptr_dest, ptr nocapture readonly %ptr_op1, ptr nocapture readonly %ptr_op2, iXLen %n) {
; CHECK-LABEL: test10:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    beqz a3, .LBB9_3
; CHECK-NEXT:  # %bb.1: # %for.body.preheader
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:  .LBB9_2: # %for.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vsetvli a4, a3, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a1)
; CHECK-NEXT:    vle8.v v9, (a2)
; CHECK-NEXT:    vaadd.vv v8, v8, v9
; CHECK-NEXT:    sub a3, a3, a4
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    bnez a3, .LBB9_2
; CHECK-NEXT:  .LBB9_3: # %for.end
; CHECK-NEXT:    ret
entry:
  %tobool.not9 = icmp eq iXLen %n, 0
  br i1 %tobool.not9, label %for.end, label %for.body

for.body:
  %n.addr.011 = phi iXLen [ %n, %entry ], [ %sub, %for.body ]
  %vl = tail call iXLen @llvm.riscv.vsetvli.iXLen(iXLen %n.addr.011, iXLen 0, iXLen 5)
  %load1 = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op1, iXLen %vl)
  %load2 = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op2, iXLen %vl)
  %vadd = tail call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %load1, <vscale x 1 x i8> %load2, iXLen 2, iXLen %vl)
  tail call void @llvm.riscv.vse.nxv1i8.iXLen(<vscale x 1 x i8> %vadd, ptr %ptr_dest, iXLen %vl)
  %sub = sub iXLen %n.addr.011, %vl
  %tobool.not = icmp eq iXLen %sub, 0
  br i1 %tobool.not, label %for.end, label %for.body

for.end:
  ret void
}

declare iXLen @llvm.riscv.vsetvli.iXLen(iXLen, iXLen immarg, iXLen immarg)
declare <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8>, ptr nocapture, iXLen)
declare void @llvm.riscv.vse.nxv1i8.iXLen(<vscale x 1 x i8>, ptr nocapture, iXLen)

; Test loop with dominating vxrm write. Make sure there is no write in the loop.
define void @test11(ptr nocapture %ptr_dest, ptr nocapture readonly %ptr_op1, ptr nocapture readonly %ptr_op2, iXLen %n) {
; CHECK-LABEL: test11:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli a4, a3, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a1)
; CHECK-NEXT:    vle8.v v9, (a2)
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:  .LBB10_1: # %for.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vaadd.vv v8, v8, v9
; CHECK-NEXT:    sub a3, a3, a4
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    beqz a3, .LBB10_3
; CHECK-NEXT:  # %bb.2: # %for.body
; CHECK-NEXT:    # in Loop: Header=BB10_1 Depth=1
; CHECK-NEXT:    vsetvli a4, a3, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a1)
; CHECK-NEXT:    vle8.v v9, (a2)
; CHECK-NEXT:    j .LBB10_1
; CHECK-NEXT:  .LBB10_3: # %for.end
; CHECK-NEXT:    ret
entry:
  %vl = tail call iXLen @llvm.riscv.vsetvli.iXLen(iXLen %n, iXLen 0, iXLen 5)
  %load1a = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op1, iXLen %vl)
  %load2a = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op2, iXLen %vl)
  %vadda = tail call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %load1a, <vscale x 1 x i8> %load2a, iXLen 2, iXLen %vl)
  tail call void @llvm.riscv.vse.nxv1i8.iXLen(<vscale x 1 x i8> %vadda, ptr %ptr_dest, iXLen %vl)
  %suba = sub iXLen %n, %vl
  %tobool.not9 = icmp eq iXLen %suba, 0
  br i1 %tobool.not9, label %for.end, label %for.body

for.body:
  %n.addr.011 = phi iXLen [ %suba, %entry ], [ %sub, %for.body ]
  %vl2 = tail call iXLen @llvm.riscv.vsetvli.iXLen(iXLen %n.addr.011, iXLen 0, iXLen 5)
  %load1 = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op1, iXLen %vl2)
  %load2 = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op2, iXLen %vl2)
  %vadd = tail call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %load1, <vscale x 1 x i8> %load2, iXLen 2, iXLen %vl2)
  tail call void @llvm.riscv.vse.nxv1i8.iXLen(<vscale x 1 x i8> %vadd, ptr %ptr_dest, iXLen %vl2)
  %sub = sub iXLen %n.addr.011, %vl2
  %tobool.not = icmp eq iXLen %sub, 0
  br i1 %tobool.not, label %for.end, label %for.body

for.end:
  ret void
}

; The edge from entry to block2 is a critical edge. The vxrm write in block2
; is redundant when coming from block1, but is needed when coming from entry.
; FIXME: We could remove the write from the end of block1 without splitting the
; critical edge.
define <vscale x 1 x i8> @test12(i1 %c1, <vscale x 1 x i8> %0, <vscale x 1 x i8> %1, iXLen %vl) {
; CHECK-LABEL: test12:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    andi a0, a0, 1
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
; CHECK-NEXT:    vaadd.vv v9, v8, v9
; CHECK-NEXT:    beqz a0, .LBB11_2
; CHECK-NEXT:  # %bb.1: # %block1
; CHECK-NEXT:    csrwi vxrm, 1
; CHECK-NEXT:    vaadd.vv v9, v8, v9
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:  .LBB11_2: # %block2
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vaadd.vv v8, v8, v9
; CHECK-NEXT:    ret
entry:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %1, iXLen 0, iXLen %vl)
  br i1 %c1, label %block1, label %block2

block1:
  %b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %a, iXLen 1, iXLen %vl)
  br label %block2

block2:
  %c = phi <vscale x 1 x i8> [ %a, %entry ], [ %b, %block1]
  %d = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %c, iXLen 2, iXLen %vl)
  ret <vscale x 1 x i8> %d
}

; Similar to test12, but introduces a second critical edge from block1 to
; block3. Now the write to vxrm at the end of block1 can't be removed because
; it is needed by block3.
define <vscale x 1 x i8> @test13(i1 %c1, i1 %c2, i1 %c3, <vscale x 1 x i8> %0, <vscale x 1 x i8> %1, iXLen %vl) {
; CHECK-LABEL: test13:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    andi a0, a0, 1
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    vsetvli zero, a3, e8, mf8, ta, ma
; CHECK-NEXT:    vaadd.vv v10, v8, v9
; CHECK-NEXT:    beqz a0, .LBB12_2
; CHECK-NEXT:  # %bb.1: # %block1
; CHECK-NEXT:    csrwi vxrm, 1
; CHECK-NEXT:    vaadd.vv v10, v8, v10
; CHECK-NEXT:    andi a1, a1, 1
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    beqz a1, .LBB12_3
; CHECK-NEXT:  .LBB12_2: # %block2
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vaadd.vv v8, v8, v10
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB12_3: # %block3
; CHECK-NEXT:    vaadd.vv v8, v9, v10
; CHECK-NEXT:    ret
entry:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %1, iXLen 0, iXLen %vl)
  br i1 %c1, label %block1, label %block2

block1:
  %b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %a, iXLen 1, iXLen %vl)
  br i1 %c2, label %block2, label %block3

block2:
  %c = phi <vscale x 1 x i8> [ %a, %entry ], [ %b, %block1]
  %d = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %c, iXLen 2, iXLen %vl)
  ret <vscale x 1 x i8> %d

block3:
  %e = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %1, <vscale x 1 x i8> %b, iXLen 2, iXLen %vl)
  ret <vscale x 1 x i8> %e
}