; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=loop-vectorize \
; RUN:   -force-tail-folding-style=data-with-evl \
; RUN:   -prefer-predicate-over-epilogue=predicate-dont-vectorize \
; RUN:   -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s --check-prefix=IF-EVL

; RUN: opt -passes=loop-vectorize \
; RUN:   -force-tail-folding-style=none \
; RUN:   -prefer-predicate-over-epilogue=predicate-dont-vectorize \
; RUN:   -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s --check-prefix=NO-VP

; Dependence distance between read and write is greater than the trip
; count of the loop. Thus, values written are never read for any
; valid vectorization of the loop.
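; A C equivalent of the loop below might look like this (hypothetical source,
; shown only to make the access pattern explicit):
;
;   for (long i = 0; i < 200; i++)
;     p[i + 200] = p[i];
;
; Every store lands at least 200 elements beyond the current load, past the
; last element the loop ever reads.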
define void @test(ptr %p) {
; IF-EVL-LABEL: @test(
; IF-EVL-NEXT:  entry:
; IF-EVL-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; IF-EVL:       vector.ph:
; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[TMP1]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 200, [[TMP2]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP3]], 2
; IF-EVL-NEXT:    br label [[VECTOR_BODY:%.*]]
; IF-EVL:       vector.body:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP5:%.*]] = sub i64 200, [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP5]], i32 2, i1 true)
; IF-EVL-NEXT:    [[TMP7:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP7]]
; IF-EVL-NEXT:    [[TMP9:%.*]] = getelementptr i64, ptr [[TMP8]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP9]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP6]])
; IF-EVL-NEXT:    [[TMP10:%.*]] = add i64 [[TMP7]], 200
; IF-EVL-NEXT:    [[TMP11:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP10]]
; IF-EVL-NEXT:    [[TMP12:%.*]] = getelementptr i64, ptr [[TMP11]], i32 0
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP_LOAD]], ptr align 8 [[TMP12]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP6]])
; IF-EVL-NEXT:    [[TMP13:%.*]] = zext i32 [[TMP6]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP13]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
; IF-EVL-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL:       middle.block:
; IF-EVL-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; IF-EVL:       scalar.ph:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; IF-EVL-NEXT:    br label [[LOOP:%.*]]
; IF-EVL:       loop:
; IF-EVL-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; IF-EVL-NEXT:    [[A1:%.*]] = getelementptr i64, ptr [[P]], i64 [[IV]]
; IF-EVL-NEXT:    [[V:%.*]] = load i64, ptr [[A1]], align 8
; IF-EVL-NEXT:    [[OFFSET:%.*]] = add i64 [[IV]], 200
; IF-EVL-NEXT:    [[A2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET]]
; IF-EVL-NEXT:    store i64 [[V]], ptr [[A2]], align 8
; IF-EVL-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
; IF-EVL-NEXT:    [[CMP:%.*]] = icmp ne i64 [[IV]], 199
; IF-EVL-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
; IF-EVL:       exit:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: @test(
; NO-VP-NEXT:  entry:
; NO-VP-NEXT:    br label [[LOOP:%.*]]
; NO-VP:       loop:
; NO-VP-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; NO-VP-NEXT:    [[A1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[IV]]
; NO-VP-NEXT:    [[V:%.*]] = load i64, ptr [[A1]], align 8
; NO-VP-NEXT:    [[OFFSET:%.*]] = add i64 [[IV]], 200
; NO-VP-NEXT:    [[A2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET]]
; NO-VP-NEXT:    store i64 [[V]], ptr [[A2]], align 8
; NO-VP-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
; NO-VP-NEXT:    [[CMP:%.*]] = icmp ne i64 [[IV]], 199
; NO-VP-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
; NO-VP:       exit:
; NO-VP-NEXT:    ret void
;
entry:
  br label %loop

loop:
  %iv = phi i64 [0, %entry], [%iv.next, %loop]
  %a1 = getelementptr i64, ptr %p, i64 %iv
  %v = load i64, ptr %a1, align 8
  %offset = add i64 %iv, 200
  %a2 = getelementptr i64, ptr %p, i64 %offset
  store i64 %v, ptr %a2, align 8
  %iv.next = add i64 %iv, 1
  %cmp = icmp ne i64 %iv, 199
  br i1 %cmp, label %loop, label %exit

exit:
  ret void
}

; Dependence distance is less than the trip count, so we must prove that the
; chosen VF is guaranteed not to exceed the dependence distance.
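; As a hypothetical C sketch of the loop below, for illustration:
;
;   for (long i = 0; i < 200; i++)
;     p[i + 100] = p[i];
;
; Any VF of at most 100 elements is safe; the checks show a fixed VF of 4.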
define void @test_may_clobber1(ptr %p) {
; IF-EVL-LABEL: @test_may_clobber1(
; IF-EVL-NEXT:  entry:
; IF-EVL-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; IF-EVL:       vector.ph:
; IF-EVL-NEXT:    br label [[VECTOR_BODY:%.*]]
; IF-EVL:       vector.body:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; IF-EVL-NEXT:    [[TMP1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP0]]
; IF-EVL-NEXT:    [[TMP2:%.*]] = getelementptr i64, ptr [[TMP1]], i32 0
; IF-EVL-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32
; IF-EVL-NEXT:    [[TMP3:%.*]] = add i64 [[TMP0]], 100
; IF-EVL-NEXT:    [[TMP4:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP3]]
; IF-EVL-NEXT:    [[TMP5:%.*]] = getelementptr i64, ptr [[TMP4]], i32 0
; IF-EVL-NEXT:    store <4 x i64> [[WIDE_LOAD]], ptr [[TMP5]], align 32
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; IF-EVL-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200
; IF-EVL-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; IF-EVL:       middle.block:
; IF-EVL-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; IF-EVL:       scalar.ph:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; IF-EVL-NEXT:    br label [[LOOP:%.*]]
; IF-EVL:       loop:
; IF-EVL-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; IF-EVL-NEXT:    [[A1:%.*]] = getelementptr i64, ptr [[P]], i64 [[IV]]
; IF-EVL-NEXT:    [[V:%.*]] = load i64, ptr [[A1]], align 32
; IF-EVL-NEXT:    [[OFFSET:%.*]] = add i64 [[IV]], 100
; IF-EVL-NEXT:    [[A2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET]]
; IF-EVL-NEXT:    store i64 [[V]], ptr [[A2]], align 32
; IF-EVL-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
; IF-EVL-NEXT:    [[CMP:%.*]] = icmp ne i64 [[IV]], 199
; IF-EVL-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP5:![0-9]+]]
; IF-EVL:       exit:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: @test_may_clobber1(
; NO-VP-NEXT:  entry:
; NO-VP-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP:       vector.ph:
; NO-VP-NEXT:    br label [[VECTOR_BODY:%.*]]
; NO-VP:       vector.body:
; NO-VP-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; NO-VP-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; NO-VP-NEXT:    [[TMP1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP0]]
; NO-VP-NEXT:    [[TMP2:%.*]] = getelementptr i64, ptr [[TMP1]], i32 0
; NO-VP-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32
; NO-VP-NEXT:    [[TMP3:%.*]] = add i64 [[TMP0]], 100
; NO-VP-NEXT:    [[TMP4:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP3]]
; NO-VP-NEXT:    [[TMP5:%.*]] = getelementptr i64, ptr [[TMP4]], i32 0
; NO-VP-NEXT:    store <4 x i64> [[WIDE_LOAD]], ptr [[TMP5]], align 32
; NO-VP-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; NO-VP-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200
; NO-VP-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; NO-VP:       middle.block:
; NO-VP-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; NO-VP:       scalar.ph:
; NO-VP-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; NO-VP-NEXT:    br label [[LOOP:%.*]]
; NO-VP:       loop:
; NO-VP-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; NO-VP-NEXT:    [[A1:%.*]] = getelementptr i64, ptr [[P]], i64 [[IV]]
; NO-VP-NEXT:    [[V:%.*]] = load i64, ptr [[A1]], align 32
; NO-VP-NEXT:    [[OFFSET:%.*]] = add i64 [[IV]], 100
; NO-VP-NEXT:    [[A2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET]]
; NO-VP-NEXT:    store i64 [[V]], ptr [[A2]], align 32
; NO-VP-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
; NO-VP-NEXT:    [[CMP:%.*]] = icmp ne i64 [[IV]], 199
; NO-VP-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
; NO-VP:       exit:
; NO-VP-NEXT:    ret void
;
entry:
  br label %loop

loop:
  %iv = phi i64 [0, %entry], [%iv.next, %loop]
  %a1 = getelementptr i64, ptr %p, i64 %iv
  %v = load i64, ptr %a1, align 32
  %offset = add i64 %iv, 100
  %a2 = getelementptr i64, ptr %p, i64 %offset
  store i64 %v, ptr %a2, align 32
  %iv.next = add i64 %iv, 1
  %cmp = icmp ne i64 %iv, 199
  br i1 %cmp, label %loop, label %exit

exit:
  ret void
}
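; Same pattern with a dependence distance of only 9 elements; as a
; hypothetical C sketch, for illustration:
;
;   for (long i = 0; i < 200; i++)
;     p[i + 9] = p[i];
;
; Neither configuration vectorizes this loop.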
define void @test_may_clobber2(ptr %p) {
; IF-EVL-LABEL: @test_may_clobber2(
; IF-EVL-NEXT:  entry:
; IF-EVL-NEXT:    br label [[LOOP:%.*]]
; IF-EVL:       loop:
; IF-EVL-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; IF-EVL-NEXT:    [[A1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[IV]]
; IF-EVL-NEXT:    [[V:%.*]] = load i64, ptr [[A1]], align 32
; IF-EVL-NEXT:    [[OFFSET:%.*]] = add i64 [[IV]], 9
; IF-EVL-NEXT:    [[A2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET]]
; IF-EVL-NEXT:    store i64 [[V]], ptr [[A2]], align 32
; IF-EVL-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
; IF-EVL-NEXT:    [[CMP:%.*]] = icmp ne i64 [[IV]], 199
; IF-EVL-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
; IF-EVL:       exit:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: @test_may_clobber2(
; NO-VP-NEXT:  entry:
; NO-VP-NEXT:    br label [[LOOP:%.*]]
; NO-VP:       loop:
; NO-VP-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; NO-VP-NEXT:    [[A1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[IV]]
; NO-VP-NEXT:    [[V:%.*]] = load i64, ptr [[A1]], align 32
; NO-VP-NEXT:    [[OFFSET:%.*]] = add i64 [[IV]], 9
; NO-VP-NEXT:    [[A2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET]]
; NO-VP-NEXT:    store i64 [[V]], ptr [[A2]], align 32
; NO-VP-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
; NO-VP-NEXT:    [[CMP:%.*]] = icmp ne i64 [[IV]], 199
; NO-VP-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
; NO-VP:       exit:
; NO-VP-NEXT:    ret void
;
entry:
  br label %loop

loop:
  %iv = phi i64 [0, %entry], [%iv.next, %loop]
  %a1 = getelementptr i64, ptr %p, i64 %iv
  %v = load i64, ptr %a1, align 32
  %offset = add i64 %iv, 9
  %a2 = getelementptr i64, ptr %p, i64 %offset
  store i64 %v, ptr %a2, align 32
  %iv.next = add i64 %iv, 1
  %cmp = icmp ne i64 %iv, 199
  br i1 %cmp, label %loop, label %exit

exit:
  ret void
}
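; Same pattern with a dependence distance of 10 elements; as a hypothetical C
; sketch, for illustration:
;
;   for (long i = 0; i < 200; i++)
;     p[i + 10] = p[i];
;
; Only a small fixed VF is provably safe here: both configurations use a
; fixed VF of 2 without EVL predication.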
define void @test_may_clobber3(ptr %p) {
; IF-EVL-LABEL: @test_may_clobber3(
; IF-EVL-NEXT:  entry:
; IF-EVL-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; IF-EVL:       vector.ph:
; IF-EVL-NEXT:    br label [[VECTOR_BODY:%.*]]
; IF-EVL:       vector.body:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; IF-EVL-NEXT:    [[TMP1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP0]]
; IF-EVL-NEXT:    [[TMP2:%.*]] = getelementptr i64, ptr [[TMP1]], i32 0
; IF-EVL-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP2]], align 32
; IF-EVL-NEXT:    [[TMP3:%.*]] = add i64 [[TMP0]], 10
; IF-EVL-NEXT:    [[TMP4:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP3]]
; IF-EVL-NEXT:    [[TMP5:%.*]] = getelementptr i64, ptr [[TMP4]], i32 0
; IF-EVL-NEXT:    store <2 x i64> [[WIDE_LOAD]], ptr [[TMP5]], align 32
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; IF-EVL-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200
; IF-EVL-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; IF-EVL:       middle.block:
; IF-EVL-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; IF-EVL:       scalar.ph:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; IF-EVL-NEXT:    br label [[LOOP:%.*]]
; IF-EVL:       loop:
; IF-EVL-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; IF-EVL-NEXT:    [[A1:%.*]] = getelementptr i64, ptr [[P]], i64 [[IV]]
; IF-EVL-NEXT:    [[V:%.*]] = load i64, ptr [[A1]], align 32
; IF-EVL-NEXT:    [[OFFSET:%.*]] = add i64 [[IV]], 10
; IF-EVL-NEXT:    [[A2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET]]
; IF-EVL-NEXT:    store i64 [[V]], ptr [[A2]], align 32
; IF-EVL-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
; IF-EVL-NEXT:    [[CMP:%.*]] = icmp ne i64 [[IV]], 199
; IF-EVL-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP7:![0-9]+]]
; IF-EVL:       exit:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: @test_may_clobber3(
; NO-VP-NEXT:  entry:
; NO-VP-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP:       vector.ph:
; NO-VP-NEXT:    br label [[VECTOR_BODY:%.*]]
; NO-VP:       vector.body:
; NO-VP-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; NO-VP-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; NO-VP-NEXT:    [[TMP1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP0]]
; NO-VP-NEXT:    [[TMP2:%.*]] = getelementptr i64, ptr [[TMP1]], i32 0
; NO-VP-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP2]], align 32
; NO-VP-NEXT:    [[TMP3:%.*]] = add i64 [[TMP0]], 10
; NO-VP-NEXT:    [[TMP4:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP3]]
; NO-VP-NEXT:    [[TMP5:%.*]] = getelementptr i64, ptr [[TMP4]], i32 0
; NO-VP-NEXT:    store <2 x i64> [[WIDE_LOAD]], ptr [[TMP5]], align 32
; NO-VP-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; NO-VP-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200
; NO-VP-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; NO-VP:       middle.block:
; NO-VP-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; NO-VP:       scalar.ph:
; NO-VP-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; NO-VP-NEXT:    br label [[LOOP:%.*]]
; NO-VP:       loop:
; NO-VP-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; NO-VP-NEXT:    [[A1:%.*]] = getelementptr i64, ptr [[P]], i64 [[IV]]
; NO-VP-NEXT:    [[V:%.*]] = load i64, ptr [[A1]], align 32
; NO-VP-NEXT:    [[OFFSET:%.*]] = add i64 [[IV]], 10
; NO-VP-NEXT:    [[A2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET]]
; NO-VP-NEXT:    store i64 [[V]], ptr [[A2]], align 32
; NO-VP-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
; NO-VP-NEXT:    [[CMP:%.*]] = icmp ne i64 [[IV]], 199
; NO-VP-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP5:![0-9]+]]
; NO-VP:       exit:
; NO-VP-NEXT:    ret void
;
entry:
  br label %loop

loop:
  %iv = phi i64 [0, %entry], [%iv.next, %loop]
  %a1 = getelementptr i64, ptr %p, i64 %iv
  %v = load i64, ptr %a1, align 32
  %offset = add i64 %iv, 10
  %a2 = getelementptr i64, ptr %p, i64 %offset
  store i64 %v, ptr %a2, align 32
  %iv.next = add i64 %iv, 1
  %cmp = icmp ne i64 %iv, 199
  br i1 %cmp, label %loop, label %exit

exit:
  ret void
}

; Trivially no overlap due to the maximum possible value of VLEN and LMUL.
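; As a hypothetical C sketch, for illustration:
;
;   for (long i = 0; i < 200; i++)
;     p[i + 8192] = p[i];
;
; A distance of 8192 i64 elements (64 KiB) is more than a single
; <vscale x 2 x i64> access can span even at the maximum possible VLEN.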
define void @trivial_due_max_vscale(ptr %p) {
; IF-EVL-LABEL: @trivial_due_max_vscale(
; IF-EVL-NEXT:  entry:
; IF-EVL-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; IF-EVL:       vector.ph:
; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[TMP1]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 200, [[TMP2]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP3]], 2
; IF-EVL-NEXT:    br label [[VECTOR_BODY:%.*]]
; IF-EVL:       vector.body:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP5:%.*]] = sub i64 200, [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP5]], i32 2, i1 true)
; IF-EVL-NEXT:    [[TMP7:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP7]]
; IF-EVL-NEXT:    [[TMP9:%.*]] = getelementptr i64, ptr [[TMP8]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 32 [[TMP9]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP6]])
; IF-EVL-NEXT:    [[TMP10:%.*]] = add i64 [[TMP7]], 8192
; IF-EVL-NEXT:    [[TMP11:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP10]]
; IF-EVL-NEXT:    [[TMP12:%.*]] = getelementptr i64, ptr [[TMP11]], i32 0
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP_LOAD]], ptr align 32 [[TMP12]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP6]])
; IF-EVL-NEXT:    [[TMP13:%.*]] = zext i32 [[TMP6]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP13]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
; IF-EVL-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; IF-EVL:       middle.block:
; IF-EVL-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; IF-EVL:       scalar.ph:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; IF-EVL-NEXT:    br label [[LOOP:%.*]]
; IF-EVL:       loop:
; IF-EVL-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; IF-EVL-NEXT:    [[A1:%.*]] = getelementptr i64, ptr [[P]], i64 [[IV]]
; IF-EVL-NEXT:    [[V:%.*]] = load i64, ptr [[A1]], align 32
; IF-EVL-NEXT:    [[OFFSET:%.*]] = add i64 [[IV]], 8192
; IF-EVL-NEXT:    [[A2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET]]
; IF-EVL-NEXT:    store i64 [[V]], ptr [[A2]], align 32
; IF-EVL-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
; IF-EVL-NEXT:    [[CMP:%.*]] = icmp ne i64 [[IV]], 199
; IF-EVL-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP9:![0-9]+]]
; IF-EVL:       exit:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: @trivial_due_max_vscale(
; NO-VP-NEXT:  entry:
; NO-VP-NEXT:    br label [[LOOP:%.*]]
; NO-VP:       loop:
; NO-VP-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; NO-VP-NEXT:    [[A1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[IV]]
; NO-VP-NEXT:    [[V:%.*]] = load i64, ptr [[A1]], align 32
; NO-VP-NEXT:    [[OFFSET:%.*]] = add i64 [[IV]], 8192
; NO-VP-NEXT:    [[A2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET]]
; NO-VP-NEXT:    store i64 [[V]], ptr [[A2]], align 32
; NO-VP-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
; NO-VP-NEXT:    [[CMP:%.*]] = icmp ne i64 [[IV]], 199
; NO-VP-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
; NO-VP:       exit:
; NO-VP-NEXT:    ret void
;
entry:
  br label %loop

loop:
  %iv = phi i64 [0, %entry], [%iv.next, %loop]
  %a1 = getelementptr i64, ptr %p, i64 %iv
  %v = load i64, ptr %a1, align 32
  %offset = add i64 %iv, 8192
  %a2 = getelementptr i64, ptr %p, i64 %offset
  store i64 %v, ptr %a2, align 32
  %iv.next = add i64 %iv, 1
  %cmp = icmp ne i64 %iv, 199
  br i1 %cmp, label %loop, label %exit

exit:
  ret void
}

; Dependence distance could be violated via LMUL>=2 or interleaving
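; As a hypothetical C sketch, for illustration:
;
;   for (long i = 0; i < 3002; i++)
;     p[i + 1024] = p[i];
;
; The checks show LMUL clamped to 1 (<vscale x 1 x i64>) and the AVL clamped
; to at most 1024, so one vector step never crosses the 1024-element
; dependence distance.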
define void @no_high_lmul_or_interleave(ptr %p) {
; IF-EVL-LABEL: @no_high_lmul_or_interleave(
; IF-EVL-NEXT:  entry:
; IF-EVL-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; IF-EVL:       vector.ph:
; IF-EVL-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP1:%.*]] = sub i64 [[TMP7]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 3002, [[TMP1]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP7]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    br label [[VECTOR_BODY:%.*]]
; IF-EVL:       vector.body:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[AVL:%.*]] = sub i64 3002, [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP9:%.*]] = icmp ult i64 [[AVL]], 1024
; IF-EVL-NEXT:    [[SAFE_AVL:%.*]] = select i1 [[TMP9]], i64 [[AVL]], i64 1024
; IF-EVL-NEXT:    [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[SAFE_AVL]], i32 1, i1 true)
; IF-EVL-NEXT:    [[TMP0:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP2:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP0]]
; IF-EVL-NEXT:    [[TMP3:%.*]] = getelementptr i64, ptr [[TMP2]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 1 x i64> @llvm.vp.load.nxv1i64.p0(ptr align 32 [[TMP3]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP10]])
; IF-EVL-NEXT:    [[TMP4:%.*]] = add i64 [[TMP0]], 1024
; IF-EVL-NEXT:    [[TMP5:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP4]]
; IF-EVL-NEXT:    [[TMP6:%.*]] = getelementptr i64, ptr [[TMP5]], i32 0
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv1i64.p0(<vscale x 1 x i64> [[VP_OP_LOAD]], ptr align 32 [[TMP6]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP10]])
; IF-EVL-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP11]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
; IF-EVL-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; IF-EVL:       middle.block:
; IF-EVL-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; IF-EVL:       scalar.ph:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; IF-EVL-NEXT:    br label [[LOOP:%.*]]
; IF-EVL:       loop:
; IF-EVL-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; IF-EVL-NEXT:    [[A1:%.*]] = getelementptr i64, ptr [[P]], i64 [[IV]]
; IF-EVL-NEXT:    [[V:%.*]] = load i64, ptr [[A1]], align 32
; IF-EVL-NEXT:    [[OFFSET:%.*]] = add i64 [[IV]], 1024
; IF-EVL-NEXT:    [[A2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET]]
; IF-EVL-NEXT:    store i64 [[V]], ptr [[A2]], align 32
; IF-EVL-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
; IF-EVL-NEXT:    [[CMP:%.*]] = icmp ne i64 [[IV]], 3001
; IF-EVL-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP11:![0-9]+]]
; IF-EVL:       exit:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: @no_high_lmul_or_interleave(
; NO-VP-NEXT:  entry:
; NO-VP-NEXT:    br label [[LOOP:%.*]]
; NO-VP:       loop:
; NO-VP-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; NO-VP-NEXT:    [[A1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[IV]]
; NO-VP-NEXT:    [[V:%.*]] = load i64, ptr [[A1]], align 32
; NO-VP-NEXT:    [[OFFSET:%.*]] = add i64 [[IV]], 1024
; NO-VP-NEXT:    [[A2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET]]
; NO-VP-NEXT:    store i64 [[V]], ptr [[A2]], align 32
; NO-VP-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
; NO-VP-NEXT:    [[CMP:%.*]] = icmp ne i64 [[IV]], 3001
; NO-VP-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
; NO-VP:       exit:
; NO-VP-NEXT:    ret void
;
entry:
  br label %loop

loop:
  %iv = phi i64 [0, %entry], [%iv.next, %loop]
  %a1 = getelementptr i64, ptr %p, i64 %iv
  %v = load i64, ptr %a1, align 32
  %offset = add i64 %iv, 1024
  %a2 = getelementptr i64, ptr %p, i64 %offset
  store i64 %v, ptr %a2, align 32
  %iv.next = add i64 %iv, 1
  %cmp = icmp ne i64 %iv, 3001
  br i1 %cmp, label %loop, label %exit

exit:
  ret void
}
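; The store to A[i] is read back three iterations later through A[i - 3], a
; non-power-of-2 store-to-load forwarding distance. As a hypothetical C
; sketch, for illustration:
;
;   for (long i = 16; i < 128; i++)
;     A[i] = A[i - 3] + A[i + 4];
;
; Neither configuration vectorizes this loop.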
define void @non-power-2-storeloadforward(ptr %A) {
; IF-EVL-LABEL: @non-power-2-storeloadforward(
; IF-EVL-NEXT:  entry:
; IF-EVL-NEXT:    br label [[FOR_BODY:%.*]]
; IF-EVL:       for.body:
; IF-EVL-NEXT:    [[IV:%.*]] = phi i64 [ 16, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP0:%.*]] = add nsw i64 [[IV]], -3
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]]
; IF-EVL-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; IF-EVL-NEXT:    [[TMP2:%.*]] = add nsw i64 [[IV]], 4
; IF-EVL-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP2]]
; IF-EVL-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
; IF-EVL-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP3]], [[TMP1]]
; IF-EVL-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; IF-EVL-NEXT:    store i32 [[ADD3]], ptr [[ARRAYIDX5]], align 4
; IF-EVL-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
; IF-EVL-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[IV_NEXT]] to i32
; IF-EVL-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], 128
; IF-EVL-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]]
; IF-EVL:       for.end:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: @non-power-2-storeloadforward(
; NO-VP-NEXT:  entry:
; NO-VP-NEXT:    br label [[FOR_BODY:%.*]]
; NO-VP:       for.body:
; NO-VP-NEXT:    [[IV:%.*]] = phi i64 [ 16, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; NO-VP-NEXT:    [[TMP0:%.*]] = add nsw i64 [[IV]], -3
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]]
; NO-VP-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; NO-VP-NEXT:    [[TMP2:%.*]] = add nsw i64 [[IV]], 4
; NO-VP-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP2]]
; NO-VP-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
; NO-VP-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP3]], [[TMP1]]
; NO-VP-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT:    store i32 [[ADD3]], ptr [[ARRAYIDX5]], align 4
; NO-VP-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
; NO-VP-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[IV_NEXT]] to i32
; NO-VP-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], 128
; NO-VP-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]]
; NO-VP:       for.end:
; NO-VP-NEXT:    ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 16, %entry ], [ %iv.next, %for.body ]
  %0 = add nsw i64 %iv, -3
  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %0
  %1 = load i32, ptr %arrayidx, align 4
  %2 = add nsw i64 %iv, 4
  %arrayidx2 = getelementptr inbounds i32, ptr %A, i64 %2
  %3 = load i32, ptr %arrayidx2, align 4
  %add3 = add nsw i32 %3, %1
  %arrayidx5 = getelementptr inbounds i32, ptr %A, i64 %iv
  store i32 %add3, ptr %arrayidx5, align 4
  %iv.next = add i64 %iv, 1
  %lftr.wideiv = trunc i64 %iv.next to i32
  %exitcond = icmp ne i32 %lftr.wideiv, 128
  br i1 %exitcond, label %for.body, label %for.end

for.end:
  ret void
}