; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=loop-vectorize -scalable-vectorization=on -riscv-v-vector-bits-min=-1 -mtriple riscv64-linux-gnu -mattr=+v,+f -S 2>%t | FileCheck %s

; Each test below copies p[i] to p[i+C] for i in [0, 200) with a different
; constant dependence distance C. The autogenerated CHECK lines (see NOTE
; above; do not edit them by hand) pin down which vectorization factor the
; loop vectorizer may legally pick for each distance.

target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
target triple = "riscv64"

; Dependence distance between read and write is greater than the trip
; count of the loop. Thus, values written are never read for any
; valid vectorization of the loop.
; (Distance C = 200: writes land in [200, 400), reads come from [0, 200),
; so a scalable <vscale x 2 x i64> VF is safe, as the CHECKs show.)
define void @test(ptr %p) {
; CHECK-LABEL: @test(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 200, [[TMP1]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 200, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 200, [[N_MOD_VF]]
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP8]], align 32
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP6]], 200
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i64, ptr [[TMP10]], i32 0
; CHECK-NEXT: store <vscale x 2 x i64> [[WIDE_LOAD]], ptr [[TMP11]], align 32
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 200, [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[A1:%.*]] = getelementptr i64, ptr [[P]], i64 [[IV]]
; CHECK-NEXT: [[V:%.*]] = load i64, ptr [[A1]], align 32
; CHECK-NEXT: [[OFFSET:%.*]] = add i64 [[IV]], 200
; CHECK-NEXT: [[A2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET]]
; CHECK-NEXT: store i64 [[V]], ptr [[A2]], align 32
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[IV]], 199
; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
  br label %loop

loop:
  %iv = phi i64 [0, %entry], [%iv.next, %loop]
  %a1 = getelementptr i64, ptr %p, i64 %iv
  %v = load i64, ptr %a1, align 32
  %offset = add i64 %iv, 200
  %a2 = getelementptr i64, ptr %p, i64 %offset
  store i64 %v, ptr %a2, align 32
  %iv.next = add i64 %iv, 1
  %cmp = icmp ne i64 %iv, 199
  br i1 %cmp, label %loop, label %exit

exit:
  ret void
}

; Dependence distance is less than trip count, thus we must prove that
; chosen VF guaranteed to be less than dependence distance.
; (Distance C = 100 < 200 iterations: the CHECKs show a fixed <4 x i64> VF
; is used here rather than a scalable one.)
define void @test_may_clobber(ptr %p) {
; CHECK-LABEL: @test_may_clobber(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP0]], 100
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[TMP4]], i32 0
; CHECK-NEXT: store <4 x i64> [[WIDE_LOAD]], ptr [[TMP5]], align 32
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200
; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[A1:%.*]] = getelementptr i64, ptr [[P]], i64 [[IV]]
; CHECK-NEXT: [[V:%.*]] = load i64, ptr [[A1]], align 32
; CHECK-NEXT: [[OFFSET:%.*]] = add i64 [[IV]], 100
; CHECK-NEXT: [[A2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET]]
; CHECK-NEXT: store i64 [[V]], ptr [[A2]], align 32
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[IV]], 199
; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
  br label %loop

loop:
  %iv = phi i64 [0, %entry], [%iv.next, %loop]
  %a1 = getelementptr i64, ptr %p, i64 %iv
  %v = load i64, ptr %a1, align 32
  %offset = add i64 %iv, 100
  %a2 = getelementptr i64, ptr %p, i64 %offset
  store i64 %v, ptr %a2, align 32
  %iv.next = add i64 %iv, 1
  %cmp = icmp ne i64 %iv, 199
  br i1 %cmp, label %loop, label %exit

exit:
  ret void
}

; Trivially no overlap due to the maximum possible value of VLEN and LMUL
; (distance C = 8192 elements exceeds any realizable VF in bytes, so a
; scalable VF is safe without a runtime check).
define void @trivial_due_max_vscale(ptr %p) {
; CHECK-LABEL: @trivial_due_max_vscale(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 200, [[TMP1]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 200, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 200, [[N_MOD_VF]]
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP8]], align 32
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP6]], 8192
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i64, ptr [[TMP10]], i32 0
; CHECK-NEXT: store <vscale x 2 x i64> [[WIDE_LOAD]], ptr [[TMP11]], align 32
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 200, [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[A1:%.*]] = getelementptr i64, ptr [[P]], i64 [[IV]]
; CHECK-NEXT: [[V:%.*]] = load i64, ptr [[A1]], align 32
; CHECK-NEXT: [[OFFSET:%.*]] = add i64 [[IV]], 8192
; CHECK-NEXT: [[A2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET]]
; CHECK-NEXT: store i64 [[V]], ptr [[A2]], align 32
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[IV]], 199
; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
  br label %loop

loop:
  %iv = phi i64 [0, %entry], [%iv.next, %loop]
  %a1 = getelementptr i64, ptr %p, i64 %iv
  %v = load i64, ptr %a1, align 32
  %offset = add i64 %iv, 8192
  %a2 = getelementptr i64, ptr %p, i64 %offset
  store i64 %v, ptr %a2, align 32
  %iv.next = add i64 %iv, 1
  %cmp = icmp ne i64 %iv, 199
  br i1 %cmp, label %loop, label %exit

exit:
  ret void
}

; Dependence distance could be violated via LMUL>=2 or interleaving
; (distance C = 1024 elements).
; NOTE(review): the CHECK lines below still show a <vscale x 2 x i64> VF
; with no runtime overlap guard — presumably the cost model bounds
; vscale such that 2*vscale <= 1024 here; confirm against the RISC-V
; max-vscale logic before relying on this.
define void @no_high_lmul_or_interleave(ptr %p) {
; CHECK-LABEL: @no_high_lmul_or_interleave(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 200, [[TMP1]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 200, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 200, [[N_MOD_VF]]
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP8]], align 32
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP6]], 1024
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i64, ptr [[TMP10]], i32 0
; CHECK-NEXT: store <vscale x 2 x i64> [[WIDE_LOAD]], ptr [[TMP11]], align 32
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 200, [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[A1:%.*]] = getelementptr i64, ptr [[P]], i64 [[IV]]
; CHECK-NEXT: [[V:%.*]] = load i64, ptr [[A1]], align 32
; CHECK-NEXT: [[OFFSET:%.*]] = add i64 [[IV]], 1024
; CHECK-NEXT: [[A2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET]]
; CHECK-NEXT: store i64 [[V]], ptr [[A2]], align 32
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[IV]], 199
; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
  br label %loop

loop:
  %iv = phi i64 [0, %entry], [%iv.next, %loop]
  %a1 = getelementptr i64, ptr %p, i64 %iv
  %v = load i64, ptr %a1, align 32
  %offset = add i64 %iv, 1024
  %a2 = getelementptr i64, ptr %p, i64 %offset
  store i64 %v, ptr %a2, align 32
  %iv.next = add i64 %iv, 1
  %cmp = icmp ne i64 %iv, 199
  br i1 %cmp, label %loop, label %exit

exit:
  ret void
}