; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -mtriple aarch64-linux-gnu -mattr=+sve -passes=loop-vectorize,dce,instcombine -S \
; RUN:   -prefer-predicate-over-epilogue=scalar-epilogue < %s | FileCheck %s

; Ensure that we can vectorize loops such as:
;   int *ptr = c;
;   for (long long i = 0; i < n; i++) {
;     int X1 = *ptr++;
;     int X2 = *ptr++;
;     a[i] = X1 + 1;
;     b[i] = X2 + 1;
;   }
; with scalable vectors, including unrolling. The test below makes sure
; that the interleaved wide loads use the correct offsets, taking vscale
; into account.
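;
; For reference, the offset arithmetic encoded in the CHECK lines below
; (assuming a VF of <vscale x 4 x i32> and an interleave count of 2, as
; requested by the loop metadata) works out as follows: each vector
; iteration covers vscale * 8 scalar iterations, so the source pointer for
; the current iteration is c + index * 8 bytes; the second unrolled part
; loads from a further vscale * 32 bytes on (one <vscale x 8 x i32>), and
; its stores to 'a' and 'b' are offset by vscale * 16 bytes
; (one <vscale x 4 x i32>).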

define void @widen_ptr_phi_unrolled(ptr noalias nocapture %a, ptr noalias nocapture %b, ptr nocapture readonly %c, i64 %n) #0 {
; CHECK-LABEL: @widen_ptr_phi_unrolled(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -8
; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]]
; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 3
; CHECK-NEXT:    [[TMP26:%.*]] = shl i64 [[N_VEC]], 3
; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[C:%.*]], i64 [[TMP26]]
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 3
; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 5
; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[C]], i64 [[OFFSET_IDX]]
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[C]], i64 [[OFFSET_IDX]]
; CHECK-NEXT:    [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[TMP8]], i64 [[TMP7]]
; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[NEXT_GEP]], align 4
; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
; CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
; CHECK-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1
; CHECK-NEXT:    [[WIDE_VEC3:%.*]] = load <vscale x 8 x i32>, ptr [[NEXT_GEP2]], align 4
; CHECK-NEXT:    [[STRIDED_VEC4:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC3]])
; CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC4]], 0
; CHECK-NEXT:    [[TMP12:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC4]], 1
; CHECK-NEXT:    [[TMP13:%.*]] = add nsw <vscale x 4 x i32> [[TMP9]], splat (i32 1)
; CHECK-NEXT:    [[TMP14:%.*]] = add nsw <vscale x 4 x i32> [[TMP11]], splat (i32 1)
; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[DOTIDX:%.*]] = shl nuw nsw i64 [[TMP16]], 4
; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP15]], i64 [[DOTIDX]]
; CHECK-NEXT:    store <vscale x 4 x i32> [[TMP13]], ptr [[TMP15]], align 4
; CHECK-NEXT:    store <vscale x 4 x i32> [[TMP14]], ptr [[TMP17]], align 4
; CHECK-NEXT:    [[TMP18:%.*]] = add nsw <vscale x 4 x i32> [[TMP10]], splat (i32 1)
; CHECK-NEXT:    [[TMP19:%.*]] = add nsw <vscale x 4 x i32> [[TMP12]], splat (i32 1)
; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[DOTIDX5:%.*]] = shl nuw nsw i64 [[TMP21]], 4
; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP20]], i64 [[DOTIDX5]]
; CHECK-NEXT:    store <vscale x 4 x i32> [[TMP18]], ptr [[TMP20]], align 4
; CHECK-NEXT:    store <vscale x 4 x i32> [[TMP19]], ptr [[TMP22]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-NEXT:    [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[C]], [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[PTR_014:%.*]] = phi ptr [ [[INCDEC_PTR1:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT:    [[I_013:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_014]], i64 4
; CHECK-NEXT:    [[TMP24:%.*]] = load i32, ptr [[PTR_014]], align 4
; CHECK-NEXT:    [[INCDEC_PTR1]] = getelementptr inbounds nuw i8, ptr [[PTR_014]], i64 8
; CHECK-NEXT:    [[TMP25:%.*]] = load i32, ptr [[INCDEC_PTR]], align 4
; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP24]], 1
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I_013]]
; CHECK-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP25]], 1
; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[I_013]]
; CHECK-NEXT:    store i32 [[ADD2]], ptr [[ARRAYIDX3]], align 4
; CHECK-NEXT:    [[INC]] = add nuw nsw i64 [[I_013]], 1
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK:       for.exit:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %ptr.014 = phi ptr [ %incdec.ptr1, %for.body ], [ %c, %entry ]
  %i.013 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
  %incdec.ptr = getelementptr inbounds i32, ptr %ptr.014, i64 1
  %0 = load i32, ptr %ptr.014, align 4
  %incdec.ptr1 = getelementptr inbounds i32, ptr %ptr.014, i64 2
  %1 = load i32, ptr %incdec.ptr, align 4
  %add = add nsw i32 %0, 1
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %i.013
  store i32 %add, ptr %arrayidx, align 4
  %add2 = add nsw i32 %1, 1
  %arrayidx3 = getelementptr inbounds i32, ptr %b, i64 %i.013
  store i32 %add2, ptr %arrayidx3, align 4
  %inc = add nuw nsw i64 %i.013, 1
  %exitcond.not = icmp eq i64 %inc, %n
  br i1 %exitcond.not, label %for.exit, label %for.body, !llvm.loop !0

for.exit:                                         ; preds = %for.body
  ret void
}


; Ensure we can vectorise loops without interleaving, e.g.:
;   int *D = dst;
;   int *S = src;
;   for (long long i = 0; i < n; i++) {
;     *D = *S * 2;
;     D++;
;     S++;
;   }
; This takes us down a different codepath from the test above; here we
; treat the PHIs as being uniform.
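;
; For reference, the checks below still unroll the loop by two, but both
; parts use ordinary contiguous loads and stores; the second part is simply
; offset from the first by vscale * 16 bytes (one <vscale x 4 x i32>).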

define void @widen_2ptrs_phi_unrolled(ptr noalias nocapture %dst, ptr noalias nocapture readonly %src, i64 %n) #0 {
; CHECK-LABEL: @widen_2ptrs_phi_unrolled(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -8
; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]]
; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 3
; CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[N_VEC]], 2
; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 [[TMP3]]
; CHECK-NEXT:    [[TMP4:%.*]] = shl i64 [[N_VEC]], 2
; CHECK-NEXT:    [[IND_END2:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[TMP4]]
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 2
; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[OFFSET_IDX]]
; CHECK-NEXT:    [[OFFSET_IDX4:%.*]] = shl i64 [[INDEX]], 2
; CHECK-NEXT:    [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX4]]
; CHECK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[DOTIDX:%.*]] = shl nuw nsw i64 [[TMP7]], 4
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 [[DOTIDX]]
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[NEXT_GEP]], align 4
; CHECK-NEXT:    [[WIDE_LOAD6:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4
; CHECK-NEXT:    [[TMP9:%.*]] = shl nsw <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 1)
; CHECK-NEXT:    [[TMP10:%.*]] = shl nsw <vscale x 4 x i32> [[WIDE_LOAD6]], splat (i32 1)
; CHECK-NEXT:    [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[DOTIDX7:%.*]] = shl nuw nsw i64 [[TMP11]], 4
; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr i8, ptr [[NEXT_GEP5]], i64 [[DOTIDX7]]
; CHECK-NEXT:    store <vscale x 4 x i32> [[TMP9]], ptr [[NEXT_GEP5]], align 4
; CHECK-NEXT:    store <vscale x 4 x i32> [[TMP10]], ptr [[TMP12]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[SRC]], [[ENTRY]] ]
; CHECK-NEXT:    [[BC_RESUME_VAL3:%.*]] = phi ptr [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[DST]], [[ENTRY]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[I_011:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT:    [[S_010:%.*]] = phi ptr [ [[INCDEC_PTR1:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
; CHECK-NEXT:    [[D_09:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ]
; CHECK-NEXT:    [[TMP14:%.*]] = load i32, ptr [[S_010]], align 4
; CHECK-NEXT:    [[MUL:%.*]] = shl nsw i32 [[TMP14]], 1
; CHECK-NEXT:    store i32 [[MUL]], ptr [[D_09]], align 4
; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds nuw i8, ptr [[D_09]], i64 4
; CHECK-NEXT:    [[INCDEC_PTR1]] = getelementptr inbounds nuw i8, ptr [[S_010]], i64 4
; CHECK-NEXT:    [[INC]] = add nuw nsw i64 [[I_011]], 1
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i.011 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
  %S.010 = phi ptr [ %incdec.ptr1, %for.body ], [ %src, %entry ]
  %D.09 = phi ptr [ %incdec.ptr, %for.body ], [ %dst, %entry ]
  %0 = load i32, ptr %S.010, align 4
  %mul = shl nsw i32 %0, 1
  store i32 %mul, ptr %D.09, align 4
  %incdec.ptr = getelementptr inbounds i32, ptr %D.09, i64 1
  %incdec.ptr1 = getelementptr inbounds i32, ptr %S.010, i64 1
  %inc = add nuw nsw i64 %i.011, 1
  %exitcond.not = icmp eq i64 %inc, %n
  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0

for.cond.cleanup:                                 ; preds = %for.body
  ret void
}


;
; Check multiple pointer induction variables where only one is recognized as
; uniform and remains uniform after vectorization. The other pointer
; induction variable is not recognized as uniform, and does not remain
; uniform after vectorization, because its value is stored to memory.
;
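; A rough C equivalent of the loop below (illustrative only; the variable
; names are invented and do not appear in the IR):
;   int *p = a;
;   int **q = b;
;   int sum = 0;
;   for (long long i = 0; i < n; i++) {
;     sum += *p;
;     *q = p;
;     p++;
;     q++;
;   }
;   return sum;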

define i32 @pointer_iv_mixed(ptr noalias %a, ptr noalias %b, i64 %n) #0 {
; CHECK-LABEL: @pointer_iv_mixed(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 1
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp samesign ult i64 [[SMAX]], [[TMP1]]
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -2
; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[SMAX]], [[DOTNEG]]
; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 1
; CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[N_VEC]], 2
; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[TMP3]]
; CHECK-NEXT:    [[TMP4:%.*]] = shl i64 [[N_VEC]], 3
; CHECK-NEXT:    [[IND_END2:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[TMP4]]
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[POINTER_PHI:%.*]] = phi ptr [ [[A]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <vscale x 2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 3
; CHECK-NEXT:    [[TMP9:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; CHECK-NEXT:    [[TMP10:%.*]] = shl <vscale x 2 x i64> [[TMP9]], splat (i64 2)
; CHECK-NEXT:    [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[TMP10]]
; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 3
; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[OFFSET_IDX]]
; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <vscale x 2 x ptr> [[VECTOR_GEP]], i64 0
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i32>, ptr [[TMP11]], align 8
; CHECK-NEXT:    [[TMP12]] = add <vscale x 2 x i32> [[WIDE_LOAD]], [[VEC_PHI]]
; CHECK-NEXT:    store <vscale x 2 x ptr> [[VECTOR_GEP]], ptr [[NEXT_GEP]], align 8
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
; CHECK-NEXT:    [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP8]]
; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[TMP14:%.*]] = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> [[TMP12]])
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[A]], [[ENTRY]] ]
; CHECK-NEXT:    [[BC_RESUME_VAL3:%.*]] = phi ptr [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ]
; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP14]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT:    [[P:%.*]] = phi ptr [ [[VAR3:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
; CHECK-NEXT:    [[Q:%.*]] = phi ptr [ [[VAR4:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ]
; CHECK-NEXT:    [[VAR0:%.*]] = phi i32 [ [[VAR2:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT:    [[VAR1:%.*]] = load i32, ptr [[P]], align 8
; CHECK-NEXT:    [[VAR2]] = add i32 [[VAR1]], [[VAR0]]
; CHECK-NEXT:    store ptr [[P]], ptr [[Q]], align 8
; CHECK-NEXT:    [[VAR3]] = getelementptr inbounds nuw i8, ptr [[P]], i64 4
; CHECK-NEXT:    [[VAR4]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 8
; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; CHECK-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
; CHECK-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    [[VAR5:%.*]] = phi i32 [ [[VAR2]], [[FOR_BODY]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i32 [[VAR5]]
;
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  %p = phi ptr [ %var3, %for.body ], [ %a, %entry ]
  %q = phi ptr [ %var4, %for.body ], [ %b, %entry ]
  %var0 = phi i32 [ %var2, %for.body ], [ 0, %entry ]
  %var1 = load i32, ptr %p, align 8
  %var2 = add i32 %var1, %var0
  store ptr %p, ptr %q, align 8
  %var3 = getelementptr inbounds i32, ptr %p, i32 1
  %var4 = getelementptr inbounds ptr, ptr %q, i32 1
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end, !llvm.loop !6

for.end:
  %var5 = phi i32 [ %var2, %for.body ]
  ret i32 %var5
}

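; A rough C equivalent of the loop below (illustrative only; the variable
; names are invented and do not appear in the IR):
;   short *p = ptr;
;   for (long long i = 0; i < 1024; i++) {
;     if (p != 0)
;       *p = 0;
;     p++;
;   }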
define void @phi_used_in_vector_compare_and_scalar_indvar_update_and_store(ptr %ptr) #0 {
; CHECK-LABEL: @phi_used_in_vector_compare_and_scalar_indvar_update_and_store(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 1
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[POINTER_PHI:%.*]] = phi ptr [ [[PTR:%.*]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 2
; CHECK-NEXT:    [[TMP4:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; CHECK-NEXT:    [[TMP5:%.*]] = shl <vscale x 2 x i64> [[TMP4]], splat (i64 1)
; CHECK-NEXT:    [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[TMP5]]
; CHECK-NEXT:    [[TMP6:%.*]] = icmp ne <vscale x 2 x ptr> [[VECTOR_GEP]], zeroinitializer
; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 2 x ptr> [[VECTOR_GEP]], i64 0
; CHECK-NEXT:    call void @llvm.masked.store.nxv2i16.p0(<vscale x 2 x i16> zeroinitializer, ptr [[TMP7]], i32 2, <vscale x 2 x i1> [[TMP6]])
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
; CHECK-NEXT:    [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP3]]
; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    br i1 poison, label [[IF_END_SINK_SPLIT:%.*]], label [[IF_END:%.*]]
; CHECK:       if.end.sink.split:
; CHECK-NEXT:    br label [[IF_END]]
; CHECK:       if.end:
; CHECK-NEXT:    br i1 poison, label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:                                         ; preds = %if.end, %entry
  %iv = phi i64 [ %inc, %if.end ], [ 0, %entry ]
  %iv.ptr = phi ptr [ %incdec.iv.ptr, %if.end ], [ %ptr, %entry ]
  %cmp.i = icmp ne ptr %iv.ptr, null
  br i1 %cmp.i, label %if.end.sink.split, label %if.end

if.end.sink.split:                                ; preds = %for.body
  store i16 0, ptr %iv.ptr, align 2
  br label %if.end

if.end:                                           ; preds = %if.end.sink.split, %for.body
  %incdec.iv.ptr = getelementptr inbounds i16, ptr %iv.ptr, i64 1
  %inc = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp ult i64 %inc, 1024
  br i1 %exitcond.not, label %for.body, label %for.end, !llvm.loop !6

for.end:                                          ; preds = %if.end, %for.end
  %iv.ptr.1.lcssa = phi ptr [ %incdec.iv.ptr, %if.end ]
  ret void
}

attributes #0 = { vscale_range(1, 16) }

!0 = distinct !{!0, !1, !2, !3, !4, !5}
!1 = !{!"llvm.loop.mustprogress"}
!2 = !{!"llvm.loop.vectorize.width", i32 4}
!3 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
!4 = !{!"llvm.loop.vectorize.enable", i1 true}
!5 = !{!"llvm.loop.interleave.count", i32 2}
!6 = distinct !{!6, !1, !7, !3, !4, !8}
!7 = !{!"llvm.loop.vectorize.width", i32 2}
!8 = !{!"llvm.loop.interleave.count", i32 1}
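
; Note: !0 (used by the first two loops) requests a scalable VF of 4 with an
; interleave count of 2, while !6 (used by the last two loops) requests a
; scalable VF of 2 with an interleave count of 1.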