; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=loop-vectorize \
; RUN: -force-tail-folding-style=data-with-evl \
; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s --check-prefix=IF-EVL

; RUN: opt -passes=loop-vectorize \
; RUN: -force-tail-folding-style=none \
; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s --check-prefix=NO-VP
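
; The IF-EVL run vectorizes with EVL-based tail folding; the NO-VP run
; disables tail folding and, since an epilogue is not allowed, leaves the
; loops scalar.

; Plain reverse (stride -1) load/store loop: under EVL tail folding the
; accesses are emitted as vp.load/vp.store through a reversed vector pointer,
; with the element order restored by llvm.experimental.vp.reverse.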
define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %ptr2) {
; IF-EVL-LABEL: @reverse_load_store(
; IF-EVL-NEXT: entry:
; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; IF-EVL: vector.ph:
; IF-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; IF-EVL-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]]
; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4
; IF-EVL-NEXT: [[IND_END:%.*]] = sub i64 [[STARTVAL:%.*]], [[N_VEC]]
; IF-EVL-NEXT: [[IND_END1:%.*]] = trunc i64 [[N_VEC]] to i32
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 1024, [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[STARTVAL]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 0
; IF-EVL-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], -1
; IF-EVL-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i64 [[TMP7]]
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP5]] to i64
; IF-EVL-NEXT: [[TMP9:%.*]] = mul i64 0, [[TMP18]]
; IF-EVL-NEXT: [[TMP10:%.*]] = sub i64 1, [[TMP18]]
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP8]], i64 [[TMP9]]
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP16]], i64 [[TMP10]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT: [[VP_REVERSE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[PTR2:%.*]], i64 [[TMP7]]
; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP5]] to i64
; IF-EVL-NEXT: [[TMP14:%.*]] = mul i64 0, [[TMP19]]
; IF-EVL-NEXT: [[TMP15:%.*]] = sub i64 1, [[TMP19]]
; IF-EVL-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[TMP13]], i64 [[TMP14]]
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[TMP22]], i64 [[TMP15]]
; IF-EVL-NEXT: [[VP_REVERSE3:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_REVERSE]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_REVERSE3]], ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT: [[TMP20:%.*]] = zext i32 [[TMP5]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP20]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
; IF-EVL-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: br i1 true, label [[LOOPEND:%.*]], label [[SCALAR_PH]]
; IF-EVL: scalar.ph:
; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[STARTVAL]], [[ENTRY:%.*]] ]
; IF-EVL-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; IF-EVL-NEXT: br label [[FOR_BODY:%.*]]
; IF-EVL: for.body:
; IF-EVL-NEXT: [[ADD_PHI:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
; IF-EVL-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; IF-EVL-NEXT: [[ADD]] = add i64 [[ADD_PHI]], -1
; IF-EVL-NEXT: [[GEPL:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 [[ADD]]
; IF-EVL-NEXT: [[TMP:%.*]] = load i32, ptr [[GEPL]], align 4
; IF-EVL-NEXT: [[GEPS:%.*]] = getelementptr inbounds i32, ptr [[PTR2]], i64 [[ADD]]
; IF-EVL-NEXT: store i32 [[TMP]], ptr [[GEPS]], align 4
; IF-EVL-NEXT: [[INC]] = add i32 [[I]], 1
; IF-EVL-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[INC]], 1024
; IF-EVL-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[LOOPEND]], !llvm.loop [[LOOP3:![0-9]+]]
; IF-EVL: loopend:
; IF-EVL-NEXT: ret void
;
; NO-VP-LABEL: @reverse_load_store(
; NO-VP-NEXT: entry:
; NO-VP-NEXT: br label [[FOR_BODY:%.*]]
; NO-VP: for.body:
; NO-VP-NEXT: [[ADD_PHI:%.*]] = phi i64 [ [[STARTVAL:%.*]], [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
; NO-VP-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; NO-VP-NEXT: [[ADD]] = add i64 [[ADD_PHI]], -1
; NO-VP-NEXT: [[GEPL:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i64 [[ADD]]
; NO-VP-NEXT: [[TMP:%.*]] = load i32, ptr [[GEPL]], align 4
; NO-VP-NEXT: [[GEPS:%.*]] = getelementptr inbounds i32, ptr [[PTR2:%.*]], i64 [[ADD]]
; NO-VP-NEXT: store i32 [[TMP]], ptr [[GEPS]], align 4
; NO-VP-NEXT: [[INC]] = add i32 [[I]], 1
; NO-VP-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[INC]], 1024
; NO-VP-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[LOOPEND:%.*]]
; NO-VP: loopend:
; NO-VP-NEXT: ret void
;
entry:
  br label %for.body

for.body:
  %add.phi = phi i64 [ %startval, %entry ], [ %add, %for.body ]
  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %add = add i64 %add.phi, -1
  %gepl = getelementptr inbounds i32, ptr %ptr, i64 %add
  %tmp = load i32, ptr %gepl, align 4
  %geps = getelementptr inbounds i32, ptr %ptr2, i64 %add
  store i32 %tmp, ptr %geps, align 4
  %inc = add i32 %i, 1
  %exitcond = icmp ne i32 %inc, 1024
  br i1 %exitcond, label %for.body, label %loopend

loopend:
  ret void
}
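
; Same reverse pattern, but the accesses to %ptr1 and %ptr2 are guarded by a
; condition (%tmp < 100), so the lane mask must itself be reversed with
; llvm.experimental.vp.reverse.nxv4i1 before it is applied to the reversed
; vp.load and vp.store.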
define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noalias %ptr1, ptr noalias %ptr2) {
; IF-EVL-LABEL: @reverse_load_store_masked(
; IF-EVL-NEXT: entry:
; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; IF-EVL: vector.ph:
; IF-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; IF-EVL-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]]
; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4
; IF-EVL-NEXT: [[IND_END:%.*]] = sub i64 [[STARTVAL:%.*]], [[N_VEC]]
; IF-EVL-NEXT: [[IND_END1:%.*]] = trunc i64 [[N_VEC]] to i32
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 1024, [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[STARTVAL]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 0
; IF-EVL-NEXT: [[OFFSET_IDX3:%.*]] = trunc i64 [[EVL_BASED_IV]] to i32
; IF-EVL-NEXT: [[TMP7:%.*]] = add i32 [[OFFSET_IDX3]], 0
; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[EVL_BASED_IV]], i64 0
; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; IF-EVL-NEXT: [[TMP8:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; IF-EVL-NEXT: [[TMP9:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP8]]
; IF-EVL-NEXT: [[VEC_IV:%.*]] = add <vscale x 4 x i64> [[BROADCAST_SPLAT]], [[TMP9]]
; IF-EVL-NEXT: [[TMP10:%.*]] = icmp ule <vscale x 4 x i64> [[VEC_IV]], splat (i64 1023)
; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[TMP6]], -1
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i32 [[TMP7]]
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT: [[TMP14:%.*]] = icmp slt <vscale x 4 x i32> [[VP_OP_LOAD]], splat (i32 100)
; IF-EVL-NEXT: [[TMP15:%.*]] = select <vscale x 4 x i1> [[TMP10]], <vscale x 4 x i1> [[TMP14]], <vscale x 4 x i1> zeroinitializer
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[PTR1:%.*]], i64 [[TMP11]]
; IF-EVL-NEXT: [[TMP26:%.*]] = zext i32 [[TMP5]] to i64
; IF-EVL-NEXT: [[TMP17:%.*]] = mul i64 0, [[TMP26]]
; IF-EVL-NEXT: [[TMP18:%.*]] = sub i64 1, [[TMP26]]
; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP16]], i64 [[TMP17]]
; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[TMP19]], i64 [[TMP18]]
; IF-EVL-NEXT: [[VP_REVERSE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT: [[VP_OP_LOAD4:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP20]], <vscale x 4 x i1> [[VP_REVERSE_MASK]], i32 [[TMP5]])
; IF-EVL-NEXT: [[VP_REVERSE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD4]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[PTR2:%.*]], i64 [[TMP11]]
; IF-EVL-NEXT: [[TMP27:%.*]] = zext i32 [[TMP5]] to i64
; IF-EVL-NEXT: [[TMP22:%.*]] = mul i64 0, [[TMP27]]
; IF-EVL-NEXT: [[TMP23:%.*]] = sub i64 1, [[TMP27]]
; IF-EVL-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP21]], i64 [[TMP22]]
; IF-EVL-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP24]], i64 [[TMP23]]
; IF-EVL-NEXT: [[VP_REVERSE5:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_REVERSE]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT: [[VP_REVERSE_MASK6:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_REVERSE5]], ptr align 4 [[TMP25]], <vscale x 4 x i1> [[VP_REVERSE_MASK6]], i32 [[TMP5]])
; IF-EVL-NEXT: [[TMP28:%.*]] = zext i32 [[TMP5]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP28]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
; IF-EVL-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: br i1 true, label [[LOOPEND:%.*]], label [[SCALAR_PH]]
; IF-EVL: scalar.ph:
; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[STARTVAL]], [[ENTRY:%.*]] ]
; IF-EVL-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; IF-EVL-NEXT: br label [[FOR_BODY:%.*]]
; IF-EVL: for.body:
; IF-EVL-NEXT: [[ADD_PHI:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_INC:%.*]] ]
; IF-EVL-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_INC]] ]
; IF-EVL-NEXT: [[ADD]] = add i64 [[ADD_PHI]], -1
; IF-EVL-NEXT: [[GEPL:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i32 [[I]]
; IF-EVL-NEXT: [[TMP:%.*]] = load i32, ptr [[GEPL]], align 4
; IF-EVL-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP]], 100
; IF-EVL-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; IF-EVL: if.then:
; IF-EVL-NEXT: [[GEPL1:%.*]] = getelementptr inbounds i32, ptr [[PTR1]], i64 [[ADD]]
; IF-EVL-NEXT: [[V:%.*]] = load i32, ptr [[GEPL1]], align 4
; IF-EVL-NEXT: [[GEPS:%.*]] = getelementptr inbounds i32, ptr [[PTR2]], i64 [[ADD]]
; IF-EVL-NEXT: store i32 [[V]], ptr [[GEPS]], align 4
; IF-EVL-NEXT: br label [[FOR_INC]]
; IF-EVL: for.inc:
; IF-EVL-NEXT: [[INC]] = add i32 [[I]], 1
; IF-EVL-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[INC]], 1024
; IF-EVL-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[LOOPEND]], !llvm.loop [[LOOP5:![0-9]+]]
; IF-EVL: loopend:
; IF-EVL-NEXT: ret void
;
; NO-VP-LABEL: @reverse_load_store_masked(
; NO-VP-NEXT: entry:
; NO-VP-NEXT: br label [[FOR_BODY:%.*]]
; NO-VP: for.body:
; NO-VP-NEXT: [[ADD_PHI:%.*]] = phi i64 [ [[STARTVAL:%.*]], [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[FOR_INC:%.*]] ]
; NO-VP-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_INC]] ]
; NO-VP-NEXT: [[ADD]] = add i64 [[ADD_PHI]], -1
; NO-VP-NEXT: [[GEPL:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i32 [[I]]
; NO-VP-NEXT: [[TMP:%.*]] = load i32, ptr [[GEPL]], align 4
; NO-VP-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP]], 100
; NO-VP-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; NO-VP: if.then:
; NO-VP-NEXT: [[GEPL1:%.*]] = getelementptr inbounds i32, ptr [[PTR1:%.*]], i64 [[ADD]]
; NO-VP-NEXT: [[V:%.*]] = load i32, ptr [[GEPL1]], align 4
; NO-VP-NEXT: [[GEPS:%.*]] = getelementptr inbounds i32, ptr [[PTR2:%.*]], i64 [[ADD]]
; NO-VP-NEXT: store i32 [[V]], ptr [[GEPS]], align 4
; NO-VP-NEXT: br label [[FOR_INC]]
; NO-VP: for.inc:
; NO-VP-NEXT: [[INC]] = add i32 [[I]], 1
; NO-VP-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[INC]], 1024
; NO-VP-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[LOOPEND:%.*]]
; NO-VP: loopend:
; NO-VP-NEXT: ret void
;
entry:
  br label %for.body

for.body:
  %add.phi = phi i64 [ %startval, %entry ], [ %add, %for.inc ]
  %i = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
  %add = add i64 %add.phi, -1
  %gepl = getelementptr inbounds i32, ptr %ptr, i32 %i
  %tmp = load i32, ptr %gepl, align 4
  %cmp1 = icmp slt i32 %tmp, 100
  br i1 %cmp1, label %if.then, label %for.inc

if.then:
  %gepl1 = getelementptr inbounds i32, ptr %ptr1, i64 %add
  %v = load i32, ptr %gepl1, align 4
  %geps = getelementptr inbounds i32, ptr %ptr2, i64 %add
  store i32 %v, ptr %geps, align 4
  br label %for.inc

for.inc:
  %inc = add i32 %i, 1
  %exitcond = icmp ne i32 %inc, 1024
  br i1 %exitcond, label %for.body, label %loopend

loopend:
  ret void
}

; From a miscompile originally reported at
; https://github.com/llvm/llvm-project/issues/122681
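; The reversed load from %a feeds a vp.gather from %b, and the gathered bytes
; are stored back through two further reverse vector pointers (%c and %d).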

define void @multiple_reverse_vector_pointer(ptr noalias %a, ptr noalias %b, ptr noalias %c, ptr noalias %d) {
; IF-EVL-LABEL: @multiple_reverse_vector_pointer(
; IF-EVL-NEXT: entry:
; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; IF-EVL: vector.ph:
; IF-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 16
; IF-EVL-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]]
; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 16
; IF-EVL-NEXT: [[TMP5:%.*]] = sub i64 1024, [[N_VEC]]
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 1025, [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 16, i1 true)
; IF-EVL-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1024, [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 0
; IF-EVL-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[TMP7]]
; IF-EVL-NEXT: [[TMP9:%.*]] = zext i32 [[TMP6]] to i64
; IF-EVL-NEXT: [[TMP10:%.*]] = mul i64 0, [[TMP9]]
; IF-EVL-NEXT: [[TMP11:%.*]] = sub i64 1, [[TMP9]]
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP8]], i64 [[TMP10]]
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP12]], i64 [[TMP11]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP13]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP6]])
; IF-EVL-NEXT: [[VP_REVERSE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vp.reverse.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP6]])
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[B:%.*]], <vscale x 16 x i8> [[VP_REVERSE]]
; IF-EVL-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 16 x i8> @llvm.vp.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr> align 1 [[TMP14]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP6]])
; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[C:%.*]], i64 [[TMP7]]
; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP6]] to i64
; IF-EVL-NEXT: [[TMP17:%.*]] = mul i64 0, [[TMP16]]
; IF-EVL-NEXT: [[TMP18:%.*]] = sub i64 1, [[TMP16]]
; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[TMP15]], i64 [[TMP17]]
; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[TMP19]], i64 [[TMP18]]
; IF-EVL-NEXT: [[VP_REVERSE1:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vp.reverse.nxv16i8(<vscale x 16 x i8> [[WIDE_MASKED_GATHER]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP6]])
; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_REVERSE1]], ptr align 1 [[TMP20]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP6]])
; IF-EVL-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[D:%.*]], i64 [[TMP7]]
; IF-EVL-NEXT: [[TMP22:%.*]] = zext i32 [[TMP6]] to i64
; IF-EVL-NEXT: [[TMP23:%.*]] = mul i64 0, [[TMP22]]
; IF-EVL-NEXT: [[TMP24:%.*]] = sub i64 1, [[TMP22]]
; IF-EVL-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[TMP21]], i64 [[TMP23]]
; IF-EVL-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[TMP25]], i64 [[TMP24]]
; IF-EVL-NEXT: [[VP_REVERSE2:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vp.reverse.nxv16i8(<vscale x 16 x i8> [[WIDE_MASKED_GATHER]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP6]])
; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_REVERSE2]], ptr align 1 [[TMP26]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP6]])
; IF-EVL-NEXT: [[TMP27:%.*]] = zext i32 [[TMP6]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP27]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
; IF-EVL-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; IF-EVL: scalar.ph:
; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP5]], [[MIDDLE_BLOCK]] ], [ 1024, [[ENTRY:%.*]] ]
; IF-EVL-NEXT: br label [[LOOP:%.*]]
; IF-EVL: loop:
; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; IF-EVL-NEXT: [[GEP_A:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV]]
; IF-EVL-NEXT: [[X:%.*]] = load i8, ptr [[GEP_A]], align 1
; IF-EVL-NEXT: [[GEP_B:%.*]] = getelementptr i8, ptr [[B]], i8 [[X]]
; IF-EVL-NEXT: [[Y:%.*]] = load i8, ptr [[GEP_B]], align 1
; IF-EVL-NEXT: [[GEP_C:%.*]] = getelementptr i8, ptr [[C]], i64 [[IV]]
; IF-EVL-NEXT: store i8 [[Y]], ptr [[GEP_C]], align 1
; IF-EVL-NEXT: [[GEP_D:%.*]] = getelementptr i8, ptr [[D]], i64 [[IV]]
; IF-EVL-NEXT: store i8 [[Y]], ptr [[GEP_D]], align 1
; IF-EVL-NEXT: [[IV_NEXT]] = add i64 [[IV]], -1
; IF-EVL-NEXT: [[CMP_NOT:%.*]] = icmp eq i64 [[IV]], 0
; IF-EVL-NEXT: br i1 [[CMP_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
; IF-EVL: exit:
; IF-EVL-NEXT: ret void
;
; NO-VP-LABEL: @multiple_reverse_vector_pointer(
; NO-VP-NEXT: entry:
; NO-VP-NEXT: br label [[LOOP:%.*]]
; NO-VP: loop:
; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ 1024, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; NO-VP-NEXT: [[GEP_A:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[IV]]
; NO-VP-NEXT: [[X:%.*]] = load i8, ptr [[GEP_A]], align 1
; NO-VP-NEXT: [[GEP_B:%.*]] = getelementptr i8, ptr [[B:%.*]], i8 [[X]]
; NO-VP-NEXT: [[Y:%.*]] = load i8, ptr [[GEP_B]], align 1
; NO-VP-NEXT: [[GEP_C:%.*]] = getelementptr i8, ptr [[C:%.*]], i64 [[IV]]
; NO-VP-NEXT: store i8 [[Y]], ptr [[GEP_C]], align 1
; NO-VP-NEXT: [[GEP_D:%.*]] = getelementptr i8, ptr [[D:%.*]], i64 [[IV]]
; NO-VP-NEXT: store i8 [[Y]], ptr [[GEP_D]], align 1
; NO-VP-NEXT: [[IV_NEXT]] = add i64 [[IV]], -1
; NO-VP-NEXT: [[CMP_NOT:%.*]] = icmp eq i64 [[IV]], 0
; NO-VP-NEXT: br i1 [[CMP_NOT]], label [[EXIT:%.*]], label [[LOOP]]
; NO-VP: exit:
; NO-VP-NEXT: ret void
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 1024, %entry ], [ %iv.next, %loop ]

  %gep.a = getelementptr i8, ptr %a, i64 %iv
  %x = load i8, ptr %gep.a

  %gep.b = getelementptr i8, ptr %b, i8 %x
  %y = load i8, ptr %gep.b

  %gep.c = getelementptr i8, ptr %c, i64 %iv
  store i8 %y, ptr %gep.c

  %gep.d = getelementptr i8, ptr %d, i64 %iv
  store i8 %y, ptr %gep.d

  %iv.next = add i64 %iv, -1
  %cmp.not = icmp eq i64 %iv, 0
  br i1 %cmp.not, label %exit, label %loop

exit:
  ret void
}