; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -mcpu=skx -S -passes='loop-vectorize,instcombine,simplifycfg,loop-mssa(licm)' -force-vector-width=4 -force-vector-interleave=1 -enable-interleaved-mem-accesses -prefer-predicate-over-epilogue=predicate-dont-vectorize < %s | FileCheck %s -check-prefix=DISABLED_MASKED_STRIDED
; RUN: opt -mcpu=skx -S -passes='loop-vectorize,instcombine,simplifycfg,loop-mssa(licm)' -force-vector-width=4 -force-vector-interleave=1 -enable-interleaved-mem-accesses -enable-masked-interleaved-mem-accesses -prefer-predicate-over-epilogue=predicate-dont-vectorize < %s | FileCheck %s -check-prefix=ENABLED_MASKED_STRIDED

; Both configurations above force a vector width of 4 with an interleave count
; of 1, enable interleaved memory accesses, and prefer predication over a
; scalar epilogue. They differ only in -enable-masked-interleaved-mem-accesses:
; the run without it is verified under the DISABLED_MASKED_STRIDED prefix, the
; run with it under the ENABLED_MASKED_STRIDED prefix.

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; (1) Interleave-group with factor 4, storing only 2 members out of the 4.
; Check that when we allow masked-memops to support interleave-group with gaps,
; the store is vectorized using a wide masked store, with a 1,1,0,0,1,1,0,0,... mask.
; Check that when we don't allow masked-memops to support interleave-group with gaps,
; the store is scalarized.
; The input IR was generated from this source:
;     for(i=0;i<1024;i++){
;       points[i*4] = x[i];
;       points[i*4 + 1] = y[i];
;     }
; (relates to the testcase in PR50566)
; In the IR below the elements are i16 and the trip count is the constant 1024,
; so the only mask appearing in the ENABLED_MASKED_STRIDED output is the
; constant gaps mask of the <16 x i16> masked store.

; Function Attrs: nofree norecurse nosync nounwind uwtable
define dso_local void @test1(ptr noalias nocapture %points, ptr noalias nocapture readonly %x, ptr noalias nocapture readonly %y) local_unnamed_addr {
; DISABLED_MASKED_STRIDED-LABEL: @test1(
; DISABLED_MASKED_STRIDED-NEXT: entry:
; DISABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
; DISABLED_MASKED_STRIDED: vector.body:
; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i64 [[INDEX]]
; DISABLED_MASKED_STRIDED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP0]], align 2
; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw <4 x i64> [[VEC_IND]], splat (i64 2)
; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = extractelement <4 x i64> [[TMP1]], i64 0
; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i16, ptr [[POINTS:%.*]], i64 [[TMP2]]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP1]], i64 1
; DISABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i16, ptr [[POINTS]], i64 [[TMP4]]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP1]], i64 2
; DISABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i16, ptr [[POINTS]], i64 [[TMP6]]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP1]], i64 3
; DISABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i16, ptr [[POINTS]], i64 [[TMP8]]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP10:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i64 0
; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP10]], ptr [[TMP3]], align 2
; DISABLED_MASKED_STRIDED-NEXT: [[TMP11:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i64 1
; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP11]], ptr [[TMP5]], align 2
; DISABLED_MASKED_STRIDED-NEXT: [[TMP12:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i64 2
; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP12]], ptr [[TMP7]], align 2
; DISABLED_MASKED_STRIDED-NEXT: [[TMP13:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i64 3
; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP13]], ptr [[TMP9]], align 2
; DISABLED_MASKED_STRIDED-NEXT: [[TMP14:%.*]] = getelementptr inbounds i16, ptr [[Y:%.*]], i64 [[INDEX]]
; DISABLED_MASKED_STRIDED-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i16>, ptr [[TMP14]], align 2
; DISABLED_MASKED_STRIDED-NEXT: [[TMP15:%.*]] = or disjoint <4 x i64> [[TMP1]], splat (i64 1)
; DISABLED_MASKED_STRIDED-NEXT: [[TMP16:%.*]] = extractelement <4 x i64> [[TMP15]], i64 0
; DISABLED_MASKED_STRIDED-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw i16, ptr [[POINTS]], i64 [[TMP16]]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP18:%.*]] = extractelement <4 x i64> [[TMP15]], i64 1
; DISABLED_MASKED_STRIDED-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw i16, ptr [[POINTS]], i64 [[TMP18]]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP20:%.*]] = extractelement <4 x i64> [[TMP15]], i64 2
; DISABLED_MASKED_STRIDED-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw i16, ptr [[POINTS]], i64 [[TMP20]]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP22:%.*]] = extractelement <4 x i64> [[TMP15]], i64 3
; DISABLED_MASKED_STRIDED-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw i16, ptr [[POINTS]], i64 [[TMP22]]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP24:%.*]] = extractelement <4 x i16> [[WIDE_LOAD1]], i64 0
; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP24]], ptr [[TMP17]], align 2
; DISABLED_MASKED_STRIDED-NEXT: [[TMP25:%.*]] = extractelement <4 x i16> [[WIDE_LOAD1]], i64 1
; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP25]], ptr [[TMP19]], align 2
; DISABLED_MASKED_STRIDED-NEXT: [[TMP26:%.*]] = extractelement <4 x i16> [[WIDE_LOAD1]], i64 2
; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP26]], ptr [[TMP21]], align 2
; DISABLED_MASKED_STRIDED-NEXT: [[TMP27:%.*]] = extractelement <4 x i16> [[WIDE_LOAD1]], i64 3
; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP27]], ptr [[TMP23]], align 2
; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; DISABLED_MASKED_STRIDED-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP28]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; DISABLED_MASKED_STRIDED: for.end:
; DISABLED_MASKED_STRIDED-NEXT: ret void
;
; ENABLED_MASKED_STRIDED-LABEL: @test1(
; ENABLED_MASKED_STRIDED-NEXT: entry:
; ENABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
; ENABLED_MASKED_STRIDED: vector.body:
; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i64 [[INDEX]]
; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP0]], align 2
; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nsw i64 [[INDEX]], 3
; ENABLED_MASKED_STRIDED-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[POINTS:%.*]], i64 [[TMP1]]
; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[Y:%.*]], i64 [[INDEX]]
; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i16>, ptr [[TMP2]], align 2
; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD1]], <16 x i32> <i32 0, i32 4, i32 poison, i32 poison, i32 1, i32 5, i32 poison, i32 poison, i32 2, i32 6, i32 poison, i32 poison, i32 3, i32 7, i32 poison, i32 poison>
; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v16i16.p0(<16 x i16> [[INTERLEAVED_VEC]], ptr [[GEP]], i32 2, <16 x i1> <i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false>)
; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP4]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; ENABLED_MASKED_STRIDED: for.end:
; ENABLED_MASKED_STRIDED-NEXT: ret void
;
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  ; points[4*i] = x[i]
  %arrayidx = getelementptr inbounds i16, ptr %x, i64 %indvars.iv
  %0 = load i16, ptr %arrayidx, align 2
  %1 = shl nuw nsw i64 %indvars.iv, 2
  %arrayidx2 = getelementptr inbounds i16, ptr %points, i64 %1
  store i16 %0, ptr %arrayidx2, align 2
  ; points[4*i + 1] = y[i]
  %arrayidx4 = getelementptr inbounds i16, ptr %y, i64 %indvars.iv
  %2 = load i16, ptr %arrayidx4, align 2
  %3 = or disjoint i64 %1, 1
  %arrayidx7 = getelementptr inbounds i16, ptr %points, i64 %3
  store i16 %2, ptr %arrayidx7, align 2
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond.not = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}

; (2) Same as above, but this time the gaps mask of the store is also And-ed with the
; fold-tail mask. If using masked memops to vectorize interleaved-group with gaps is
; not allowed, the store is scalarized and predicated.
; The input IR was generated from this source:
;     for(i=0;i<numPoints;i++){
;       points[i*4] = x[i];
;       points[i*4 + 1] = y[i];
;     }
; Here the trip count comes from the i32 argument %numPoints. In the
; ENABLED_MASKED_STRIDED output the <4 x i1> tail mask (lane index ule
; trip-count - 1) is replicated to 16 lanes by a shufflevector and then And-ed
; with the constant gaps mask before being passed to the masked store.

; Function Attrs: nofree norecurse nosync nounwind uwtable
define dso_local void @test2(ptr noalias nocapture %points, i32 %numPoints, ptr noalias nocapture readonly %x, ptr noalias nocapture readonly %y) local_unnamed_addr {
; DISABLED_MASKED_STRIDED-LABEL: @test2(
; DISABLED_MASKED_STRIDED-NEXT: entry:
; DISABLED_MASKED_STRIDED-NEXT: [[CMP15:%.*]] = icmp sgt i32 [[NUMPOINTS:%.*]], 0
; DISABLED_MASKED_STRIDED-NEXT: br i1 [[CMP15]], label [[VECTOR_PH:%.*]], label [[FOR_END:%.*]]
; DISABLED_MASKED_STRIDED: vector.ph:
; DISABLED_MASKED_STRIDED-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[NUMPOINTS]] to i64
; DISABLED_MASKED_STRIDED-NEXT: [[N_RND_UP:%.*]] = add nuw nsw i64 [[WIDE_TRIP_COUNT]], 3
; DISABLED_MASKED_STRIDED-NEXT: [[N_VEC:%.*]] = and i64 [[N_RND_UP]], 4294967292
; DISABLED_MASKED_STRIDED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add nsw i64 [[WIDE_TRIP_COUNT]], -1
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; DISABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
; DISABLED_MASKED_STRIDED: vector.body:
; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE15:%.*]] ]
; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE15]] ]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i64 [[INDEX]]
; DISABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr [[TMP1]], i32 2, <4 x i1> [[TMP0]], <4 x i16> poison)
; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = shl nsw <4 x i64> [[VEC_IND]], splat (i64 2)
; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP2]], i64 0
; DISABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[POINTS:%.*]], i64 [[TMP4]]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = extractelement <4 x i16> [[WIDE_MASKED_LOAD]], i64 0
; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 2
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE]]
; DISABLED_MASKED_STRIDED: pred.store.continue:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if1:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP2]], i64 1
; DISABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[POINTS]], i64 [[TMP8]]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP10:%.*]] = extractelement <4 x i16> [[WIDE_MASKED_LOAD]], i64 1
; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP10]], ptr [[TMP9]], align 2
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE2]]
; DISABLED_MASKED_STRIDED: pred.store.continue2:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if3:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP12:%.*]] = extractelement <4 x i64> [[TMP2]], i64 2
; DISABLED_MASKED_STRIDED-NEXT: [[TMP13:%.*]] = getelementptr inbounds i16, ptr [[POINTS]], i64 [[TMP12]]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP14:%.*]] = extractelement <4 x i16> [[WIDE_MASKED_LOAD]], i64 2
; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP14]], ptr [[TMP13]], align 2
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE4]]
; DISABLED_MASKED_STRIDED: pred.store.continue4:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if5:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP16:%.*]] = extractelement <4 x i64> [[TMP2]], i64 3
; DISABLED_MASKED_STRIDED-NEXT: [[TMP17:%.*]] = getelementptr inbounds i16, ptr [[POINTS]], i64 [[TMP16]]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP18:%.*]] = extractelement <4 x i16> [[WIDE_MASKED_LOAD]], i64 3
; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP18]], ptr [[TMP17]], align 2
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE6]]
; DISABLED_MASKED_STRIDED: pred.store.continue6:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP19:%.*]] = getelementptr inbounds i16, ptr [[Y:%.*]], i64 [[INDEX]]
; DISABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_LOAD7:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr [[TMP19]], i32 2, <4 x i1> [[TMP0]], <4 x i16> poison)
; DISABLED_MASKED_STRIDED-NEXT: [[TMP20:%.*]] = or disjoint <4 x i64> [[TMP2]], splat (i64 1)
; DISABLED_MASKED_STRIDED-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP21]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if8:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP22:%.*]] = extractelement <4 x i64> [[TMP20]], i64 0
; DISABLED_MASKED_STRIDED-NEXT: [[TMP23:%.*]] = getelementptr inbounds i16, ptr [[POINTS]], i64 [[TMP22]]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP24:%.*]] = extractelement <4 x i16> [[WIDE_MASKED_LOAD7]], i64 0
; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP24]], ptr [[TMP23]], align 2
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE9]]
; DISABLED_MASKED_STRIDED: pred.store.continue9:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP25:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF10:%.*]], label [[PRED_STORE_CONTINUE11:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if10:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP20]], i64 1
; DISABLED_MASKED_STRIDED-NEXT: [[TMP27:%.*]] = getelementptr inbounds i16, ptr [[POINTS]], i64 [[TMP26]]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP28:%.*]] = extractelement <4 x i16> [[WIDE_MASKED_LOAD7]], i64 1
; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP28]], ptr [[TMP27]], align 2
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE11]]
; DISABLED_MASKED_STRIDED: pred.store.continue11:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP29:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP29]], label [[PRED_STORE_IF12:%.*]], label [[PRED_STORE_CONTINUE13:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if12:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP30:%.*]] = extractelement <4 x i64> [[TMP20]], i64 2
; DISABLED_MASKED_STRIDED-NEXT: [[TMP31:%.*]] = getelementptr inbounds i16, ptr [[POINTS]], i64 [[TMP30]]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP32:%.*]] = extractelement <4 x i16> [[WIDE_MASKED_LOAD7]], i64 2
; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP32]], ptr [[TMP31]], align 2
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE13]]
; DISABLED_MASKED_STRIDED: pred.store.continue13:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP33:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP33]], label [[PRED_STORE_IF14:%.*]], label [[PRED_STORE_CONTINUE15]]
; DISABLED_MASKED_STRIDED: pred.store.if14:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP34:%.*]] = extractelement <4 x i64> [[TMP20]], i64 3
; DISABLED_MASKED_STRIDED-NEXT: [[TMP35:%.*]] = getelementptr inbounds i16, ptr [[POINTS]], i64 [[TMP34]]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP36:%.*]] = extractelement <4 x i16> [[WIDE_MASKED_LOAD7]], i64 3
; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP36]], ptr [[TMP35]], align 2
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE15]]
; DISABLED_MASKED_STRIDED: pred.store.continue15:
; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; DISABLED_MASKED_STRIDED-NEXT: [[TMP37:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP37]], label [[FOR_END_LOOPEXIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; DISABLED_MASKED_STRIDED: for.end.loopexit:
; DISABLED_MASKED_STRIDED-NEXT: br label [[FOR_END]]
; DISABLED_MASKED_STRIDED: for.end:
; DISABLED_MASKED_STRIDED-NEXT: ret void
;
; ENABLED_MASKED_STRIDED-LABEL: @test2(
; ENABLED_MASKED_STRIDED-NEXT: entry:
; ENABLED_MASKED_STRIDED-NEXT: [[CMP15:%.*]] = icmp sgt i32 [[NUMPOINTS:%.*]], 0
; ENABLED_MASKED_STRIDED-NEXT: br i1 [[CMP15]], label [[VECTOR_PH:%.*]], label [[FOR_END:%.*]]
; ENABLED_MASKED_STRIDED: vector.ph:
; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[NUMPOINTS]] to i64
; ENABLED_MASKED_STRIDED-NEXT: [[N_RND_UP:%.*]] = add nuw nsw i64 [[WIDE_TRIP_COUNT]], 3
; ENABLED_MASKED_STRIDED-NEXT: [[N_VEC:%.*]] = and i64 [[N_RND_UP]], 4294967292
; ENABLED_MASKED_STRIDED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add nsw i64 [[WIDE_TRIP_COUNT]], -1
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; ENABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
; ENABLED_MASKED_STRIDED: vector.body:
; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i64 0
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer
; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IV:%.*]] = or disjoint <4 x i64> [[BROADCAST_SPLAT2]], <i64 0, i64 1, i64 2, i64 3>
; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]]
; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i64 [[INDEX]]
; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr [[TMP1]], i32 2, <4 x i1> [[TMP0]], <4 x i16> poison)
; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = shl nsw i64 [[INDEX]], 3
; ENABLED_MASKED_STRIDED-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[POINTS:%.*]], i64 [[TMP2]]
; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[Y:%.*]], i64 [[INDEX]]
; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr [[TMP3]], i32 2, <4 x i1> [[TMP0]], <4 x i16> poison)
; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i16> [[WIDE_MASKED_LOAD]], <4 x i16> [[WIDE_MASKED_LOAD3]], <16 x i32> <i32 0, i32 4, i32 poison, i32 poison, i32 1, i32 5, i32 poison, i32 poison, i32 2, i32 6, i32 poison, i32 poison, i32 3, i32 7, i32 poison, i32 poison>
; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <4 x i1> [[TMP0]], <4 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = and <16 x i1> [[INTERLEAVED_MASK]], <i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false>
; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v16i16.p0(<16 x i16> [[INTERLEAVED_VEC]], ptr [[GEP]], i32 2, <16 x i1> [[TMP5]])
; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP6]], label [[FOR_END_LOOPEXIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; ENABLED_MASKED_STRIDED: for.end.loopexit:
; ENABLED_MASKED_STRIDED-NEXT: br label [[FOR_END]]
; ENABLED_MASKED_STRIDED: for.end:
; ENABLED_MASKED_STRIDED-NEXT: ret void
;
entry:
  ; Skip the loop entirely when numPoints <= 0.
  %cmp15 = icmp sgt i32 %numPoints, 0
  br i1 %cmp15, label %for.body.preheader, label %for.end

for.body.preheader:
  %wide.trip.count = zext i32 %numPoints to i64
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
  ; points[4*i] = x[i]
  %arrayidx = getelementptr inbounds i16, ptr %x, i64 %indvars.iv
  %0 = load i16, ptr %arrayidx, align 2
  %1 = shl nsw i64 %indvars.iv, 2
  %arrayidx2 = getelementptr inbounds i16, ptr %points, i64 %1
  store i16 %0, ptr %arrayidx2, align 2
  ; points[4*i + 1] = y[i]
  %arrayidx4 = getelementptr inbounds i16, ptr %y, i64 %indvars.iv
  %2 = load i16, ptr %arrayidx4, align 2
  %3 = or disjoint i64 %1, 1
  %arrayidx7 = getelementptr inbounds i16, ptr %points, i64 %3
  store i16 %2, ptr %arrayidx7, align 2
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond.not, label %for.end.loopexit, label %for.body

for.end.loopexit:
  br label %for.end

for.end:
  ret void
}

; (3) Testing a scenario of a conditional store. The gaps mask of the store is also
; And-ed with the condition mask (x[i] > 0).
; If using masked memops to vectorize interleaved-group with gaps is
; not allowed, the store is scalarized and predicated.
; Here the Interleave-group is with factor 3, storing only 1 member out of the 3.
; The input IR was generated from this source:
;     for(i=0;i<1024;i++){
;       if (x[i] > 0)
;         points[i*3] = x[i];
;     }
; The %y argument is declared readnone and is not referenced in the body.
; NOTE(review): the ENABLED_MASKED_STRIDED assertions below are the same
; scalarized, predicated stores as the DISABLED_MASKED_STRIDED ones (no masked
; store is emitted), which does not match the description above of a gaps mask
; And-ed with the condition mask - confirm whether this is the intended output.
; Function Attrs: nofree norecurse nosync nounwind uwtable
define dso_local void @test(ptr noalias nocapture %points, ptr noalias nocapture readonly %x, ptr noalias nocapture readnone %y) local_unnamed_addr {
; DISABLED_MASKED_STRIDED-LABEL: @test(
; DISABLED_MASKED_STRIDED-NEXT: entry:
; DISABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
; DISABLED_MASKED_STRIDED: vector.body:
; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i64 [[INDEX]]
; DISABLED_MASKED_STRIDED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP0]], align 2
; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i16> [[WIDE_LOAD]], zeroinitializer
; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = mul nuw nsw <4 x i64> [[VEC_IND]], splat (i64 3)
; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP1]], i64 0
; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP2]], i64 0
; DISABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i16, ptr [[POINTS:%.*]], i64 [[TMP4]]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i64 0
; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 2
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE]]
; DISABLED_MASKED_STRIDED: pred.store.continue:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP1]], i64 1
; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if1:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP2]], i64 1
; DISABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i16, ptr [[POINTS]], i64 [[TMP8]]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP10:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i64 1
; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP10]], ptr [[TMP9]], align 2
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE2]]
; DISABLED_MASKED_STRIDED: pred.store.continue2:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP1]], i64 2
; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if3:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP12:%.*]] = extractelement <4 x i64> [[TMP2]], i64 2
; DISABLED_MASKED_STRIDED-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw i16, ptr [[POINTS]], i64 [[TMP12]]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP14:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i64 2
; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP14]], ptr [[TMP13]], align 2
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE4]]
; DISABLED_MASKED_STRIDED: pred.store.continue4:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP1]], i64 3
; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
; DISABLED_MASKED_STRIDED: pred.store.if5:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP16:%.*]] = extractelement <4 x i64> [[TMP2]], i64 3
; DISABLED_MASKED_STRIDED-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw i16, ptr [[POINTS]], i64 [[TMP16]]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP18:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i64 3
; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP18]], ptr [[TMP17]], align 2
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE6]]
; DISABLED_MASKED_STRIDED: pred.store.continue6:
; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; DISABLED_MASKED_STRIDED-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP19]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; DISABLED_MASKED_STRIDED: for.end:
; DISABLED_MASKED_STRIDED-NEXT: ret void
;
; ENABLED_MASKED_STRIDED-LABEL: @test(
; ENABLED_MASKED_STRIDED-NEXT: entry:
; ENABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
; ENABLED_MASKED_STRIDED: vector.body:
; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ]
; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i64 [[INDEX]]
; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP0]], align 2
; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i16> [[WIDE_LOAD]], zeroinitializer
; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = mul nuw nsw <4 x i64> [[VEC_IND]], splat (i64 3)
; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP1]], i64 0
; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; ENABLED_MASKED_STRIDED: pred.store.if:
; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP2]], i64 0
; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i16, ptr [[POINTS:%.*]], i64 [[TMP4]]
; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i64 0
; ENABLED_MASKED_STRIDED-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 2
; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE]]
; ENABLED_MASKED_STRIDED: pred.store.continue:
; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP1]], i64 1
; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
; ENABLED_MASKED_STRIDED: pred.store.if1:
; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP2]], i64 1
; ENABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i16, ptr [[POINTS]], i64 [[TMP8]]
; ENABLED_MASKED_STRIDED-NEXT: [[TMP10:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i64 1
; ENABLED_MASKED_STRIDED-NEXT: store i16 [[TMP10]], ptr [[TMP9]], align 2
; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE2]]
; ENABLED_MASKED_STRIDED: pred.store.continue2:
; ENABLED_MASKED_STRIDED-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP1]], i64 2
; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
; ENABLED_MASKED_STRIDED: pred.store.if3:
; ENABLED_MASKED_STRIDED-NEXT: [[TMP12:%.*]] = extractelement <4 x i64> [[TMP2]], i64 2
; ENABLED_MASKED_STRIDED-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw i16, ptr [[POINTS]], i64 [[TMP12]]
; ENABLED_MASKED_STRIDED-NEXT: [[TMP14:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i64 2
; ENABLED_MASKED_STRIDED-NEXT: store i16 [[TMP14]], ptr [[TMP13]], align 2
; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE4]]
; ENABLED_MASKED_STRIDED: pred.store.continue4:
; ENABLED_MASKED_STRIDED-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP1]], i64 3
; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
; ENABLED_MASKED_STRIDED: pred.store.if5:
; ENABLED_MASKED_STRIDED-NEXT: [[TMP16:%.*]] = extractelement <4 x i64> [[TMP2]], i64 3
; ENABLED_MASKED_STRIDED-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw i16, ptr [[POINTS]], i64 [[TMP16]]
; ENABLED_MASKED_STRIDED-NEXT: [[TMP18:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i64 3
; ENABLED_MASKED_STRIDED-NEXT: store i16 [[TMP18]], ptr [[TMP17]], align 2
; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE6]]
; ENABLED_MASKED_STRIDED: pred.store.continue6:
; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; ENABLED_MASKED_STRIDED-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP19]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; ENABLED_MASKED_STRIDED: for.end:
; ENABLED_MASKED_STRIDED-NEXT: ret void
;
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
  ; Load x[i] and only take the store path when it is strictly positive.
  %arrayidx = getelementptr inbounds i16, ptr %x, i64 %indvars.iv
  %0 = load i16, ptr %arrayidx, align 2
  %cmp1 = icmp sgt i16 %0, 0
  br i1 %cmp1, label %if.then, label %for.inc

if.then:
  ; points[3*i] = x[i]
  %1 = mul nuw nsw i64 %indvars.iv, 3
  %arrayidx6 = getelementptr inbounds i16, ptr %points, i64 %1
  store i16 %0, ptr %arrayidx6, align 2
  br label %for.inc

for.inc:
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond.not = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}
