1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 2; RUN: opt < %s -passes=loop-vectorize -mcpu=corei7-avx -S | FileCheck %s -check-prefix=AVX -check-prefix=AVX1 3; RUN: opt < %s -passes=loop-vectorize -mcpu=core-avx2 -S | FileCheck %s -check-prefix=AVX -check-prefix=AVX2 4; RUN: opt < %s -passes=loop-vectorize -mcpu=knl -S | FileCheck %s -check-prefix=AVX512 5 6target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" 7target triple = "x86_64-pc_linux" 8 9; The source code: 10; 11;void foo1(int *A, int *B, int *trigger) { 12; 13; for (int i=0; i<10000; i++) { 14; if (trigger[i] < 100) { 15; A[i] = B[i] + trigger[i]; 16; } 17; } 18;} 19 20define void @foo1(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture readonly %trigger) local_unnamed_addr #0 { 21; AVX1-LABEL: @foo1( 22; AVX1-NEXT: entry: 23; AVX1-NEXT: [[B3:%.*]] = ptrtoint ptr [[B:%.*]] to i64 24; AVX1-NEXT: [[TRIGGER2:%.*]] = ptrtoint ptr [[TRIGGER:%.*]] to i64 25; AVX1-NEXT: [[A1:%.*]] = ptrtoint ptr [[A:%.*]] to i64 26; AVX1-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] 27; AVX1: vector.memcheck: 28; AVX1-NEXT: [[TMP0:%.*]] = sub i64 [[A1]], [[TRIGGER2]] 29; AVX1-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32 30; AVX1-NEXT: [[TMP1:%.*]] = sub i64 [[A1]], [[B3]] 31; AVX1-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 32 32; AVX1-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] 33; AVX1-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] 34; AVX1: vector.ph: 35; AVX1-NEXT: br label [[VECTOR_BODY:%.*]] 36; AVX1: vector.body: 37; AVX1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 38; AVX1-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 39; AVX1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP2]] 40; AVX1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0 41; AVX1-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP4]], align 4 42; AVX1-NEXT: [[TMP5:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], splat (i32 100) 43; AVX1-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[B]], i64 [[TMP2]] 44; AVX1-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i32 0 45; AVX1-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr [[TMP7]], i32 4, <8 x i1> [[TMP5]], <8 x i32> poison) 46; AVX1-NEXT: [[TMP8:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD]], [[WIDE_LOAD]] 47; AVX1-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP2]] 48; AVX1-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP9]], i32 0 49; AVX1-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP8]], ptr [[TMP10]], i32 4, <8 x i1> [[TMP5]]) 50; AVX1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 51; AVX1-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000 52; AVX1-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] 53; AVX1: middle.block: 54; AVX1-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 55; AVX1: scalar.ph: 56; AVX1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] 57; AVX1-NEXT: br label [[FOR_BODY:%.*]] 58; AVX1: for.body: 59; AVX1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] 60; AVX1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDVARS_IV]] 61; AVX1-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 62; AVX1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP12]], 100 63; AVX1-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] 64; AVX1: if.then: 65; AVX1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] 66; AVX1-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4 67; AVX1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP12]] 68; AVX1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] 69; AVX1-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX7]], align 4 70; AVX1-NEXT: br label [[FOR_INC]] 71; AVX1: for.inc: 72; AVX1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 73; AVX1-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 10000 74; AVX1-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] 75; AVX1: for.end: 76; AVX1-NEXT: ret void 77; 78; AVX2-LABEL: @foo1( 79; AVX2-NEXT: iter.check: 80; AVX2-NEXT: [[B3:%.*]] = ptrtoint ptr [[B:%.*]] to i64 81; AVX2-NEXT: [[TRIGGER2:%.*]] = ptrtoint ptr [[TRIGGER:%.*]] to i64 82; AVX2-NEXT: [[A1:%.*]] = ptrtoint ptr [[A:%.*]] to i64 83; AVX2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] 84; AVX2: vector.memcheck: 85; AVX2-NEXT: [[TMP0:%.*]] = sub i64 [[A1]], [[TRIGGER2]] 86; AVX2-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 128 87; AVX2-NEXT: [[TMP1:%.*]] = sub i64 [[A1]], [[B3]] 88; AVX2-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 128 89; AVX2-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] 90; AVX2-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] 91; AVX2: vector.main.loop.iter.check: 92; AVX2-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH1:%.*]] 93; AVX2: vector.ph: 94; AVX2-NEXT: br label [[VECTOR_BODY:%.*]] 95; AVX2: vector.body: 96; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 97; AVX2-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 98; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP2]] 99; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0 100; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 8 101; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 16 102; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 24 103; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP4]], align 4 104; AVX2-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4 105; AVX2-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP6]], align 4 106; AVX2-NEXT: [[WIDE_LOAD7:%.*]] = load <8 x i32>, ptr [[TMP7]], align 4 107; AVX2-NEXT: [[TMP8:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], splat (i32 100) 108; AVX2-NEXT: [[TMP9:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD5]], splat (i32 100) 109; AVX2-NEXT: [[TMP10:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD6]], splat (i32 100) 110; AVX2-NEXT: [[TMP11:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD7]], splat (i32 100) 111; AVX2-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[B]], i64 [[TMP2]] 112; AVX2-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP12]], i32 0 113; AVX2-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP12]], i32 8 114; AVX2-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP12]], i32 16 115; AVX2-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP12]], i32 24 116; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr [[TMP13]], i32 4, <8 x i1> [[TMP8]], <8 x i32> poison) 117; AVX2-NEXT: [[WIDE_MASKED_LOAD8:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr [[TMP14]], i32 4, <8 x i1> [[TMP9]], <8 x i32> poison) 118; AVX2-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr [[TMP15]], i32 4, <8 x i1> [[TMP10]], <8 x i32> poison) 119; AVX2-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr [[TMP16]], i32 4, <8 x i1> [[TMP11]], <8 x i32> poison) 120; AVX2-NEXT: [[TMP17:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD]], [[WIDE_LOAD]] 121; AVX2-NEXT: [[TMP18:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD8]], [[WIDE_LOAD5]] 122; AVX2-NEXT: [[TMP19:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD9]], [[WIDE_LOAD6]] 123; AVX2-NEXT: [[TMP20:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD10]], [[WIDE_LOAD7]] 124; AVX2-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP2]] 125; AVX2-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[TMP21]], i32 0 126; AVX2-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[TMP21]], i32 8 127; AVX2-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP21]], i32 16 128; AVX2-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP21]], i32 24 129; AVX2-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP17]], ptr [[TMP22]], i32 4, <8 x i1> [[TMP8]]) 130; AVX2-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP18]], ptr [[TMP23]], i32 4, <8 x i1> [[TMP9]]) 131; AVX2-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP19]], ptr [[TMP24]], i32 4, <8 x i1> [[TMP10]]) 132; AVX2-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP20]], ptr [[TMP25]], i32 4, <8 x i1> [[TMP11]]) 133; AVX2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 134; AVX2-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 9984 135; AVX2-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] 136; AVX2: middle.block: 137; AVX2-NEXT: br i1 false, label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] 138; AVX2: vec.epilog.iter.check: 139; AVX2-NEXT: br i1 false, label [[SCALAR_PH]], label [[VEC_EPILOG_PH]] 140; AVX2: vec.epilog.ph: 141; AVX2-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 9984, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_PH]] ] 142; AVX2-NEXT: br label [[FOR_BODY:%.*]] 143; AVX2: vec.epilog.vector.body: 144; AVX2-NEXT: [[INDEX11:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT14:%.*]], [[FOR_BODY]] ] 145; AVX2-NEXT: [[TMP37:%.*]] = add i64 [[INDEX11]], 0 146; AVX2-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP37]] 147; AVX2-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[TMP38]], i32 0 148; AVX2-NEXT: [[WIDE_LOAD12:%.*]] = load <8 x i32>, ptr [[TMP29]], align 4 149; AVX2-NEXT: [[TMP30:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD12]], splat (i32 100) 150; AVX2-NEXT: [[TMP31:%.*]] = getelementptr i32, ptr [[B]], i64 [[TMP37]] 151; AVX2-NEXT: [[TMP32:%.*]] = getelementptr i32, ptr [[TMP31]], i32 0 152; AVX2-NEXT: [[WIDE_MASKED_LOAD13:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr [[TMP32]], i32 4, <8 x i1> [[TMP30]], <8 x i32> poison) 153; AVX2-NEXT: [[TMP33:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD13]], [[WIDE_LOAD12]] 154; AVX2-NEXT: [[TMP34:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP37]] 155; AVX2-NEXT: [[TMP35:%.*]] = getelementptr i32, ptr [[TMP34]], i32 0 156; AVX2-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP33]], ptr [[TMP35]], i32 4, <8 x i1> [[TMP30]]) 157; AVX2-NEXT: [[INDEX_NEXT14]] = add nuw i64 [[INDEX11]], 8 158; AVX2-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT14]], 10000 159; AVX2-NEXT: br i1 [[TMP36]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] 160; AVX2: vec.epilog.middle.block: 161; AVX2-NEXT: br i1 true, label [[FOR_END]], label [[SCALAR_PH]] 162; AVX2: vec.epilog.scalar.ph: 163; AVX2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 9984, [[VEC_EPILOG_ITER_CHECK]] ] 164; AVX2-NEXT: br label [[FOR_BODY1:%.*]] 165; AVX2: for.body: 166; AVX2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] 167; AVX2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDVARS_IV]] 168; AVX2-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 169; AVX2-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP27]], 100 170; AVX2-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] 171; AVX2: if.then: 172; AVX2-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] 173; AVX2-NEXT: [[TMP28:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4 174; AVX2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], [[TMP27]] 175; AVX2-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] 176; AVX2-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX7]], align 4 177; AVX2-NEXT: br label [[FOR_INC]] 178; AVX2: for.inc: 179; AVX2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 180; AVX2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 10000 181; AVX2-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP4:![0-9]+]] 182; AVX2: for.end: 183; AVX2-NEXT: ret void 184; 185; AVX512-LABEL: @foo1( 186; AVX512-NEXT: iter.check: 187; AVX512-NEXT: [[B3:%.*]] = ptrtoint ptr [[B:%.*]] to i64 188; AVX512-NEXT: [[TRIGGER2:%.*]] = ptrtoint ptr [[TRIGGER:%.*]] to i64 189; AVX512-NEXT: [[A1:%.*]] = ptrtoint ptr [[A:%.*]] to i64 190; AVX512-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] 191; AVX512: vector.memcheck: 192; AVX512-NEXT: [[TMP0:%.*]] = sub i64 [[A1]], [[TRIGGER2]] 193; AVX512-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 256 194; AVX512-NEXT: [[TMP1:%.*]] = sub i64 [[A1]], [[B3]] 195; AVX512-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 256 196; AVX512-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] 197; AVX512-NEXT: br i1 [[CONFLICT_RDX]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] 198; AVX512: vector.main.loop.iter.check: 199; AVX512-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] 200; AVX512: vector.ph: 201; AVX512-NEXT: br label [[VECTOR_BODY:%.*]] 202; AVX512: vector.body: 203; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 204; AVX512-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 205; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP2]] 206; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0 207; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 16 208; AVX512-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 32 209; AVX512-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 48 210; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, ptr [[TMP4]], align 4 211; AVX512-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i32>, ptr [[TMP5]], align 4 212; AVX512-NEXT: [[WIDE_LOAD6:%.*]] = load <16 x i32>, ptr [[TMP6]], align 4 213; AVX512-NEXT: [[WIDE_LOAD7:%.*]] = load <16 x i32>, ptr [[TMP7]], align 4 214; AVX512-NEXT: [[TMP8:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD]], splat (i32 100) 215; AVX512-NEXT: [[TMP9:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD5]], splat (i32 100) 216; AVX512-NEXT: [[TMP10:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD6]], splat (i32 100) 217; AVX512-NEXT: [[TMP11:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD7]], splat (i32 100) 218; AVX512-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[B]], i64 [[TMP2]] 219; AVX512-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP12]], i32 0 220; AVX512-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP12]], i32 16 221; AVX512-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP12]], i32 32 222; AVX512-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP12]], i32 48 223; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr [[TMP13]], i32 4, <16 x i1> [[TMP8]], <16 x i32> poison) 224; AVX512-NEXT: [[WIDE_MASKED_LOAD8:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr [[TMP14]], i32 4, <16 x i1> [[TMP9]], <16 x i32> poison) 225; AVX512-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr [[TMP15]], i32 4, <16 x i1> [[TMP10]], <16 x i32> poison) 226; AVX512-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr [[TMP16]], i32 4, <16 x i1> [[TMP11]], <16 x i32> poison) 227; AVX512-NEXT: [[TMP17:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD]], [[WIDE_LOAD]] 228; AVX512-NEXT: [[TMP18:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD8]], [[WIDE_LOAD5]] 229; AVX512-NEXT: [[TMP19:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD9]], [[WIDE_LOAD6]] 230; AVX512-NEXT: [[TMP20:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD10]], [[WIDE_LOAD7]] 231; AVX512-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP2]] 232; AVX512-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[TMP21]], i32 0 233; AVX512-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[TMP21]], i32 16 234; AVX512-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP21]], i32 32 235; AVX512-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP21]], i32 48 236; AVX512-NEXT: call void @llvm.masked.store.v16i32.p0(<16 x i32> [[TMP17]], ptr [[TMP22]], i32 4, <16 x i1> [[TMP8]]) 237; AVX512-NEXT: call void @llvm.masked.store.v16i32.p0(<16 x i32> [[TMP18]], ptr [[TMP23]], i32 4, <16 x i1> [[TMP9]]) 238; AVX512-NEXT: call void @llvm.masked.store.v16i32.p0(<16 x i32> [[TMP19]], ptr [[TMP24]], i32 4, <16 x i1> [[TMP10]]) 239; AVX512-NEXT: call void @llvm.masked.store.v16i32.p0(<16 x i32> [[TMP20]], ptr [[TMP25]], i32 4, <16 x i1> [[TMP11]]) 240; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 64 241; AVX512-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 9984 242; AVX512-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] 243; AVX512: middle.block: 244; AVX512-NEXT: br i1 false, label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] 245; AVX512: vec.epilog.iter.check: 246; AVX512-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] 247; AVX512: vec.epilog.ph: 248; AVX512-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 9984, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] 249; AVX512-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] 250; AVX512: vec.epilog.vector.body: 251; AVX512-NEXT: [[INDEX11:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT14:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] 252; AVX512-NEXT: [[TMP27:%.*]] = add i64 [[INDEX11]], 0 253; AVX512-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP27]] 254; AVX512-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[TMP28]], i32 0 255; AVX512-NEXT: [[WIDE_LOAD12:%.*]] = load <16 x i32>, ptr [[TMP29]], align 4 256; AVX512-NEXT: [[TMP30:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD12]], splat (i32 100) 257; AVX512-NEXT: [[TMP31:%.*]] = getelementptr i32, ptr [[B]], i64 [[TMP27]] 258; AVX512-NEXT: [[TMP32:%.*]] = getelementptr i32, ptr [[TMP31]], i32 0 259; AVX512-NEXT: [[WIDE_MASKED_LOAD13:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr [[TMP32]], i32 4, <16 x i1> [[TMP30]], <16 x i32> poison) 260; AVX512-NEXT: [[TMP33:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD13]], [[WIDE_LOAD12]] 261; AVX512-NEXT: [[TMP34:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP27]] 262; AVX512-NEXT: [[TMP35:%.*]] = getelementptr i32, ptr [[TMP34]], i32 0 263; AVX512-NEXT: call void @llvm.masked.store.v16i32.p0(<16 x i32> [[TMP33]], ptr [[TMP35]], i32 4, <16 x i1> [[TMP30]]) 264; AVX512-NEXT: [[INDEX_NEXT14]] = add nuw i64 [[INDEX11]], 16 265; AVX512-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT14]], 10000 266; AVX512-NEXT: br i1 [[TMP36]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] 267; AVX512: vec.epilog.middle.block: 268; AVX512-NEXT: br i1 true, label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] 269; AVX512: vec.epilog.scalar.ph: 270; AVX512-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 9984, [[VEC_EPILOG_ITER_CHECK]] ] 271; AVX512-NEXT: br label [[FOR_BODY:%.*]] 272; AVX512: for.body: 273; AVX512-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] 274; AVX512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDVARS_IV]] 275; AVX512-NEXT: [[TMP37:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 276; AVX512-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP37]], 100 277; AVX512-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] 278; AVX512: if.then: 279; AVX512-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] 280; AVX512-NEXT: [[TMP38:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4 281; AVX512-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP38]], [[TMP37]] 282; AVX512-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] 283; AVX512-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX7]], align 4 284; AVX512-NEXT: br label [[FOR_INC]] 285; AVX512: for.inc: 286; AVX512-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 287; AVX512-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 10000 288; AVX512-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] 289; AVX512: for.end: 290; AVX512-NEXT: ret void 291; 292entry: 293 br label %for.body 294 295for.body: ; preds = %for.inc, %entry 296 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ] 297 %arrayidx = getelementptr inbounds i32, ptr %trigger, i64 %indvars.iv 298 %0 = load i32, ptr %arrayidx, align 4 299 %cmp1 = icmp slt i32 %0, 100 300 br i1 %cmp1, label %if.then, label %for.inc 301 302if.then: ; preds = %for.body 303 %arrayidx3 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv 304 %1 = load i32, ptr %arrayidx3, align 4 305 %add = add nsw i32 %1, %0 306 %arrayidx7 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv 307 store i32 %add, ptr %arrayidx7, align 4 308 br label %for.inc 309 310for.inc: ; preds = %for.body, %if.then 311 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 312 %exitcond = icmp eq i64 %indvars.iv.next, 10000 313 br i1 %exitcond, label %for.end, label %for.body 314 315for.end: ; preds = %for.inc 316 ret void 317} 318 319; The same as @foo1 but all the pointers are address space 1 pointers. 320 321define void @foo1_addrspace1(ptr addrspace(1) nocapture %A, ptr addrspace(1) nocapture readonly %B, ptr addrspace(1) nocapture readonly %trigger) local_unnamed_addr #0 { 322; AVX1-LABEL: @foo1_addrspace1( 323; AVX1-NEXT: entry: 324; AVX1-NEXT: [[B3:%.*]] = ptrtoint ptr addrspace(1) [[B:%.*]] to i64 325; AVX1-NEXT: [[TRIGGER2:%.*]] = ptrtoint ptr addrspace(1) [[TRIGGER:%.*]] to i64 326; AVX1-NEXT: [[A1:%.*]] = ptrtoint ptr addrspace(1) [[A:%.*]] to i64 327; AVX1-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] 328; AVX1: vector.memcheck: 329; AVX1-NEXT: [[TMP0:%.*]] = sub i64 [[A1]], [[TRIGGER2]] 330; AVX1-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32 331; AVX1-NEXT: [[TMP1:%.*]] = sub i64 [[A1]], [[B3]] 332; AVX1-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 32 333; AVX1-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] 334; AVX1-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] 335; AVX1: vector.ph: 336; AVX1-NEXT: br label [[VECTOR_BODY:%.*]] 337; AVX1: vector.body: 338; AVX1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 339; AVX1-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 340; AVX1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TRIGGER]], i64 [[TMP2]] 341; AVX1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP3]], i32 0 342; AVX1-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr addrspace(1) [[TMP4]], align 4 343; AVX1-NEXT: [[TMP5:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], splat (i32 100) 344; AVX1-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr addrspace(1) [[B]], i64 [[TMP2]] 345; AVX1-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP6]], i32 0 346; AVX1-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p1(ptr addrspace(1) [[TMP7]], i32 4, <8 x i1> [[TMP5]], <8 x i32> poison) 347; AVX1-NEXT: [[TMP8:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD]], [[WIDE_LOAD]] 348; AVX1-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr addrspace(1) [[A]], i64 [[TMP2]] 349; AVX1-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP9]], i32 0 350; AVX1-NEXT: call void @llvm.masked.store.v8i32.p1(<8 x i32> [[TMP8]], ptr addrspace(1) [[TMP10]], i32 4, <8 x i1> [[TMP5]]) 351; AVX1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 352; AVX1-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000 353; AVX1-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] 354; AVX1: middle.block: 355; AVX1-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 356; AVX1: scalar.ph: 357; AVX1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] 358; AVX1-NEXT: br label [[FOR_BODY:%.*]] 359; AVX1: for.body: 360; AVX1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] 361; AVX1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TRIGGER]], i64 [[INDVARS_IV]] 362; AVX1-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX]], align 4 363; AVX1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP12]], 100 364; AVX1-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] 365; AVX1: if.then: 366; AVX1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[B]], i64 [[INDVARS_IV]] 367; AVX1-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX3]], align 4 368; AVX1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP12]] 369; AVX1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[A]], i64 [[INDVARS_IV]] 370; AVX1-NEXT: store i32 [[ADD]], ptr addrspace(1) [[ARRAYIDX7]], align 4 371; AVX1-NEXT: br label [[FOR_INC]] 372; AVX1: for.inc: 373; AVX1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 374; AVX1-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 10000 375; AVX1-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] 376; AVX1: for.end: 377; AVX1-NEXT: ret void 378; 379; AVX2-LABEL: @foo1_addrspace1( 380; AVX2-NEXT: iter.check: 381; AVX2-NEXT: [[B3:%.*]] = ptrtoint ptr addrspace(1) [[B:%.*]] to i64 382; AVX2-NEXT: [[TRIGGER2:%.*]] = ptrtoint ptr addrspace(1) [[TRIGGER:%.*]] to i64 383; AVX2-NEXT: [[A1:%.*]] = ptrtoint ptr addrspace(1) [[A:%.*]] to i64 384; AVX2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] 385; AVX2: vector.memcheck: 386; AVX2-NEXT: [[TMP0:%.*]] = sub i64 [[A1]], [[TRIGGER2]] 387; AVX2-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 128 388; AVX2-NEXT: [[TMP1:%.*]] = sub i64 [[A1]], [[B3]] 389; AVX2-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 128 390; AVX2-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] 391; AVX2-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] 392; AVX2: vector.main.loop.iter.check: 393; AVX2-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH1:%.*]] 394; AVX2: vector.ph: 395; AVX2-NEXT: br label [[VECTOR_BODY:%.*]] 396; AVX2: vector.body: 397; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 398; AVX2-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 399; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TRIGGER]], i64 [[TMP2]] 400; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP3]], i32 0 401; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP3]], i32 8 402; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP3]], i32 16 403; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP3]], i32 24 404; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr addrspace(1) [[TMP4]], align 4 405; AVX2-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr addrspace(1) [[TMP5]], align 4 406; AVX2-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr addrspace(1) [[TMP6]], align 4 407; AVX2-NEXT: [[WIDE_LOAD7:%.*]] = load <8 x i32>, ptr addrspace(1) [[TMP7]], align 4 408; AVX2-NEXT: [[TMP8:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], splat (i32 100) 409; AVX2-NEXT: [[TMP9:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD5]], splat (i32 100) 410; AVX2-NEXT: [[TMP10:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD6]], splat (i32 100) 411; AVX2-NEXT: [[TMP11:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD7]], splat (i32 100) 412; AVX2-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr addrspace(1) [[B]], i64 [[TMP2]] 413; AVX2-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i32 0 414; AVX2-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i32 8 415; AVX2-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i32 16 416; AVX2-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i32 24 417; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p1(ptr addrspace(1) [[TMP13]], i32 4, <8 x i1> [[TMP8]], <8 x i32> poison) 418; AVX2-NEXT: [[WIDE_MASKED_LOAD8:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p1(ptr addrspace(1) [[TMP14]], i32 4, <8 x i1> [[TMP9]], <8 x i32> poison) 419; AVX2-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p1(ptr addrspace(1) [[TMP15]], i32 4, <8 x i1> [[TMP10]], <8 x i32> poison) 420; AVX2-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p1(ptr addrspace(1) [[TMP16]], i32 4, <8 x i1> [[TMP11]], <8 x i32> poison) 421; AVX2-NEXT: [[TMP17:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD]], [[WIDE_LOAD]] 422; AVX2-NEXT: [[TMP18:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD8]], [[WIDE_LOAD5]] 423; AVX2-NEXT: [[TMP19:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD9]], [[WIDE_LOAD6]] 424; AVX2-NEXT: [[TMP20:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD10]], [[WIDE_LOAD7]] 425; AVX2-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr addrspace(1) [[A]], i64 [[TMP2]] 426; AVX2-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP21]], i32 0 427; AVX2-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP21]], i32 8 428; AVX2-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP21]], i32 16 429; AVX2-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP21]], i32 24 430; AVX2-NEXT: call void @llvm.masked.store.v8i32.p1(<8 x i32> [[TMP17]], ptr addrspace(1) [[TMP22]], i32 4, <8 x i1> [[TMP8]]) 431; AVX2-NEXT: call void @llvm.masked.store.v8i32.p1(<8 x i32> [[TMP18]], ptr addrspace(1) [[TMP23]], i32 4, <8 x i1> [[TMP9]]) 432; AVX2-NEXT: call void @llvm.masked.store.v8i32.p1(<8 x i32> [[TMP19]], ptr addrspace(1) [[TMP24]], i32 4, <8 x i1> [[TMP10]]) 433; AVX2-NEXT: call void @llvm.masked.store.v8i32.p1(<8 x i32> [[TMP20]], ptr addrspace(1) [[TMP25]], i32 4, <8 x i1> [[TMP11]]) 434; AVX2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 435; AVX2-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 9984 436; AVX2-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] 437; AVX2: middle.block: 438; AVX2-NEXT: br i1 false, label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] 439; AVX2: vec.epilog.iter.check: 440; AVX2-NEXT: br i1 false, label [[SCALAR_PH]], label [[VEC_EPILOG_PH]] 441; AVX2: vec.epilog.ph: 442; AVX2-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 9984, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_PH]] ] 443; AVX2-NEXT: br label [[FOR_BODY:%.*]] 444; AVX2: vec.epilog.vector.body: 445; AVX2-NEXT: [[INDEX11:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT14:%.*]], [[FOR_BODY]] ] 446; AVX2-NEXT: [[TMP37:%.*]] = add i64 [[INDEX11]], 0 447; AVX2-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TRIGGER]], i64 [[TMP37]] 448; AVX2-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP38]], i32 0 449; AVX2-NEXT: [[WIDE_LOAD12:%.*]] = load <8 x i32>, ptr addrspace(1) [[TMP29]], align 4 450; AVX2-NEXT: [[TMP30:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD12]], splat (i32 100) 451; AVX2-NEXT: [[TMP31:%.*]] = getelementptr i32, ptr addrspace(1) [[B]], i64 [[TMP37]] 452; AVX2-NEXT: [[TMP32:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP31]], i32 0 453; AVX2-NEXT: [[WIDE_MASKED_LOAD13:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p1(ptr addrspace(1) [[TMP32]], i32 4, <8 x i1> [[TMP30]], <8 x i32> poison) 454; AVX2-NEXT: [[TMP33:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD13]], [[WIDE_LOAD12]] 455; AVX2-NEXT: [[TMP34:%.*]] = getelementptr i32, ptr addrspace(1) [[A]], i64 [[TMP37]] 456; AVX2-NEXT: [[TMP35:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP34]], i32 0 457; AVX2-NEXT: call void @llvm.masked.store.v8i32.p1(<8 x i32> [[TMP33]], ptr addrspace(1) [[TMP35]], i32 4, <8 x i1> [[TMP30]]) 458; AVX2-NEXT: [[INDEX_NEXT14]] = add nuw i64 [[INDEX11]], 8 459; AVX2-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT14]], 10000 460; AVX2-NEXT: br i1 [[TMP36]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] 461; AVX2: vec.epilog.middle.block: 462; AVX2-NEXT: br i1 true, label [[FOR_END]], label [[SCALAR_PH]] 463; AVX2: vec.epilog.scalar.ph: 464; AVX2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 9984, [[VEC_EPILOG_ITER_CHECK]] ] 465; AVX2-NEXT: br label [[FOR_BODY1:%.*]] 466; AVX2: for.body: 467; AVX2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] 468; AVX2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TRIGGER]], i64 [[INDVARS_IV]] 469; AVX2-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX]], align 4 470; AVX2-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP27]], 100 471; AVX2-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] 472; AVX2: if.then: 473; AVX2-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[B]], i64 [[INDVARS_IV]] 474; AVX2-NEXT: [[TMP28:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX3]], align 4 475; AVX2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], [[TMP27]] 476; AVX2-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[A]], i64 [[INDVARS_IV]] 477; AVX2-NEXT: store i32 [[ADD]], ptr addrspace(1) [[ARRAYIDX7]], align 4 478; AVX2-NEXT: br label [[FOR_INC]] 479; AVX2: for.inc: 480; AVX2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 481; AVX2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 10000 482; AVX2-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP7:![0-9]+]] 483; AVX2: for.end: 484; AVX2-NEXT: ret void 485; 486; AVX512-LABEL: @foo1_addrspace1( 487; AVX512-NEXT: iter.check: 488; AVX512-NEXT: [[B3:%.*]] = ptrtoint ptr addrspace(1) [[B:%.*]] to i64 489; AVX512-NEXT: [[TRIGGER2:%.*]] = ptrtoint ptr addrspace(1) [[TRIGGER:%.*]] to i64 490; AVX512-NEXT: [[A1:%.*]] = ptrtoint ptr addrspace(1) [[A:%.*]] to i64 491; AVX512-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] 492; AVX512: vector.memcheck: 493; AVX512-NEXT: [[TMP0:%.*]] = sub i64 [[A1]], [[TRIGGER2]] 494; AVX512-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 256 495; AVX512-NEXT: [[TMP1:%.*]] = sub i64 [[A1]], [[B3]] 496; AVX512-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 256 497; AVX512-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] 498; AVX512-NEXT: br i1 [[CONFLICT_RDX]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] 499; AVX512: vector.main.loop.iter.check: 500; AVX512-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] 501; AVX512: vector.ph: 502; AVX512-NEXT: br label [[VECTOR_BODY:%.*]] 503; AVX512: vector.body: 504; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 505; AVX512-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 506; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TRIGGER]], i64 [[TMP2]] 507; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP3]], i32 0 508; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP3]], i32 16 509; AVX512-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP3]], i32 32 510; AVX512-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP3]], i32 48 511; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, ptr addrspace(1) [[TMP4]], align 4 512; AVX512-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i32>, ptr addrspace(1) [[TMP5]], align 4 513; AVX512-NEXT: [[WIDE_LOAD6:%.*]] = load <16 x i32>, ptr addrspace(1) [[TMP6]], align 4 514; AVX512-NEXT: [[WIDE_LOAD7:%.*]] = load <16 x i32>, ptr addrspace(1) [[TMP7]], align 4 515; AVX512-NEXT: [[TMP8:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD]], splat (i32 100) 516; AVX512-NEXT: [[TMP9:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD5]], splat (i32 100) 517; AVX512-NEXT: [[TMP10:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD6]], splat (i32 100) 518; AVX512-NEXT: [[TMP11:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD7]], splat (i32 100) 519; AVX512-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr addrspace(1) [[B]], i64 [[TMP2]] 520; AVX512-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i32 0 521; AVX512-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i32 16 522; AVX512-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i32 32 523; AVX512-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i32 48 524; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p1(ptr addrspace(1) [[TMP13]], i32 4, <16 x i1> [[TMP8]], <16 x i32> poison) 525; AVX512-NEXT: [[WIDE_MASKED_LOAD8:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p1(ptr addrspace(1) [[TMP14]], i32 4, <16 x i1> [[TMP9]], <16 x i32> poison) 526; AVX512-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p1(ptr addrspace(1) [[TMP15]], i32 4, <16 x i1> [[TMP10]], <16 x i32> poison) 527; AVX512-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p1(ptr addrspace(1) [[TMP16]], i32 4, <16 x i1> [[TMP11]], <16 x i32> poison) 528; AVX512-NEXT: [[TMP17:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD]], [[WIDE_LOAD]] 529; AVX512-NEXT: [[TMP18:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD8]], [[WIDE_LOAD5]] 530; AVX512-NEXT: [[TMP19:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD9]], [[WIDE_LOAD6]] 531; AVX512-NEXT: [[TMP20:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD10]], [[WIDE_LOAD7]] 532; AVX512-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr addrspace(1) [[A]], i64 [[TMP2]] 533; AVX512-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP21]], i32 0 534; AVX512-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP21]], i32 16 535; AVX512-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP21]], i32 32 536; AVX512-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP21]], i32 48 537; AVX512-NEXT: call void @llvm.masked.store.v16i32.p1(<16 x i32> [[TMP17]], ptr addrspace(1) [[TMP22]], i32 4, <16 x i1> [[TMP8]]) 538; AVX512-NEXT: call void @llvm.masked.store.v16i32.p1(<16 x i32> [[TMP18]], ptr addrspace(1) [[TMP23]], i32 4, <16 x i1> [[TMP9]]) 539; AVX512-NEXT: call void @llvm.masked.store.v16i32.p1(<16 x i32> [[TMP19]], ptr addrspace(1) [[TMP24]], i32 4, <16 x i1> [[TMP10]]) 540; AVX512-NEXT: call void @llvm.masked.store.v16i32.p1(<16 x i32> [[TMP20]], ptr addrspace(1) [[TMP25]], i32 4, <16 x i1> [[TMP11]]) 541; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 64 542; AVX512-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 9984 543; AVX512-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] 544; AVX512: middle.block: 545; AVX512-NEXT: br i1 false, label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] 546; AVX512: vec.epilog.iter.check: 547; AVX512-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] 548; AVX512: vec.epilog.ph: 549; AVX512-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 9984, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] 550; AVX512-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] 551; AVX512: vec.epilog.vector.body: 552; AVX512-NEXT: [[INDEX11:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT14:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] 553; AVX512-NEXT: [[TMP27:%.*]] = add i64 [[INDEX11]], 0 554; AVX512-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TRIGGER]], i64 [[TMP27]] 555; AVX512-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP28]], i32 0 556; AVX512-NEXT: [[WIDE_LOAD12:%.*]] = load <16 x i32>, ptr addrspace(1) [[TMP29]], align 4 557; AVX512-NEXT: [[TMP30:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD12]], splat (i32 100) 558; AVX512-NEXT: [[TMP31:%.*]] = getelementptr i32, ptr addrspace(1) [[B]], i64 [[TMP27]] 559; AVX512-NEXT: [[TMP32:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP31]], i32 0 560; AVX512-NEXT: [[WIDE_MASKED_LOAD13:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p1(ptr addrspace(1) [[TMP32]], i32 4, <16 x i1> [[TMP30]], <16 x i32> poison) 561; AVX512-NEXT: [[TMP33:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD13]], [[WIDE_LOAD12]] 562; AVX512-NEXT: [[TMP34:%.*]] = getelementptr i32, ptr addrspace(1) [[A]], i64 [[TMP27]] 563; AVX512-NEXT: [[TMP35:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP34]], i32 0 564; AVX512-NEXT: call void @llvm.masked.store.v16i32.p1(<16 x i32> [[TMP33]], ptr addrspace(1) [[TMP35]], i32 4, <16 x i1> [[TMP30]]) 565; AVX512-NEXT: [[INDEX_NEXT14]] = add nuw i64 [[INDEX11]], 16 566; AVX512-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT14]], 10000 567; AVX512-NEXT: br i1 [[TMP36]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] 568; AVX512: vec.epilog.middle.block: 569; AVX512-NEXT: br i1 true, label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] 570; AVX512: vec.epilog.scalar.ph: 571; AVX512-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 9984, [[VEC_EPILOG_ITER_CHECK]] ] 572; AVX512-NEXT: br label [[FOR_BODY:%.*]] 573; AVX512: for.body: 574; AVX512-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] 575; AVX512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TRIGGER]], i64 [[INDVARS_IV]] 576; AVX512-NEXT: [[TMP37:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX]], align 4 577; AVX512-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP37]], 100 578; AVX512-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] 579; AVX512: if.then: 580; AVX512-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[B]], i64 [[INDVARS_IV]] 581; AVX512-NEXT: [[TMP38:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX3]], align 4 582; AVX512-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP38]], [[TMP37]] 583; AVX512-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[A]], i64 [[INDVARS_IV]] 584; AVX512-NEXT: store i32 [[ADD]], ptr addrspace(1) [[ARRAYIDX7]], align 4 585; AVX512-NEXT: br label [[FOR_INC]] 586; AVX512: for.inc: 587; AVX512-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 588; AVX512-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 10000 589; AVX512-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] 590; AVX512: for.end: 591; AVX512-NEXT: ret void 592; 593entry: 594 br label %for.body 595 596for.body: ; preds = %for.inc, %entry 597 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ] 598 %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %trigger, i64 %indvars.iv 599 %0 = load i32, ptr addrspace(1) %arrayidx, align 4 600 %cmp1 = icmp slt i32 %0, 100 601 br i1 %cmp1, label %if.then, label %for.inc 602 603if.then: ; preds = %for.body 604 %arrayidx3 = getelementptr inbounds i32, ptr addrspace(1) %B, i64 %indvars.iv 605 %1 = load i32, ptr addrspace(1) %arrayidx3, align 4 606 %add = add nsw i32 %1, %0 607 %arrayidx7 = getelementptr inbounds i32, ptr addrspace(1) %A, i64 %indvars.iv 608 store i32 %add, ptr addrspace(1) %arrayidx7, align 4 609 br label %for.inc 610 611for.inc: ; preds = %for.body, %if.then 612 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 613 %exitcond = icmp eq i64 %indvars.iv.next, 10000 614 br i1 %exitcond, label %for.end, label %for.body 615 616for.end: ; preds = %for.inc 617 ret void 618} 619 620; The source code: 621; 622;void foo2(ptr A, ptr B, int *trigger) { 623; 624; for (int i=0; i<10000; i++) { 625; if (trigger[i] < 100) { 626; A[i] = B[i] + trigger[i]; 627; } 628; } 629;} 630 631define void @foo2(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture readonly %trigger) local_unnamed_addr #0 { 632; AVX1-LABEL: @foo2( 633; AVX1-NEXT: entry: 634; AVX1-NEXT: [[B3:%.*]] = ptrtoint ptr [[B:%.*]] to i64 635; AVX1-NEXT: [[TRIGGER2:%.*]] = ptrtoint ptr [[TRIGGER:%.*]] to i64 636; AVX1-NEXT: [[A1:%.*]] = ptrtoint ptr [[A:%.*]] to i64 637; AVX1-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] 638; AVX1: vector.memcheck: 639; AVX1-NEXT: [[TMP0:%.*]] = sub i64 [[A1]], [[TRIGGER2]] 640; AVX1-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32 641; AVX1-NEXT: [[TMP1:%.*]] = sub i64 [[A1]], [[B3]] 642; AVX1-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 32 643; AVX1-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] 644; AVX1-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] 645; AVX1: vector.ph: 646; AVX1-NEXT: br label [[VECTOR_BODY:%.*]] 647; AVX1: vector.body: 648; AVX1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 649; AVX1-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 650; AVX1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP2]] 651; AVX1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0 652; AVX1-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP4]], align 4 653; AVX1-NEXT: [[TMP5:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], splat (i32 100) 654; AVX1-NEXT: [[TMP6:%.*]] = getelementptr float, ptr [[B]], i64 [[TMP2]] 655; AVX1-NEXT: [[TMP7:%.*]] = getelementptr float, ptr [[TMP6]], i32 0 656; AVX1-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0(ptr [[TMP7]], i32 4, <8 x i1> [[TMP5]], <8 x float> poison) 657; AVX1-NEXT: [[TMP8:%.*]] = sitofp <8 x i32> [[WIDE_LOAD]] to <8 x float> 658; AVX1-NEXT: [[TMP9:%.*]] = fadd <8 x float> [[WIDE_MASKED_LOAD]], [[TMP8]] 659; AVX1-NEXT: [[TMP10:%.*]] = getelementptr float, ptr [[A]], i64 [[TMP2]] 660; AVX1-NEXT: [[TMP11:%.*]] = getelementptr float, ptr [[TMP10]], i32 0 661; AVX1-NEXT: call void @llvm.masked.store.v8f32.p0(<8 x float> [[TMP9]], ptr [[TMP11]], i32 4, <8 x i1> [[TMP5]]) 662; AVX1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 663; AVX1-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000 664; AVX1-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] 665; AVX1: middle.block: 666; AVX1-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 667; AVX1: scalar.ph: 668; AVX1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] 669; AVX1-NEXT: br label [[FOR_BODY:%.*]] 670; AVX1: for.body: 671; AVX1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] 672; AVX1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDVARS_IV]] 673; AVX1-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 674; AVX1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP13]], 100 675; AVX1-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] 676; AVX1: if.then: 677; AVX1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDVARS_IV]] 678; AVX1-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 679; AVX1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP13]] to float 680; AVX1-NEXT: [[ADD:%.*]] = fadd float [[TMP14]], [[CONV]] 681; AVX1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] 682; AVX1-NEXT: store float [[ADD]], ptr [[ARRAYIDX7]], align 4 683; AVX1-NEXT: br label [[FOR_INC]] 684; AVX1: for.inc: 685; AVX1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 686; AVX1-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 10000 687; AVX1-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] 688; AVX1: for.end: 689; AVX1-NEXT: ret void 690; 691; AVX2-LABEL: @foo2( 692; AVX2-NEXT: iter.check: 693; AVX2-NEXT: [[B3:%.*]] = ptrtoint ptr [[B:%.*]] to i64 694; AVX2-NEXT: [[TRIGGER2:%.*]] = ptrtoint ptr [[TRIGGER:%.*]] to i64 695; AVX2-NEXT: [[A1:%.*]] = ptrtoint ptr [[A:%.*]] to i64 696; AVX2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] 697; AVX2: vector.memcheck: 698; AVX2-NEXT: [[TMP0:%.*]] = sub i64 [[A1]], [[TRIGGER2]] 699; AVX2-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 128 700; AVX2-NEXT: [[TMP1:%.*]] = sub i64 [[A1]], [[B3]] 701; AVX2-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 128 702; AVX2-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] 703; AVX2-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] 704; AVX2: vector.main.loop.iter.check: 705; AVX2-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH1:%.*]] 706; AVX2: vector.ph: 707; AVX2-NEXT: br label [[VECTOR_BODY:%.*]] 708; AVX2: vector.body: 709; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 710; AVX2-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 711; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP2]] 712; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0 713; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 8 714; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 16 715; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 24 716; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP4]], align 4 717; AVX2-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4 718; AVX2-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP6]], align 4 719; AVX2-NEXT: [[WIDE_LOAD7:%.*]] = load <8 x i32>, ptr [[TMP7]], align 4 720; AVX2-NEXT: [[TMP8:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], splat (i32 100) 721; AVX2-NEXT: [[TMP9:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD5]], splat (i32 100) 722; AVX2-NEXT: [[TMP10:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD6]], splat (i32 100) 723; AVX2-NEXT: [[TMP11:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD7]], splat (i32 100) 724; AVX2-NEXT: [[TMP12:%.*]] = getelementptr float, ptr [[B]], i64 [[TMP2]] 725; AVX2-NEXT: [[TMP13:%.*]] = getelementptr float, ptr [[TMP12]], i32 0 726; AVX2-NEXT: [[TMP14:%.*]] = getelementptr float, ptr [[TMP12]], i32 8 727; AVX2-NEXT: [[TMP15:%.*]] = getelementptr float, ptr [[TMP12]], i32 16 728; AVX2-NEXT: [[TMP16:%.*]] = getelementptr float, ptr [[TMP12]], i32 24 729; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0(ptr [[TMP13]], i32 4, <8 x i1> [[TMP8]], <8 x float> poison) 730; AVX2-NEXT: [[WIDE_MASKED_LOAD8:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0(ptr [[TMP14]], i32 4, <8 x i1> [[TMP9]], <8 x float> poison) 731; AVX2-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0(ptr [[TMP15]], i32 4, <8 x i1> [[TMP10]], <8 x float> poison) 732; AVX2-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0(ptr [[TMP16]], i32 4, <8 x i1> [[TMP11]], <8 x float> poison) 733; AVX2-NEXT: [[TMP17:%.*]] = sitofp <8 x i32> [[WIDE_LOAD]] to <8 x float> 734; AVX2-NEXT: [[TMP18:%.*]] = sitofp <8 x i32> [[WIDE_LOAD5]] to <8 x float> 735; AVX2-NEXT: [[TMP19:%.*]] = sitofp <8 x i32> [[WIDE_LOAD6]] to <8 x float> 736; AVX2-NEXT: [[TMP20:%.*]] = sitofp <8 x i32> [[WIDE_LOAD7]] to <8 x float> 737; AVX2-NEXT: [[TMP21:%.*]] = fadd <8 x float> [[WIDE_MASKED_LOAD]], [[TMP17]] 738; AVX2-NEXT: [[TMP22:%.*]] = fadd <8 x float> [[WIDE_MASKED_LOAD8]], [[TMP18]] 739; AVX2-NEXT: [[TMP23:%.*]] = fadd <8 x float> [[WIDE_MASKED_LOAD9]], [[TMP19]] 740; AVX2-NEXT: [[TMP24:%.*]] = fadd <8 x float> [[WIDE_MASKED_LOAD10]], [[TMP20]] 741; AVX2-NEXT: [[TMP25:%.*]] = getelementptr float, ptr [[A]], i64 [[TMP2]] 742; AVX2-NEXT: [[TMP26:%.*]] = getelementptr float, ptr [[TMP25]], i32 0 743; AVX2-NEXT: [[TMP27:%.*]] = getelementptr float, ptr [[TMP25]], i32 8 744; AVX2-NEXT: [[TMP28:%.*]] = getelementptr float, ptr [[TMP25]], i32 16 745; AVX2-NEXT: [[TMP29:%.*]] = getelementptr float, ptr [[TMP25]], i32 24 746; AVX2-NEXT: call void @llvm.masked.store.v8f32.p0(<8 x float> [[TMP21]], ptr [[TMP26]], i32 4, <8 x i1> [[TMP8]]) 747; AVX2-NEXT: call void @llvm.masked.store.v8f32.p0(<8 x float> [[TMP22]], ptr [[TMP27]], i32 4, <8 x i1> [[TMP9]]) 748; AVX2-NEXT: call void @llvm.masked.store.v8f32.p0(<8 x float> [[TMP23]], ptr [[TMP28]], i32 4, <8 x i1> [[TMP10]]) 749; AVX2-NEXT: call void @llvm.masked.store.v8f32.p0(<8 x float> [[TMP24]], ptr [[TMP29]], i32 4, <8 x i1> [[TMP11]]) 750; AVX2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 751; AVX2-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 9984 752; AVX2-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] 753; AVX2: middle.block: 754; AVX2-NEXT: br i1 false, label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] 755; AVX2: vec.epilog.iter.check: 756; AVX2-NEXT: br i1 false, label [[SCALAR_PH]], label [[VEC_EPILOG_PH]] 757; AVX2: vec.epilog.ph: 758; AVX2-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 9984, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_PH]] ] 759; AVX2-NEXT: br label [[FOR_BODY:%.*]] 760; AVX2: vec.epilog.vector.body: 761; AVX2-NEXT: [[INDEX11:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT14:%.*]], [[FOR_BODY]] ] 762; AVX2-NEXT: [[TMP42:%.*]] = add i64 [[INDEX11]], 0 763; AVX2-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP42]] 764; AVX2-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[TMP43]], i32 0 765; AVX2-NEXT: [[WIDE_LOAD12:%.*]] = load <8 x i32>, ptr [[TMP33]], align 4 766; AVX2-NEXT: [[TMP34:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD12]], splat (i32 100) 767; AVX2-NEXT: [[TMP35:%.*]] = getelementptr float, ptr [[B]], i64 [[TMP42]] 768; AVX2-NEXT: [[TMP36:%.*]] = getelementptr float, ptr [[TMP35]], i32 0 769; AVX2-NEXT: [[WIDE_MASKED_LOAD13:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0(ptr [[TMP36]], i32 4, <8 x i1> [[TMP34]], <8 x float> poison) 770; AVX2-NEXT: [[TMP37:%.*]] = sitofp <8 x i32> [[WIDE_LOAD12]] to <8 x float> 771; AVX2-NEXT: [[TMP38:%.*]] = fadd <8 x float> [[WIDE_MASKED_LOAD13]], [[TMP37]] 772; AVX2-NEXT: [[TMP39:%.*]] = getelementptr float, ptr [[A]], i64 [[TMP42]] 773; AVX2-NEXT: [[TMP40:%.*]] = getelementptr float, ptr [[TMP39]], i32 0 774; AVX2-NEXT: call void @llvm.masked.store.v8f32.p0(<8 x float> [[TMP38]], ptr [[TMP40]], i32 4, <8 x i1> [[TMP34]]) 775; AVX2-NEXT: [[INDEX_NEXT14]] = add nuw i64 [[INDEX11]], 8 776; AVX2-NEXT: [[TMP41:%.*]] = icmp eq i64 [[INDEX_NEXT14]], 10000 777; AVX2-NEXT: br i1 [[TMP41]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] 778; AVX2: vec.epilog.middle.block: 779; AVX2-NEXT: br i1 true, label [[FOR_END]], label [[SCALAR_PH]] 780; AVX2: vec.epilog.scalar.ph: 781; AVX2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 9984, [[VEC_EPILOG_ITER_CHECK]] ] 782; AVX2-NEXT: br label [[FOR_BODY1:%.*]] 783; AVX2: for.body: 784; AVX2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] 785; AVX2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDVARS_IV]] 786; AVX2-NEXT: [[TMP31:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 787; AVX2-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP31]], 100 788; AVX2-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] 789; AVX2: if.then: 790; AVX2-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDVARS_IV]] 791; AVX2-NEXT: [[TMP32:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 792; AVX2-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP31]] to float 793; AVX2-NEXT: [[ADD:%.*]] = fadd float [[TMP32]], [[CONV]] 794; AVX2-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] 795; AVX2-NEXT: store float [[ADD]], ptr [[ARRAYIDX7]], align 4 796; AVX2-NEXT: br label [[FOR_INC]] 797; AVX2: for.inc: 798; AVX2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 799; AVX2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 10000 800; AVX2-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP10:![0-9]+]] 801; AVX2: for.end: 802; AVX2-NEXT: ret void 803; 804; AVX512-LABEL: @foo2( 805; AVX512-NEXT: iter.check: 806; AVX512-NEXT: [[B3:%.*]] = ptrtoint ptr [[B:%.*]] to i64 807; AVX512-NEXT: [[TRIGGER2:%.*]] = ptrtoint ptr [[TRIGGER:%.*]] to i64 808; AVX512-NEXT: [[A1:%.*]] = ptrtoint ptr [[A:%.*]] to i64 809; AVX512-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] 810; AVX512: vector.memcheck: 811; AVX512-NEXT: [[TMP0:%.*]] = sub i64 [[A1]], [[TRIGGER2]] 812; AVX512-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 256 813; AVX512-NEXT: [[TMP1:%.*]] = sub i64 [[A1]], [[B3]] 814; AVX512-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 256 815; AVX512-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] 816; AVX512-NEXT: br i1 [[CONFLICT_RDX]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] 817; AVX512: vector.main.loop.iter.check: 818; AVX512-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] 819; AVX512: vector.ph: 820; AVX512-NEXT: br label [[VECTOR_BODY:%.*]] 821; AVX512: vector.body: 822; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 823; AVX512-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 824; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP2]] 825; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0 826; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 16 827; AVX512-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 32 828; AVX512-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 48 829; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, ptr [[TMP4]], align 4 830; AVX512-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i32>, ptr [[TMP5]], align 4 831; AVX512-NEXT: [[WIDE_LOAD6:%.*]] = load <16 x i32>, ptr [[TMP6]], align 4 832; AVX512-NEXT: [[WIDE_LOAD7:%.*]] = load <16 x i32>, ptr [[TMP7]], align 4 833; AVX512-NEXT: [[TMP8:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD]], splat (i32 100) 834; AVX512-NEXT: [[TMP9:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD5]], splat (i32 100) 835; AVX512-NEXT: [[TMP10:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD6]], splat (i32 100) 836; AVX512-NEXT: [[TMP11:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD7]], splat (i32 100) 837; AVX512-NEXT: [[TMP12:%.*]] = getelementptr float, ptr [[B]], i64 [[TMP2]] 838; AVX512-NEXT: [[TMP13:%.*]] = getelementptr float, ptr [[TMP12]], i32 0 839; AVX512-NEXT: [[TMP14:%.*]] = getelementptr float, ptr [[TMP12]], i32 16 840; AVX512-NEXT: [[TMP15:%.*]] = getelementptr float, ptr [[TMP12]], i32 32 841; AVX512-NEXT: [[TMP16:%.*]] = getelementptr float, ptr [[TMP12]], i32 48 842; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x float> @llvm.masked.load.v16f32.p0(ptr [[TMP13]], i32 4, <16 x i1> [[TMP8]], <16 x float> poison) 843; AVX512-NEXT: [[WIDE_MASKED_LOAD8:%.*]] = call <16 x float> @llvm.masked.load.v16f32.p0(ptr [[TMP14]], i32 4, <16 x i1> [[TMP9]], <16 x float> poison) 844; AVX512-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <16 x float> @llvm.masked.load.v16f32.p0(ptr [[TMP15]], i32 4, <16 x i1> [[TMP10]], <16 x float> poison) 845; AVX512-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call <16 x float> @llvm.masked.load.v16f32.p0(ptr [[TMP16]], i32 4, <16 x i1> [[TMP11]], <16 x float> poison) 846; AVX512-NEXT: [[TMP17:%.*]] = sitofp <16 x i32> [[WIDE_LOAD]] to <16 x float> 847; AVX512-NEXT: [[TMP18:%.*]] = sitofp <16 x i32> [[WIDE_LOAD5]] to <16 x float> 848; AVX512-NEXT: [[TMP19:%.*]] = sitofp <16 x i32> [[WIDE_LOAD6]] to <16 x float> 849; AVX512-NEXT: [[TMP20:%.*]] = sitofp <16 x i32> [[WIDE_LOAD7]] to <16 x float> 850; AVX512-NEXT: [[TMP21:%.*]] = fadd <16 x float> [[WIDE_MASKED_LOAD]], [[TMP17]] 851; AVX512-NEXT: [[TMP22:%.*]] = fadd <16 x float> [[WIDE_MASKED_LOAD8]], [[TMP18]] 852; AVX512-NEXT: [[TMP23:%.*]] = fadd <16 x float> [[WIDE_MASKED_LOAD9]], [[TMP19]] 853; AVX512-NEXT: [[TMP24:%.*]] = fadd <16 x float> [[WIDE_MASKED_LOAD10]], [[TMP20]] 854; AVX512-NEXT: [[TMP25:%.*]] = getelementptr float, ptr [[A]], i64 [[TMP2]] 855; AVX512-NEXT: [[TMP26:%.*]] = getelementptr float, ptr [[TMP25]], i32 0 856; AVX512-NEXT: [[TMP27:%.*]] = getelementptr float, ptr [[TMP25]], i32 16 857; AVX512-NEXT: [[TMP28:%.*]] = getelementptr float, ptr [[TMP25]], i32 32 858; AVX512-NEXT: [[TMP29:%.*]] = getelementptr float, ptr [[TMP25]], i32 48 859; AVX512-NEXT: call void @llvm.masked.store.v16f32.p0(<16 x float> [[TMP21]], ptr [[TMP26]], i32 4, <16 x i1> [[TMP8]]) 860; AVX512-NEXT: call void @llvm.masked.store.v16f32.p0(<16 x float> [[TMP22]], ptr [[TMP27]], i32 4, <16 x i1> [[TMP9]]) 861; AVX512-NEXT: call void @llvm.masked.store.v16f32.p0(<16 x float> [[TMP23]], ptr [[TMP28]], i32 4, <16 x i1> [[TMP10]]) 862; AVX512-NEXT: call void @llvm.masked.store.v16f32.p0(<16 x float> [[TMP24]], ptr [[TMP29]], i32 4, <16 x i1> [[TMP11]]) 863; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 64 864; AVX512-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 9984 865; AVX512-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] 866; AVX512: middle.block: 867; AVX512-NEXT: br i1 false, label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] 868; AVX512: vec.epilog.iter.check: 869; AVX512-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] 870; AVX512: vec.epilog.ph: 871; AVX512-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 9984, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] 872; AVX512-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] 873; AVX512: vec.epilog.vector.body: 874; AVX512-NEXT: [[INDEX11:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT14:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] 875; AVX512-NEXT: [[TMP31:%.*]] = add i64 [[INDEX11]], 0 876; AVX512-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP31]] 877; AVX512-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 0 878; AVX512-NEXT: [[WIDE_LOAD12:%.*]] = load <16 x i32>, ptr [[TMP33]], align 4 879; AVX512-NEXT: [[TMP34:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD12]], splat (i32 100) 880; AVX512-NEXT: [[TMP35:%.*]] = getelementptr float, ptr [[B]], i64 [[TMP31]] 881; AVX512-NEXT: [[TMP36:%.*]] = getelementptr float, ptr [[TMP35]], i32 0 882; AVX512-NEXT: [[WIDE_MASKED_LOAD13:%.*]] = call <16 x float> @llvm.masked.load.v16f32.p0(ptr [[TMP36]], i32 4, <16 x i1> [[TMP34]], <16 x float> poison) 883; AVX512-NEXT: [[TMP37:%.*]] = sitofp <16 x i32> [[WIDE_LOAD12]] to <16 x float> 884; AVX512-NEXT: [[TMP38:%.*]] = fadd <16 x float> [[WIDE_MASKED_LOAD13]], [[TMP37]] 885; AVX512-NEXT: [[TMP39:%.*]] = getelementptr float, ptr [[A]], i64 [[TMP31]] 886; AVX512-NEXT: [[TMP40:%.*]] = getelementptr float, ptr [[TMP39]], i32 0 887; AVX512-NEXT: call void @llvm.masked.store.v16f32.p0(<16 x float> [[TMP38]], ptr [[TMP40]], i32 4, <16 x i1> [[TMP34]]) 888; AVX512-NEXT: [[INDEX_NEXT14]] = add nuw i64 [[INDEX11]], 16 889; AVX512-NEXT: [[TMP41:%.*]] = icmp eq i64 [[INDEX_NEXT14]], 10000 890; AVX512-NEXT: br i1 [[TMP41]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] 891; AVX512: vec.epilog.middle.block: 892; AVX512-NEXT: br i1 true, label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] 893; AVX512: vec.epilog.scalar.ph: 894; AVX512-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 9984, [[VEC_EPILOG_ITER_CHECK]] ] 895; AVX512-NEXT: br label [[FOR_BODY:%.*]] 896; AVX512: for.body: 897; AVX512-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] 898; AVX512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDVARS_IV]] 899; AVX512-NEXT: [[TMP42:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 900; AVX512-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP42]], 100 901; AVX512-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] 902; AVX512: if.then: 903; AVX512-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDVARS_IV]] 904; AVX512-NEXT: [[TMP43:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 905; AVX512-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP42]] to float 906; AVX512-NEXT: [[ADD:%.*]] = fadd float [[TMP43]], [[CONV]] 907; AVX512-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] 908; AVX512-NEXT: store float [[ADD]], ptr [[ARRAYIDX7]], align 4 909; AVX512-NEXT: br label [[FOR_INC]] 910; AVX512: for.inc: 911; AVX512-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 912; AVX512-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 10000 913; AVX512-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] 914; AVX512: for.end: 915; AVX512-NEXT: ret void 916; 917entry: 918 br label %for.body 919 920for.body: ; preds = %for.inc, %entry 921 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ] 922 %arrayidx = getelementptr inbounds i32, ptr %trigger, i64 %indvars.iv 923 %0 = load i32, ptr %arrayidx, align 4 924 %cmp1 = icmp slt i32 %0, 100 925 br i1 %cmp1, label %if.then, label %for.inc 926 927if.then: ; preds = %for.body 928 %arrayidx3 = getelementptr inbounds float, ptr %B, i64 %indvars.iv 929 %1 = load float, ptr %arrayidx3, align 4 930 %conv = sitofp i32 %0 to float 931 %add = fadd float %1, %conv 932 %arrayidx7 = getelementptr inbounds float, ptr %A, i64 %indvars.iv 933 store float %add, ptr %arrayidx7, align 4 934 br label %for.inc 935 936for.inc: ; preds = %for.body, %if.then 937 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 938 %exitcond = icmp eq i64 %indvars.iv.next, 10000 939 br i1 %exitcond, label %for.end, label %for.body 940 941for.end: ; preds = %for.inc 942 ret void 943} 944 945; The source code: 946; 947;void foo3(ptr A, ptr B, int *trigger) { 948; 949; for (int i=0; i<10000; i++) { 950; if (trigger[i] < 100) { 951; A[i] = B[i] + trigger[i]; 952; } 953; } 954;} 955 956define void @foo3(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture readonly %trigger) local_unnamed_addr #0 { 957; AVX1-LABEL: @foo3( 958; AVX1-NEXT: entry: 959; AVX1-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] 960; AVX1: vector.memcheck: 961; AVX1-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 80000 962; AVX1-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[TRIGGER:%.*]], i64 40000 963; AVX1-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 80000 964; AVX1-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[A]], [[SCEVGEP1]] 965; AVX1-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[TRIGGER]], [[SCEVGEP]] 966; AVX1-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] 967; AVX1-NEXT: [[BOUND03:%.*]] = icmp ult ptr [[A]], [[SCEVGEP2]] 968; AVX1-NEXT: [[BOUND14:%.*]] = icmp ult ptr [[B]], [[SCEVGEP]] 969; AVX1-NEXT: [[FOUND_CONFLICT5:%.*]] = and i1 [[BOUND03]], [[BOUND14]] 970; AVX1-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT5]] 971; AVX1-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] 972; AVX1: vector.ph: 973; AVX1-NEXT: br label [[VECTOR_BODY:%.*]] 974; AVX1: vector.body: 975; AVX1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 976; AVX1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 977; AVX1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP0]] 978; AVX1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 979; AVX1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 4 980; AVX1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 8 981; AVX1-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 12 982; AVX1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META8:![0-9]+]] 983; AVX1-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META8]] 984; AVX1-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META8]] 985; AVX1-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4, !alias.scope [[META8]] 986; AVX1-NEXT: [[TMP6:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], splat (i32 100) 987; AVX1-NEXT: [[TMP7:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD6]], splat (i32 100) 988; AVX1-NEXT: [[TMP8:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD7]], splat (i32 100) 989; AVX1-NEXT: [[TMP9:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD8]], splat (i32 100) 990; AVX1-NEXT: [[TMP10:%.*]] = getelementptr double, ptr [[B]], i64 [[TMP0]] 991; AVX1-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[TMP10]], i32 0 992; AVX1-NEXT: [[TMP12:%.*]] = getelementptr double, ptr [[TMP10]], i32 4 993; AVX1-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[TMP10]], i32 8 994; AVX1-NEXT: [[TMP14:%.*]] = getelementptr double, ptr [[TMP10]], i32 12 995; AVX1-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP11]], i32 8, <4 x i1> [[TMP6]], <4 x double> poison), !alias.scope [[META11:![0-9]+]] 996; AVX1-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP12]], i32 8, <4 x i1> [[TMP7]], <4 x double> poison), !alias.scope [[META11]] 997; AVX1-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP13]], i32 8, <4 x i1> [[TMP8]], <4 x double> poison), !alias.scope [[META11]] 998; AVX1-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP14]], i32 8, <4 x i1> [[TMP9]], <4 x double> poison), !alias.scope [[META11]] 999; AVX1-NEXT: [[TMP15:%.*]] = sitofp <4 x i32> [[WIDE_LOAD]] to <4 x double> 1000; AVX1-NEXT: [[TMP16:%.*]] = sitofp <4 x i32> [[WIDE_LOAD6]] to <4 x double> 1001; AVX1-NEXT: [[TMP17:%.*]] = sitofp <4 x i32> [[WIDE_LOAD7]] to <4 x double> 1002; AVX1-NEXT: [[TMP18:%.*]] = sitofp <4 x i32> [[WIDE_LOAD8]] to <4 x double> 1003; AVX1-NEXT: [[TMP19:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD]], [[TMP15]] 1004; AVX1-NEXT: [[TMP20:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD9]], [[TMP16]] 1005; AVX1-NEXT: [[TMP21:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD10]], [[TMP17]] 1006; AVX1-NEXT: [[TMP22:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD11]], [[TMP18]] 1007; AVX1-NEXT: [[TMP23:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP0]] 1008; AVX1-NEXT: [[TMP24:%.*]] = getelementptr double, ptr [[TMP23]], i32 0 1009; AVX1-NEXT: [[TMP25:%.*]] = getelementptr double, ptr [[TMP23]], i32 4 1010; AVX1-NEXT: [[TMP26:%.*]] = getelementptr double, ptr [[TMP23]], i32 8 1011; AVX1-NEXT: [[TMP27:%.*]] = getelementptr double, ptr [[TMP23]], i32 12 1012; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP19]], ptr [[TMP24]], i32 8, <4 x i1> [[TMP6]]), !alias.scope [[META13:![0-9]+]], !noalias [[META15:![0-9]+]] 1013; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP20]], ptr [[TMP25]], i32 8, <4 x i1> [[TMP7]]), !alias.scope [[META13]], !noalias [[META15]] 1014; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP21]], ptr [[TMP26]], i32 8, <4 x i1> [[TMP8]]), !alias.scope [[META13]], !noalias [[META15]] 1015; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP22]], ptr [[TMP27]], i32 8, <4 x i1> [[TMP9]]), !alias.scope [[META13]], !noalias [[META15]] 1016; AVX1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 1017; AVX1-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000 1018; AVX1-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] 1019; AVX1: middle.block: 1020; AVX1-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 1021; AVX1: scalar.ph: 1022; AVX1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] 1023; AVX1-NEXT: br label [[FOR_BODY:%.*]] 1024; AVX1: for.body: 1025; AVX1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] 1026; AVX1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDVARS_IV]] 1027; AVX1-NEXT: [[TMP29:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 1028; AVX1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP29]], 100 1029; AVX1-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] 1030; AVX1: if.then: 1031; AVX1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[INDVARS_IV]] 1032; AVX1-NEXT: [[TMP30:%.*]] = load double, ptr [[ARRAYIDX3]], align 8 1033; AVX1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP29]] to double 1034; AVX1-NEXT: [[ADD:%.*]] = fadd double [[TMP30]], [[CONV]] 1035; AVX1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[INDVARS_IV]] 1036; AVX1-NEXT: store double [[ADD]], ptr [[ARRAYIDX7]], align 8 1037; AVX1-NEXT: br label [[FOR_INC]] 1038; AVX1: for.inc: 1039; AVX1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1040; AVX1-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 10000 1041; AVX1-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] 1042; AVX1: for.end: 1043; AVX1-NEXT: ret void 1044; 1045; AVX2-LABEL: @foo3( 1046; AVX2-NEXT: entry: 1047; AVX2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] 1048; AVX2: vector.memcheck: 1049; AVX2-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 80000 1050; AVX2-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[TRIGGER:%.*]], i64 40000 1051; AVX2-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 80000 1052; AVX2-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[A]], [[SCEVGEP1]] 1053; AVX2-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[TRIGGER]], [[SCEVGEP]] 1054; AVX2-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] 1055; AVX2-NEXT: [[BOUND03:%.*]] = icmp ult ptr [[A]], [[SCEVGEP2]] 1056; AVX2-NEXT: [[BOUND14:%.*]] = icmp ult ptr [[B]], [[SCEVGEP]] 1057; AVX2-NEXT: [[FOUND_CONFLICT5:%.*]] = and i1 [[BOUND03]], [[BOUND14]] 1058; AVX2-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT5]] 1059; AVX2-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] 1060; AVX2: vector.ph: 1061; AVX2-NEXT: br label [[VECTOR_BODY:%.*]] 1062; AVX2: vector.body: 1063; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1064; AVX2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 1065; AVX2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP0]] 1066; AVX2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 1067; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 4 1068; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 8 1069; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 12 1070; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META11:![0-9]+]] 1071; AVX2-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META11]] 1072; AVX2-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META11]] 1073; AVX2-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4, !alias.scope [[META11]] 1074; AVX2-NEXT: [[TMP6:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], splat (i32 100) 1075; AVX2-NEXT: [[TMP7:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD6]], splat (i32 100) 1076; AVX2-NEXT: [[TMP8:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD7]], splat (i32 100) 1077; AVX2-NEXT: [[TMP9:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD8]], splat (i32 100) 1078; AVX2-NEXT: [[TMP10:%.*]] = getelementptr double, ptr [[B]], i64 [[TMP0]] 1079; AVX2-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[TMP10]], i32 0 1080; AVX2-NEXT: [[TMP12:%.*]] = getelementptr double, ptr [[TMP10]], i32 4 1081; AVX2-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[TMP10]], i32 8 1082; AVX2-NEXT: [[TMP14:%.*]] = getelementptr double, ptr [[TMP10]], i32 12 1083; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP11]], i32 8, <4 x i1> [[TMP6]], <4 x double> poison), !alias.scope [[META14:![0-9]+]] 1084; AVX2-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP12]], i32 8, <4 x i1> [[TMP7]], <4 x double> poison), !alias.scope [[META14]] 1085; AVX2-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP13]], i32 8, <4 x i1> [[TMP8]], <4 x double> poison), !alias.scope [[META14]] 1086; AVX2-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP14]], i32 8, <4 x i1> [[TMP9]], <4 x double> poison), !alias.scope [[META14]] 1087; AVX2-NEXT: [[TMP15:%.*]] = sitofp <4 x i32> [[WIDE_LOAD]] to <4 x double> 1088; AVX2-NEXT: [[TMP16:%.*]] = sitofp <4 x i32> [[WIDE_LOAD6]] to <4 x double> 1089; AVX2-NEXT: [[TMP17:%.*]] = sitofp <4 x i32> [[WIDE_LOAD7]] to <4 x double> 1090; AVX2-NEXT: [[TMP18:%.*]] = sitofp <4 x i32> [[WIDE_LOAD8]] to <4 x double> 1091; AVX2-NEXT: [[TMP19:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD]], [[TMP15]] 1092; AVX2-NEXT: [[TMP20:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD9]], [[TMP16]] 1093; AVX2-NEXT: [[TMP21:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD10]], [[TMP17]] 1094; AVX2-NEXT: [[TMP22:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD11]], [[TMP18]] 1095; AVX2-NEXT: [[TMP23:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP0]] 1096; AVX2-NEXT: [[TMP24:%.*]] = getelementptr double, ptr [[TMP23]], i32 0 1097; AVX2-NEXT: [[TMP25:%.*]] = getelementptr double, ptr [[TMP23]], i32 4 1098; AVX2-NEXT: [[TMP26:%.*]] = getelementptr double, ptr [[TMP23]], i32 8 1099; AVX2-NEXT: [[TMP27:%.*]] = getelementptr double, ptr [[TMP23]], i32 12 1100; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP19]], ptr [[TMP24]], i32 8, <4 x i1> [[TMP6]]), !alias.scope [[META16:![0-9]+]], !noalias [[META18:![0-9]+]] 1101; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP20]], ptr [[TMP25]], i32 8, <4 x i1> [[TMP7]]), !alias.scope [[META16]], !noalias [[META18]] 1102; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP21]], ptr [[TMP26]], i32 8, <4 x i1> [[TMP8]]), !alias.scope [[META16]], !noalias [[META18]] 1103; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP22]], ptr [[TMP27]], i32 8, <4 x i1> [[TMP9]]), !alias.scope [[META16]], !noalias [[META18]] 1104; AVX2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 1105; AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000 1106; AVX2-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] 1107; AVX2: middle.block: 1108; AVX2-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 1109; AVX2: scalar.ph: 1110; AVX2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] 1111; AVX2-NEXT: br label [[FOR_BODY:%.*]] 1112; AVX2: for.body: 1113; AVX2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] 1114; AVX2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDVARS_IV]] 1115; AVX2-NEXT: [[TMP29:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 1116; AVX2-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP29]], 100 1117; AVX2-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] 1118; AVX2: if.then: 1119; AVX2-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[INDVARS_IV]] 1120; AVX2-NEXT: [[TMP30:%.*]] = load double, ptr [[ARRAYIDX3]], align 8 1121; AVX2-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP29]] to double 1122; AVX2-NEXT: [[ADD:%.*]] = fadd double [[TMP30]], [[CONV]] 1123; AVX2-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[INDVARS_IV]] 1124; AVX2-NEXT: store double [[ADD]], ptr [[ARRAYIDX7]], align 8 1125; AVX2-NEXT: br label [[FOR_INC]] 1126; AVX2: for.inc: 1127; AVX2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1128; AVX2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 10000 1129; AVX2-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] 1130; AVX2: for.end: 1131; AVX2-NEXT: ret void 1132; 1133; AVX512-LABEL: @foo3( 1134; AVX512-NEXT: iter.check: 1135; AVX512-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] 1136; AVX512: vector.memcheck: 1137; AVX512-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 80000 1138; AVX512-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[TRIGGER:%.*]], i64 40000 1139; AVX512-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 80000 1140; AVX512-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[A]], [[SCEVGEP1]] 1141; AVX512-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[TRIGGER]], [[SCEVGEP]] 1142; AVX512-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] 1143; AVX512-NEXT: [[BOUND03:%.*]] = icmp ult ptr [[A]], [[SCEVGEP2]] 1144; AVX512-NEXT: [[BOUND14:%.*]] = icmp ult ptr [[B]], [[SCEVGEP]] 1145; AVX512-NEXT: [[FOUND_CONFLICT5:%.*]] = and i1 [[BOUND03]], [[BOUND14]] 1146; AVX512-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT5]] 1147; AVX512-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] 1148; AVX512: vector.main.loop.iter.check: 1149; AVX512-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH1:%.*]] 1150; AVX512: vector.ph: 1151; AVX512-NEXT: br label [[VECTOR_BODY:%.*]] 1152; AVX512: vector.body: 1153; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1154; AVX512-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 1155; AVX512-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP0]] 1156; AVX512-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 1157; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 8 1158; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 16 1159; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 24 1160; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META11:![0-9]+]] 1161; AVX512-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META11]] 1162; AVX512-NEXT: [[WIDE_LOAD7:%.*]] = load <8 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META11]] 1163; AVX512-NEXT: [[WIDE_LOAD8:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4, !alias.scope [[META11]] 1164; AVX512-NEXT: [[TMP6:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], splat (i32 100) 1165; AVX512-NEXT: [[TMP7:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD6]], splat (i32 100) 1166; AVX512-NEXT: [[TMP8:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD7]], splat (i32 100) 1167; AVX512-NEXT: [[TMP9:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD8]], splat (i32 100) 1168; AVX512-NEXT: [[TMP10:%.*]] = getelementptr double, ptr [[B]], i64 [[TMP0]] 1169; AVX512-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[TMP10]], i32 0 1170; AVX512-NEXT: [[TMP12:%.*]] = getelementptr double, ptr [[TMP10]], i32 8 1171; AVX512-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[TMP10]], i32 16 1172; AVX512-NEXT: [[TMP14:%.*]] = getelementptr double, ptr [[TMP10]], i32 24 1173; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr [[TMP11]], i32 8, <8 x i1> [[TMP6]], <8 x double> poison), !alias.scope [[META14:![0-9]+]] 1174; AVX512-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr [[TMP12]], i32 8, <8 x i1> [[TMP7]], <8 x double> poison), !alias.scope [[META14]] 1175; AVX512-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr [[TMP13]], i32 8, <8 x i1> [[TMP8]], <8 x double> poison), !alias.scope [[META14]] 1176; AVX512-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr [[TMP14]], i32 8, <8 x i1> [[TMP9]], <8 x double> poison), !alias.scope [[META14]] 1177; AVX512-NEXT: [[TMP15:%.*]] = sitofp <8 x i32> [[WIDE_LOAD]] to <8 x double> 1178; AVX512-NEXT: [[TMP16:%.*]] = sitofp <8 x i32> [[WIDE_LOAD6]] to <8 x double> 1179; AVX512-NEXT: [[TMP17:%.*]] = sitofp <8 x i32> [[WIDE_LOAD7]] to <8 x double> 1180; AVX512-NEXT: [[TMP18:%.*]] = sitofp <8 x i32> [[WIDE_LOAD8]] to <8 x double> 1181; AVX512-NEXT: [[TMP19:%.*]] = fadd <8 x double> [[WIDE_MASKED_LOAD]], [[TMP15]] 1182; AVX512-NEXT: [[TMP20:%.*]] = fadd <8 x double> [[WIDE_MASKED_LOAD9]], [[TMP16]] 1183; AVX512-NEXT: [[TMP21:%.*]] = fadd <8 x double> [[WIDE_MASKED_LOAD10]], [[TMP17]] 1184; AVX512-NEXT: [[TMP22:%.*]] = fadd <8 x double> [[WIDE_MASKED_LOAD11]], [[TMP18]] 1185; AVX512-NEXT: [[TMP23:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP0]] 1186; AVX512-NEXT: [[TMP24:%.*]] = getelementptr double, ptr [[TMP23]], i32 0 1187; AVX512-NEXT: [[TMP25:%.*]] = getelementptr double, ptr [[TMP23]], i32 8 1188; AVX512-NEXT: [[TMP26:%.*]] = getelementptr double, ptr [[TMP23]], i32 16 1189; AVX512-NEXT: [[TMP27:%.*]] = getelementptr double, ptr [[TMP23]], i32 24 1190; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[TMP19]], ptr [[TMP24]], i32 8, <8 x i1> [[TMP6]]), !alias.scope [[META16:![0-9]+]], !noalias [[META18:![0-9]+]] 1191; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[TMP20]], ptr [[TMP25]], i32 8, <8 x i1> [[TMP7]]), !alias.scope [[META16]], !noalias [[META18]] 1192; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[TMP21]], ptr [[TMP26]], i32 8, <8 x i1> [[TMP8]]), !alias.scope [[META16]], !noalias [[META18]] 1193; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[TMP22]], ptr [[TMP27]], i32 8, <8 x i1> [[TMP9]]), !alias.scope [[META16]], !noalias [[META18]] 1194; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 1195; AVX512-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 9984 1196; AVX512-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] 1197; AVX512: middle.block: 1198; AVX512-NEXT: br i1 false, label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] 1199; AVX512: vec.epilog.iter.check: 1200; AVX512-NEXT: br i1 false, label [[SCALAR_PH]], label [[VEC_EPILOG_PH]] 1201; AVX512: vec.epilog.ph: 1202; AVX512-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 9984, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_PH]] ] 1203; AVX512-NEXT: br label [[FOR_BODY:%.*]] 1204; AVX512: vec.epilog.vector.body: 1205; AVX512-NEXT: [[INDEX12:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT15:%.*]], [[FOR_BODY]] ] 1206; AVX512-NEXT: [[TMP40:%.*]] = add i64 [[INDEX12]], 0 1207; AVX512-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP40]] 1208; AVX512-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP41]], i32 0 1209; AVX512-NEXT: [[WIDE_LOAD13:%.*]] = load <8 x i32>, ptr [[TMP31]], align 4, !alias.scope [[META20:![0-9]+]] 1210; AVX512-NEXT: [[TMP32:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD13]], splat (i32 100) 1211; AVX512-NEXT: [[TMP33:%.*]] = getelementptr double, ptr [[B]], i64 [[TMP40]] 1212; AVX512-NEXT: [[TMP34:%.*]] = getelementptr double, ptr [[TMP33]], i32 0 1213; AVX512-NEXT: [[WIDE_MASKED_LOAD14:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr [[TMP34]], i32 8, <8 x i1> [[TMP32]], <8 x double> poison), !alias.scope [[META23:![0-9]+]] 1214; AVX512-NEXT: [[TMP35:%.*]] = sitofp <8 x i32> [[WIDE_LOAD13]] to <8 x double> 1215; AVX512-NEXT: [[TMP36:%.*]] = fadd <8 x double> [[WIDE_MASKED_LOAD14]], [[TMP35]] 1216; AVX512-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP40]] 1217; AVX512-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP37]], i32 0 1218; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[TMP36]], ptr [[TMP38]], i32 8, <8 x i1> [[TMP32]]), !alias.scope [[META25:![0-9]+]], !noalias [[META27:![0-9]+]] 1219; AVX512-NEXT: [[INDEX_NEXT15]] = add nuw i64 [[INDEX12]], 8 1220; AVX512-NEXT: [[TMP39:%.*]] = icmp eq i64 [[INDEX_NEXT15]], 10000 1221; AVX512-NEXT: br i1 [[TMP39]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] 1222; AVX512: vec.epilog.middle.block: 1223; AVX512-NEXT: br i1 true, label [[FOR_END]], label [[SCALAR_PH]] 1224; AVX512: vec.epilog.scalar.ph: 1225; AVX512-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 9984, [[VEC_EPILOG_ITER_CHECK]] ] 1226; AVX512-NEXT: br label [[FOR_BODY1:%.*]] 1227; AVX512: for.body: 1228; AVX512-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] 1229; AVX512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDVARS_IV]] 1230; AVX512-NEXT: [[TMP29:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 1231; AVX512-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP29]], 100 1232; AVX512-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] 1233; AVX512: if.then: 1234; AVX512-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[INDVARS_IV]] 1235; AVX512-NEXT: [[TMP30:%.*]] = load double, ptr [[ARRAYIDX3]], align 8 1236; AVX512-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP29]] to double 1237; AVX512-NEXT: [[ADD:%.*]] = fadd double [[TMP30]], [[CONV]] 1238; AVX512-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[INDVARS_IV]] 1239; AVX512-NEXT: store double [[ADD]], ptr [[ARRAYIDX7]], align 8 1240; AVX512-NEXT: br label [[FOR_INC]] 1241; AVX512: for.inc: 1242; AVX512-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1243; AVX512-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 10000 1244; AVX512-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP29:![0-9]+]] 1245; AVX512: for.end: 1246; AVX512-NEXT: ret void 1247; 1248entry: 1249 br label %for.body 1250 1251for.body: ; preds = %for.inc, %entry 1252 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ] 1253 %arrayidx = getelementptr inbounds i32, ptr %trigger, i64 %indvars.iv 1254 %0 = load i32, ptr %arrayidx, align 4 1255 %cmp1 = icmp slt i32 %0, 100 1256 br i1 %cmp1, label %if.then, label %for.inc 1257 1258if.then: ; preds = %for.body 1259 %arrayidx3 = getelementptr inbounds double, ptr %B, i64 %indvars.iv 1260 %1 = load double, ptr %arrayidx3, align 8 1261 %conv = sitofp i32 %0 to double 1262 %add = fadd double %1, %conv 1263 %arrayidx7 = getelementptr inbounds double, ptr %A, i64 %indvars.iv 1264 store double %add, ptr %arrayidx7, align 8 1265 br label %for.inc 1266 1267for.inc: ; preds = %for.body, %if.then 1268 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 1269 %exitcond = icmp eq i64 %indvars.iv.next, 10000 1270 br i1 %exitcond, label %for.end, label %for.body 1271 1272for.end: ; preds = %for.inc 1273 ret void 1274} 1275 1276; The source code: 1277; 1278;void foo4(ptr A, ptr B, int *trigger) { 1279; 1280; for (int i=0; i<10000; i += 16) { 1281; if (trigger[i] < 100) { 1282; A[i] = B[i*2] + trigger[i]; << non-cosecutive access 1283; } 1284; } 1285;} 1286 1287define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture readonly %trigger) local_unnamed_addr #0 { 1288; AVX-LABEL: @foo4( 1289; AVX-NEXT: entry: 1290; AVX-NEXT: br label [[FOR_BODY:%.*]] 1291; AVX: for.body: 1292; AVX-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] 1293; AVX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[INDVARS_IV]] 1294; AVX-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 1295; AVX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP0]], 100 1296; AVX-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] 1297; AVX: if.then: 1298; AVX-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[INDVARS_IV]], 1 1299; AVX-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, ptr [[B:%.*]], i64 [[TMP1]] 1300; AVX-NEXT: [[TMP2:%.*]] = load double, ptr [[ARRAYIDX3]], align 8 1301; AVX-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to double 1302; AVX-NEXT: [[ADD:%.*]] = fadd double [[TMP2]], [[CONV]] 1303; AVX-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[INDVARS_IV]] 1304; AVX-NEXT: store double [[ADD]], ptr [[ARRAYIDX7]], align 8 1305; AVX-NEXT: br label [[FOR_INC]] 1306; AVX: for.inc: 1307; AVX-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 16 1308; AVX-NEXT: [[CMP:%.*]] = icmp ult i64 [[INDVARS_IV_NEXT]], 10000 1309; AVX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] 1310; AVX: for.end: 1311; AVX-NEXT: ret void 1312; 1313; AVX512-LABEL: @foo4( 1314; AVX512-NEXT: entry: 1315; AVX512-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] 1316; AVX512: vector.memcheck: 1317; AVX512-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 79880 1318; AVX512-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[TRIGGER:%.*]], i64 39940 1319; AVX512-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 159752 1320; AVX512-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[A]], [[SCEVGEP1]] 1321; AVX512-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[TRIGGER]], [[SCEVGEP]] 1322; AVX512-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] 1323; AVX512-NEXT: [[BOUND03:%.*]] = icmp ult ptr [[A]], [[SCEVGEP2]] 1324; AVX512-NEXT: [[BOUND14:%.*]] = icmp ult ptr [[B]], [[SCEVGEP]] 1325; AVX512-NEXT: [[FOUND_CONFLICT5:%.*]] = and i1 [[BOUND03]], [[BOUND14]] 1326; AVX512-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT5]] 1327; AVX512-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] 1328; AVX512: vector.ph: 1329; AVX512-NEXT: br label [[VECTOR_BODY:%.*]] 1330; AVX512: vector.body: 1331; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1332; AVX512-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 16, i64 32, i64 48, i64 64, i64 80, i64 96, i64 112>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 1333; AVX512-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], <8 x i64> [[VEC_IND]] 1334; AVX512-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP0]], i32 4, <8 x i1> splat (i1 true), <8 x i32> poison), !alias.scope [[META30:![0-9]+]] 1335; AVX512-NEXT: [[TMP1:%.*]] = icmp slt <8 x i32> [[WIDE_MASKED_GATHER]], splat (i32 100) 1336; AVX512-NEXT: [[TMP2:%.*]] = shl nuw nsw <8 x i64> [[VEC_IND]], splat (i64 1) 1337; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[B]], <8 x i64> [[TMP2]] 1338; AVX512-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> [[TMP3]], i32 8, <8 x i1> [[TMP1]], <8 x double> poison), !alias.scope [[META33:![0-9]+]] 1339; AVX512-NEXT: [[TMP4:%.*]] = sitofp <8 x i32> [[WIDE_MASKED_GATHER]] to <8 x double> 1340; AVX512-NEXT: [[TMP5:%.*]] = fadd <8 x double> [[WIDE_MASKED_GATHER6]], [[TMP4]] 1341; AVX512-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, ptr [[A]], <8 x i64> [[VEC_IND]] 1342; AVX512-NEXT: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> [[TMP5]], <8 x ptr> [[TMP6]], i32 8, <8 x i1> [[TMP1]]), !alias.scope [[META35:![0-9]+]], !noalias [[META37:![0-9]+]] 1343; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 1344; AVX512-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 128) 1345; AVX512-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 624 1346; AVX512-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]] 1347; AVX512: middle.block: 1348; AVX512-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]] 1349; AVX512: scalar.ph: 1350; AVX512-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 9984, [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] 1351; AVX512-NEXT: br label [[FOR_BODY:%.*]] 1352; AVX512: for.body: 1353; AVX512-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] 1354; AVX512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDVARS_IV]] 1355; AVX512-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 1356; AVX512-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP8]], 100 1357; AVX512-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] 1358; AVX512: if.then: 1359; AVX512-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[INDVARS_IV]], 1 1360; AVX512-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[TMP9]] 1361; AVX512-NEXT: [[TMP10:%.*]] = load double, ptr [[ARRAYIDX3]], align 8 1362; AVX512-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP8]] to double 1363; AVX512-NEXT: [[ADD:%.*]] = fadd double [[TMP10]], [[CONV]] 1364; AVX512-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[INDVARS_IV]] 1365; AVX512-NEXT: store double [[ADD]], ptr [[ARRAYIDX7]], align 8 1366; AVX512-NEXT: br label [[FOR_INC]] 1367; AVX512: for.inc: 1368; AVX512-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 16 1369; AVX512-NEXT: [[CMP:%.*]] = icmp ult i64 [[INDVARS_IV_NEXT]], 10000 1370; AVX512-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP39:![0-9]+]] 1371; AVX512: for.end: 1372; AVX512-NEXT: ret void 1373; 1374entry: 1375 br label %for.body 1376 1377for.body: ; preds = %entry, %for.inc 1378 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ] 1379 %arrayidx = getelementptr inbounds i32, ptr %trigger, i64 %indvars.iv 1380 %0 = load i32, ptr %arrayidx, align 4 1381 %cmp1 = icmp slt i32 %0, 100 1382 br i1 %cmp1, label %if.then, label %for.inc 1383 1384if.then: ; preds = %for.body 1385 %1 = shl nuw nsw i64 %indvars.iv, 1 1386 %arrayidx3 = getelementptr inbounds double, ptr %B, i64 %1 1387 %2 = load double, ptr %arrayidx3, align 8 1388 %conv = sitofp i32 %0 to double 1389 %add = fadd double %2, %conv 1390 %arrayidx7 = getelementptr inbounds double, ptr %A, i64 %indvars.iv 1391 store double %add, ptr %arrayidx7, align 8 1392 br label %for.inc 1393 1394for.inc: ; preds = %for.body, %if.then 1395 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 16 1396 %cmp = icmp ult i64 %indvars.iv.next, 10000 1397 br i1 %cmp, label %for.body, label %for.end 1398 1399for.end: ; preds = %for.inc 1400 ret void 1401} 1402 1403@a = common global [1 x ptr] zeroinitializer, align 8 1404@c = common global ptr null, align 8 1405 1406; Reverse loop 1407;void foo6(ptr in, ptr out, unsigned size, int *trigger) { 1408; 1409; for (int i=SIZE-1; i>=0; i--) { 1410; if (trigger[i] > 0) { 1411; out[i] = in[i] + (double) 0.5; 1412; } 1413; } 1414;} 1415 1416define void @foo6(ptr nocapture readonly %in, ptr nocapture %out, i32 %size, ptr nocapture readonly %trigger) local_unnamed_addr #0 { 1417; AVX1-LABEL: @foo6( 1418; AVX1-NEXT: entry: 1419; AVX1-NEXT: br label [[FOR_BODY:%.*]] 1420; AVX1: for.body: 1421; AVX1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 4095, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] 1422; AVX1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[INDVARS_IV]] 1423; AVX1-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 1424; AVX1-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP0]], 0 1425; AVX1-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] 1426; AVX1: if.then: 1427; AVX1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, ptr [[IN:%.*]], i64 [[INDVARS_IV]] 1428; AVX1-NEXT: [[TMP1:%.*]] = load double, ptr [[ARRAYIDX3]], align 8 1429; AVX1-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], 5.000000e-01 1430; AVX1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, ptr [[OUT:%.*]], i64 [[INDVARS_IV]] 1431; AVX1-NEXT: store double [[ADD]], ptr [[ARRAYIDX5]], align 8 1432; AVX1-NEXT: br label [[FOR_INC]] 1433; AVX1: for.inc: 1434; AVX1-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 1435; AVX1-NEXT: [[CMP:%.*]] = icmp eq i64 [[INDVARS_IV]], 0 1436; AVX1-NEXT: br i1 [[CMP]], label [[FOR_END:%.*]], label [[FOR_BODY]] 1437; AVX1: for.end: 1438; AVX1-NEXT: ret void 1439; 1440; AVX2-LABEL: @foo6( 1441; AVX2-NEXT: entry: 1442; AVX2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] 1443; AVX2: vector.memcheck: 1444; AVX2-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[OUT:%.*]], i64 32768 1445; AVX2-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[TRIGGER:%.*]], i64 16384 1446; AVX2-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 32768 1447; AVX2-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[OUT]], [[SCEVGEP1]] 1448; AVX2-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[TRIGGER]], [[SCEVGEP]] 1449; AVX2-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] 1450; AVX2-NEXT: [[BOUND03:%.*]] = icmp ult ptr [[OUT]], [[SCEVGEP2]] 1451; AVX2-NEXT: [[BOUND14:%.*]] = icmp ult ptr [[IN]], [[SCEVGEP]] 1452; AVX2-NEXT: [[FOUND_CONFLICT5:%.*]] = and i1 [[BOUND03]], [[BOUND14]] 1453; AVX2-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT5]] 1454; AVX2-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] 1455; AVX2: vector.ph: 1456; AVX2-NEXT: br label [[VECTOR_BODY:%.*]] 1457; AVX2: vector.body: 1458; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1459; AVX2-NEXT: [[OFFSET_IDX:%.*]] = sub i64 4095, [[INDEX]] 1460; AVX2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 1461; AVX2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP0]] 1462; AVX2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 1463; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 -3 1464; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 -4 1465; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 -3 1466; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 -8 1467; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 -3 1468; AVX2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 -12 1469; AVX2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 -3 1470; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META21:![0-9]+]] 1471; AVX2-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 1472; AVX2-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4, !alias.scope [[META21]] 1473; AVX2-NEXT: [[REVERSE7:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD6]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 1474; AVX2-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i32>, ptr [[TMP7]], align 4, !alias.scope [[META21]] 1475; AVX2-NEXT: [[REVERSE9:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD8]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 1476; AVX2-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x i32>, ptr [[TMP9]], align 4, !alias.scope [[META21]] 1477; AVX2-NEXT: [[REVERSE11:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD10]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 1478; AVX2-NEXT: [[TMP10:%.*]] = icmp sgt <4 x i32> [[REVERSE]], zeroinitializer 1479; AVX2-NEXT: [[TMP11:%.*]] = icmp sgt <4 x i32> [[REVERSE7]], zeroinitializer 1480; AVX2-NEXT: [[TMP12:%.*]] = icmp sgt <4 x i32> [[REVERSE9]], zeroinitializer 1481; AVX2-NEXT: [[TMP13:%.*]] = icmp sgt <4 x i32> [[REVERSE11]], zeroinitializer 1482; AVX2-NEXT: [[TMP14:%.*]] = getelementptr double, ptr [[IN]], i64 [[TMP0]] 1483; AVX2-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[TMP14]], i32 0 1484; AVX2-NEXT: [[TMP16:%.*]] = getelementptr double, ptr [[TMP15]], i32 -3 1485; AVX2-NEXT: [[TMP17:%.*]] = getelementptr double, ptr [[TMP14]], i32 -4 1486; AVX2-NEXT: [[TMP18:%.*]] = getelementptr double, ptr [[TMP17]], i32 -3 1487; AVX2-NEXT: [[TMP19:%.*]] = getelementptr double, ptr [[TMP14]], i32 -8 1488; AVX2-NEXT: [[TMP20:%.*]] = getelementptr double, ptr [[TMP19]], i32 -3 1489; AVX2-NEXT: [[TMP21:%.*]] = getelementptr double, ptr [[TMP14]], i32 -12 1490; AVX2-NEXT: [[TMP22:%.*]] = getelementptr double, ptr [[TMP21]], i32 -3 1491; AVX2-NEXT: [[REVERSE12:%.*]] = shufflevector <4 x i1> [[TMP10]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 1492; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP16]], i32 8, <4 x i1> [[REVERSE12]], <4 x double> poison), !alias.scope [[META24:![0-9]+]] 1493; AVX2-NEXT: [[REVERSE13:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 1494; AVX2-NEXT: [[REVERSE14:%.*]] = shufflevector <4 x i1> [[TMP11]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 1495; AVX2-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP18]], i32 8, <4 x i1> [[REVERSE14]], <4 x double> poison), !alias.scope [[META24]] 1496; AVX2-NEXT: [[REVERSE16:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD15]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 1497; AVX2-NEXT: [[REVERSE17:%.*]] = shufflevector <4 x i1> [[TMP12]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 1498; AVX2-NEXT: [[WIDE_MASKED_LOAD18:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP20]], i32 8, <4 x i1> [[REVERSE17]], <4 x double> poison), !alias.scope [[META24]] 1499; AVX2-NEXT: [[REVERSE19:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD18]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 1500; AVX2-NEXT: [[REVERSE20:%.*]] = shufflevector <4 x i1> [[TMP13]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 1501; AVX2-NEXT: [[WIDE_MASKED_LOAD21:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP22]], i32 8, <4 x i1> [[REVERSE20]], <4 x double> poison), !alias.scope [[META24]] 1502; AVX2-NEXT: [[REVERSE22:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD21]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 1503; AVX2-NEXT: [[TMP23:%.*]] = fadd <4 x double> [[REVERSE13]], splat (double 5.000000e-01) 1504; AVX2-NEXT: [[TMP24:%.*]] = fadd <4 x double> [[REVERSE16]], splat (double 5.000000e-01) 1505; AVX2-NEXT: [[TMP25:%.*]] = fadd <4 x double> [[REVERSE19]], splat (double 5.000000e-01) 1506; AVX2-NEXT: [[TMP26:%.*]] = fadd <4 x double> [[REVERSE22]], splat (double 5.000000e-01) 1507; AVX2-NEXT: [[TMP27:%.*]] = getelementptr double, ptr [[OUT]], i64 [[TMP0]] 1508; AVX2-NEXT: [[TMP28:%.*]] = getelementptr double, ptr [[TMP27]], i32 0 1509; AVX2-NEXT: [[TMP29:%.*]] = getelementptr double, ptr [[TMP28]], i32 -3 1510; AVX2-NEXT: [[TMP30:%.*]] = getelementptr double, ptr [[TMP27]], i32 -4 1511; AVX2-NEXT: [[TMP31:%.*]] = getelementptr double, ptr [[TMP30]], i32 -3 1512; AVX2-NEXT: [[TMP32:%.*]] = getelementptr double, ptr [[TMP27]], i32 -8 1513; AVX2-NEXT: [[TMP33:%.*]] = getelementptr double, ptr [[TMP32]], i32 -3 1514; AVX2-NEXT: [[TMP34:%.*]] = getelementptr double, ptr [[TMP27]], i32 -12 1515; AVX2-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[TMP34]], i32 -3 1516; AVX2-NEXT: [[REVERSE24:%.*]] = shufflevector <4 x double> [[TMP23]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 1517; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[REVERSE24]], ptr [[TMP29]], i32 8, <4 x i1> [[REVERSE12]]), !alias.scope [[META26:![0-9]+]], !noalias [[META28:![0-9]+]] 1518; AVX2-NEXT: [[REVERSE26:%.*]] = shufflevector <4 x double> [[TMP24]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 1519; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[REVERSE26]], ptr [[TMP31]], i32 8, <4 x i1> [[REVERSE14]]), !alias.scope [[META26]], !noalias [[META28]] 1520; AVX2-NEXT: [[REVERSE28:%.*]] = shufflevector <4 x double> [[TMP25]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 1521; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[REVERSE28]], ptr [[TMP33]], i32 8, <4 x i1> [[REVERSE17]]), !alias.scope [[META26]], !noalias [[META28]] 1522; AVX2-NEXT: [[REVERSE30:%.*]] = shufflevector <4 x double> [[TMP26]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 1523; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[REVERSE30]], ptr [[TMP35]], i32 8, <4 x i1> [[REVERSE20]]), !alias.scope [[META26]], !noalias [[META28]] 1524; AVX2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 1525; AVX2-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096 1526; AVX2-NEXT: br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] 1527; AVX2: middle.block: 1528; AVX2-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 1529; AVX2: scalar.ph: 1530; AVX2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ -1, [[MIDDLE_BLOCK]] ], [ 4095, [[VECTOR_MEMCHECK]] ], [ 4095, [[ENTRY:%.*]] ] 1531; AVX2-NEXT: br label [[FOR_BODY:%.*]] 1532; AVX2: for.body: 1533; AVX2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] 1534; AVX2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDVARS_IV]] 1535; AVX2-NEXT: [[TMP37:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 1536; AVX2-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP37]], 0 1537; AVX2-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] 1538; AVX2: if.then: 1539; AVX2-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, ptr [[IN]], i64 [[INDVARS_IV]] 1540; AVX2-NEXT: [[TMP38:%.*]] = load double, ptr [[ARRAYIDX3]], align 8 1541; AVX2-NEXT: [[ADD:%.*]] = fadd double [[TMP38]], 5.000000e-01 1542; AVX2-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, ptr [[OUT]], i64 [[INDVARS_IV]] 1543; AVX2-NEXT: store double [[ADD]], ptr [[ARRAYIDX5]], align 8 1544; AVX2-NEXT: br label [[FOR_INC]] 1545; AVX2: for.inc: 1546; AVX2-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 1547; AVX2-NEXT: [[CMP:%.*]] = icmp eq i64 [[INDVARS_IV]], 0 1548; AVX2-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] 1549; AVX2: for.end: 1550; AVX2-NEXT: ret void 1551; 1552; AVX512-LABEL: @foo6( 1553; AVX512-NEXT: entry: 1554; AVX512-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] 1555; AVX512: vector.memcheck: 1556; AVX512-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[OUT:%.*]], i64 32768 1557; AVX512-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[TRIGGER:%.*]], i64 16384 1558; AVX512-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 32768 1559; AVX512-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[OUT]], [[SCEVGEP1]] 1560; AVX512-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[TRIGGER]], [[SCEVGEP]] 1561; AVX512-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] 1562; AVX512-NEXT: [[BOUND03:%.*]] = icmp ult ptr [[OUT]], [[SCEVGEP2]] 1563; AVX512-NEXT: [[BOUND14:%.*]] = icmp ult ptr [[IN]], [[SCEVGEP]] 1564; AVX512-NEXT: [[FOUND_CONFLICT5:%.*]] = and i1 [[BOUND03]], [[BOUND14]] 1565; AVX512-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT5]] 1566; AVX512-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] 1567; AVX512: vector.ph: 1568; AVX512-NEXT: br label [[VECTOR_BODY:%.*]] 1569; AVX512: vector.body: 1570; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1571; AVX512-NEXT: [[OFFSET_IDX:%.*]] = sub i64 4095, [[INDEX]] 1572; AVX512-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 1573; AVX512-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP0]] 1574; AVX512-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 1575; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 -7 1576; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 -8 1577; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 -7 1578; AVX512-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 -16 1579; AVX512-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 -7 1580; AVX512-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 -24 1581; AVX512-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 -7 1582; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META40:![0-9]+]] 1583; AVX512-NEXT: [[REVERSE:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD]], <8 x i32> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 1584; AVX512-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4, !alias.scope [[META40]] 1585; AVX512-NEXT: [[REVERSE7:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD6]], <8 x i32> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 1586; AVX512-NEXT: [[WIDE_LOAD8:%.*]] = load <8 x i32>, ptr [[TMP7]], align 4, !alias.scope [[META40]] 1587; AVX512-NEXT: [[REVERSE9:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD8]], <8 x i32> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 1588; AVX512-NEXT: [[WIDE_LOAD10:%.*]] = load <8 x i32>, ptr [[TMP9]], align 4, !alias.scope [[META40]] 1589; AVX512-NEXT: [[REVERSE11:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD10]], <8 x i32> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 1590; AVX512-NEXT: [[TMP10:%.*]] = icmp sgt <8 x i32> [[REVERSE]], zeroinitializer 1591; AVX512-NEXT: [[TMP11:%.*]] = icmp sgt <8 x i32> [[REVERSE7]], zeroinitializer 1592; AVX512-NEXT: [[TMP12:%.*]] = icmp sgt <8 x i32> [[REVERSE9]], zeroinitializer 1593; AVX512-NEXT: [[TMP13:%.*]] = icmp sgt <8 x i32> [[REVERSE11]], zeroinitializer 1594; AVX512-NEXT: [[TMP14:%.*]] = getelementptr double, ptr [[IN]], i64 [[TMP0]] 1595; AVX512-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[TMP14]], i32 0 1596; AVX512-NEXT: [[TMP16:%.*]] = getelementptr double, ptr [[TMP15]], i32 -7 1597; AVX512-NEXT: [[TMP17:%.*]] = getelementptr double, ptr [[TMP14]], i32 -8 1598; AVX512-NEXT: [[TMP18:%.*]] = getelementptr double, ptr [[TMP17]], i32 -7 1599; AVX512-NEXT: [[TMP19:%.*]] = getelementptr double, ptr [[TMP14]], i32 -16 1600; AVX512-NEXT: [[TMP20:%.*]] = getelementptr double, ptr [[TMP19]], i32 -7 1601; AVX512-NEXT: [[TMP21:%.*]] = getelementptr double, ptr [[TMP14]], i32 -24 1602; AVX512-NEXT: [[TMP22:%.*]] = getelementptr double, ptr [[TMP21]], i32 -7 1603; AVX512-NEXT: [[REVERSE12:%.*]] = shufflevector <8 x i1> [[TMP10]], <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 1604; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr [[TMP16]], i32 8, <8 x i1> [[REVERSE12]], <8 x double> poison), !alias.scope [[META43:![0-9]+]] 1605; AVX512-NEXT: [[REVERSE13:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 1606; AVX512-NEXT: [[REVERSE14:%.*]] = shufflevector <8 x i1> [[TMP11]], <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 1607; AVX512-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr [[TMP18]], i32 8, <8 x i1> [[REVERSE14]], <8 x double> poison), !alias.scope [[META43]] 1608; AVX512-NEXT: [[REVERSE16:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD15]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 1609; AVX512-NEXT: [[REVERSE17:%.*]] = shufflevector <8 x i1> [[TMP12]], <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 1610; AVX512-NEXT: [[WIDE_MASKED_LOAD18:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr [[TMP20]], i32 8, <8 x i1> [[REVERSE17]], <8 x double> poison), !alias.scope [[META43]] 1611; AVX512-NEXT: [[REVERSE19:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD18]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 1612; AVX512-NEXT: [[REVERSE20:%.*]] = shufflevector <8 x i1> [[TMP13]], <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 1613; AVX512-NEXT: [[WIDE_MASKED_LOAD21:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr [[TMP22]], i32 8, <8 x i1> [[REVERSE20]], <8 x double> poison), !alias.scope [[META43]] 1614; AVX512-NEXT: [[REVERSE22:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD21]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 1615; AVX512-NEXT: [[TMP23:%.*]] = fadd <8 x double> [[REVERSE13]], splat (double 5.000000e-01) 1616; AVX512-NEXT: [[TMP24:%.*]] = fadd <8 x double> [[REVERSE16]], splat (double 5.000000e-01) 1617; AVX512-NEXT: [[TMP25:%.*]] = fadd <8 x double> [[REVERSE19]], splat (double 5.000000e-01) 1618; AVX512-NEXT: [[TMP26:%.*]] = fadd <8 x double> [[REVERSE22]], splat (double 5.000000e-01) 1619; AVX512-NEXT: [[TMP27:%.*]] = getelementptr double, ptr [[OUT]], i64 [[TMP0]] 1620; AVX512-NEXT: [[TMP28:%.*]] = getelementptr double, ptr [[TMP27]], i32 0 1621; AVX512-NEXT: [[TMP29:%.*]] = getelementptr double, ptr [[TMP28]], i32 -7 1622; AVX512-NEXT: [[TMP30:%.*]] = getelementptr double, ptr [[TMP27]], i32 -8 1623; AVX512-NEXT: [[TMP31:%.*]] = getelementptr double, ptr [[TMP30]], i32 -7 1624; AVX512-NEXT: [[TMP32:%.*]] = getelementptr double, ptr [[TMP27]], i32 -16 1625; AVX512-NEXT: [[TMP33:%.*]] = getelementptr double, ptr [[TMP32]], i32 -7 1626; AVX512-NEXT: [[TMP34:%.*]] = getelementptr double, ptr [[TMP27]], i32 -24 1627; AVX512-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[TMP34]], i32 -7 1628; AVX512-NEXT: [[REVERSE24:%.*]] = shufflevector <8 x double> [[TMP23]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 1629; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[REVERSE24]], ptr [[TMP29]], i32 8, <8 x i1> [[REVERSE12]]), !alias.scope [[META45:![0-9]+]], !noalias [[META47:![0-9]+]] 1630; AVX512-NEXT: [[REVERSE26:%.*]] = shufflevector <8 x double> [[TMP24]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 1631; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[REVERSE26]], ptr [[TMP31]], i32 8, <8 x i1> [[REVERSE14]]), !alias.scope [[META45]], !noalias [[META47]] 1632; AVX512-NEXT: [[REVERSE28:%.*]] = shufflevector <8 x double> [[TMP25]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 1633; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[REVERSE28]], ptr [[TMP33]], i32 8, <8 x i1> [[REVERSE17]]), !alias.scope [[META45]], !noalias [[META47]] 1634; AVX512-NEXT: [[REVERSE30:%.*]] = shufflevector <8 x double> [[TMP26]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 1635; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[REVERSE30]], ptr [[TMP35]], i32 8, <8 x i1> [[REVERSE20]]), !alias.scope [[META45]], !noalias [[META47]] 1636; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 1637; AVX512-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096 1638; AVX512-NEXT: br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]] 1639; AVX512: middle.block: 1640; AVX512-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 1641; AVX512: scalar.ph: 1642; AVX512-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ -1, [[MIDDLE_BLOCK]] ], [ 4095, [[VECTOR_MEMCHECK]] ], [ 4095, [[ENTRY:%.*]] ] 1643; AVX512-NEXT: br label [[FOR_BODY:%.*]] 1644; AVX512: for.body: 1645; AVX512-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] 1646; AVX512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDVARS_IV]] 1647; AVX512-NEXT: [[TMP37:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 1648; AVX512-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP37]], 0 1649; AVX512-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] 1650; AVX512: if.then: 1651; AVX512-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, ptr [[IN]], i64 [[INDVARS_IV]] 1652; AVX512-NEXT: [[TMP38:%.*]] = load double, ptr [[ARRAYIDX3]], align 8 1653; AVX512-NEXT: [[ADD:%.*]] = fadd double [[TMP38]], 5.000000e-01 1654; AVX512-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, ptr [[OUT]], i64 [[INDVARS_IV]] 1655; AVX512-NEXT: store double [[ADD]], ptr [[ARRAYIDX5]], align 8 1656; AVX512-NEXT: br label [[FOR_INC]] 1657; AVX512: for.inc: 1658; AVX512-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 1659; AVX512-NEXT: [[CMP:%.*]] = icmp eq i64 [[INDVARS_IV]], 0 1660; AVX512-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP49:![0-9]+]] 1661; AVX512: for.end: 1662; AVX512-NEXT: ret void 1663; 1664entry: 1665 br label %for.body 1666 1667for.body: ; preds = %for.inc, %entry 1668 %indvars.iv = phi i64 [ 4095, %entry ], [ %indvars.iv.next, %for.inc ] 1669 %arrayidx = getelementptr inbounds i32, ptr %trigger, i64 %indvars.iv 1670 %0 = load i32, ptr %arrayidx, align 4 1671 %cmp1 = icmp sgt i32 %0, 0 1672 br i1 %cmp1, label %if.then, label %for.inc 1673 1674if.then: ; preds = %for.body 1675 %arrayidx3 = getelementptr inbounds double, ptr %in, i64 %indvars.iv 1676 %1 = load double, ptr %arrayidx3, align 8 1677 %add = fadd double %1, 5.000000e-01 1678 %arrayidx5 = getelementptr inbounds double, ptr %out, i64 %indvars.iv 1679 store double %add, ptr %arrayidx5, align 8 1680 br label %for.inc 1681 1682for.inc: ; preds = %for.body, %if.then 1683 %indvars.iv.next = add nsw i64 %indvars.iv, -1 1684 %cmp = icmp eq i64 %indvars.iv, 0 1685 br i1 %cmp, label %for.end, label %for.body 1686 1687for.end: ; preds = %for.inc 1688 ret void 1689} 1690 1691; void foo7 (ptr __restrict__ out, ptr __restrict__ in, 1692; bool * __restrict__ trigger, unsigned size) { 1693; 1694; for (unsigned i=0; i<size; i++) 1695; if (trigger[i] && (in[i] != 0)) 1696; out[i] = (double) 0.5; 1697; } 1698 1699define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in, ptr noalias nocapture readonly %trigger, i32 %size) local_unnamed_addr #0 { 1700; AVX1-LABEL: @foo7( 1701; AVX1-NEXT: entry: 1702; AVX1-NEXT: [[CMP5:%.*]] = icmp eq i32 [[SIZE:%.*]], 0 1703; AVX1-NEXT: br i1 [[CMP5]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]] 1704; AVX1: iter.check: 1705; AVX1-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SIZE]] to i64 1706; AVX1-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4 1707; AVX1-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] 1708; AVX1: vector.main.loop.iter.check: 1709; AVX1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 16 1710; AVX1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1711; AVX1: vector.ph: 1712; AVX1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 16 1713; AVX1-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] 1714; AVX1-NEXT: br label [[VECTOR_BODY:%.*]] 1715; AVX1: vector.body: 1716; AVX1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1717; AVX1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 1718; AVX1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TRIGGER:%.*]], i64 [[TMP0]] 1719; AVX1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0 1720; AVX1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 4 1721; AVX1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 8 1722; AVX1-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 12 1723; AVX1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1 1724; AVX1-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 1725; AVX1-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 1726; AVX1-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1 1727; AVX1-NEXT: [[TMP6:%.*]] = and <4 x i8> [[WIDE_LOAD]], splat (i8 1) 1728; AVX1-NEXT: [[TMP7:%.*]] = and <4 x i8> [[WIDE_LOAD1]], splat (i8 1) 1729; AVX1-NEXT: [[TMP8:%.*]] = and <4 x i8> [[WIDE_LOAD2]], splat (i8 1) 1730; AVX1-NEXT: [[TMP9:%.*]] = and <4 x i8> [[WIDE_LOAD3]], splat (i8 1) 1731; AVX1-NEXT: [[TMP10:%.*]] = icmp eq <4 x i8> [[TMP6]], zeroinitializer 1732; AVX1-NEXT: [[TMP11:%.*]] = icmp eq <4 x i8> [[TMP7]], zeroinitializer 1733; AVX1-NEXT: [[TMP12:%.*]] = icmp eq <4 x i8> [[TMP8]], zeroinitializer 1734; AVX1-NEXT: [[TMP13:%.*]] = icmp eq <4 x i8> [[TMP9]], zeroinitializer 1735; AVX1-NEXT: [[TMP14:%.*]] = xor <4 x i1> [[TMP10]], splat (i1 true) 1736; AVX1-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[TMP11]], splat (i1 true) 1737; AVX1-NEXT: [[TMP16:%.*]] = xor <4 x i1> [[TMP12]], splat (i1 true) 1738; AVX1-NEXT: [[TMP17:%.*]] = xor <4 x i1> [[TMP13]], splat (i1 true) 1739; AVX1-NEXT: [[TMP18:%.*]] = getelementptr ptr, ptr [[IN:%.*]], i64 [[TMP0]] 1740; AVX1-NEXT: [[TMP19:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 0 1741; AVX1-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 4 1742; AVX1-NEXT: [[TMP21:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 8 1743; AVX1-NEXT: [[TMP22:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 12 1744; AVX1-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr [[TMP19]], i32 8, <4 x i1> [[TMP14]], <4 x ptr> poison) 1745; AVX1-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr [[TMP20]], i32 8, <4 x i1> [[TMP15]], <4 x ptr> poison) 1746; AVX1-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr [[TMP21]], i32 8, <4 x i1> [[TMP16]], <4 x ptr> poison) 1747; AVX1-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr [[TMP22]], i32 8, <4 x i1> [[TMP17]], <4 x ptr> poison) 1748; AVX1-NEXT: [[TMP23:%.*]] = icmp eq <4 x ptr> [[WIDE_MASKED_LOAD]], zeroinitializer 1749; AVX1-NEXT: [[TMP24:%.*]] = icmp eq <4 x ptr> [[WIDE_MASKED_LOAD4]], zeroinitializer 1750; AVX1-NEXT: [[TMP25:%.*]] = icmp eq <4 x ptr> [[WIDE_MASKED_LOAD5]], zeroinitializer 1751; AVX1-NEXT: [[TMP26:%.*]] = icmp eq <4 x ptr> [[WIDE_MASKED_LOAD6]], zeroinitializer 1752; AVX1-NEXT: [[TMP27:%.*]] = xor <4 x i1> [[TMP23]], splat (i1 true) 1753; AVX1-NEXT: [[TMP28:%.*]] = xor <4 x i1> [[TMP24]], splat (i1 true) 1754; AVX1-NEXT: [[TMP29:%.*]] = xor <4 x i1> [[TMP25]], splat (i1 true) 1755; AVX1-NEXT: [[TMP30:%.*]] = xor <4 x i1> [[TMP26]], splat (i1 true) 1756; AVX1-NEXT: [[TMP31:%.*]] = select <4 x i1> [[TMP14]], <4 x i1> [[TMP27]], <4 x i1> zeroinitializer 1757; AVX1-NEXT: [[TMP32:%.*]] = select <4 x i1> [[TMP15]], <4 x i1> [[TMP28]], <4 x i1> zeroinitializer 1758; AVX1-NEXT: [[TMP33:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> [[TMP29]], <4 x i1> zeroinitializer 1759; AVX1-NEXT: [[TMP34:%.*]] = select <4 x i1> [[TMP17]], <4 x i1> [[TMP30]], <4 x i1> zeroinitializer 1760; AVX1-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[OUT:%.*]], i64 [[TMP0]] 1761; AVX1-NEXT: [[TMP36:%.*]] = getelementptr double, ptr [[TMP35]], i32 0 1762; AVX1-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i32 4 1763; AVX1-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i32 8 1764; AVX1-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i32 12 1765; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP36]], i32 8, <4 x i1> [[TMP31]]) 1766; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP37]], i32 8, <4 x i1> [[TMP32]]) 1767; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP38]], i32 8, <4 x i1> [[TMP33]]) 1768; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP39]], i32 8, <4 x i1> [[TMP34]]) 1769; AVX1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 1770; AVX1-NEXT: [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1771; AVX1-NEXT: br i1 [[TMP40]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] 1772; AVX1: middle.block: 1773; AVX1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] 1774; AVX1-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] 1775; AVX1: vec.epilog.iter.check: 1776; AVX1-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] 1777; AVX1-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4 1778; AVX1-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[SCALAR_PH]] 1779; AVX1: vec.epilog.ph: 1780; AVX1-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] 1781; AVX1-NEXT: [[N_MOD_VF8:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4 1782; AVX1-NEXT: [[N_VEC9:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF8]] 1783; AVX1-NEXT: br label [[FOR_BODY:%.*]] 1784; AVX1: vec.epilog.vector.body: 1785; AVX1-NEXT: [[INDEX10:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDEX_NEXT13:%.*]], [[FOR_BODY]] ] 1786; AVX1-NEXT: [[TMP55:%.*]] = add i64 [[INDEX10]], 0 1787; AVX1-NEXT: [[TMP56:%.*]] = getelementptr inbounds i8, ptr [[TRIGGER]], i64 [[TMP55]] 1788; AVX1-NEXT: [[TMP57:%.*]] = getelementptr inbounds i8, ptr [[TMP56]], i32 0 1789; AVX1-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x i8>, ptr [[TMP57]], align 1 1790; AVX1-NEXT: [[TMP44:%.*]] = and <4 x i8> [[WIDE_LOAD11]], splat (i8 1) 1791; AVX1-NEXT: [[TMP45:%.*]] = icmp eq <4 x i8> [[TMP44]], zeroinitializer 1792; AVX1-NEXT: [[TMP46:%.*]] = xor <4 x i1> [[TMP45]], splat (i1 true) 1793; AVX1-NEXT: [[TMP47:%.*]] = getelementptr ptr, ptr [[IN]], i64 [[TMP55]] 1794; AVX1-NEXT: [[TMP48:%.*]] = getelementptr ptr, ptr [[TMP47]], i32 0 1795; AVX1-NEXT: [[WIDE_MASKED_LOAD12:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr [[TMP48]], i32 8, <4 x i1> [[TMP46]], <4 x ptr> poison) 1796; AVX1-NEXT: [[TMP49:%.*]] = icmp eq <4 x ptr> [[WIDE_MASKED_LOAD12]], zeroinitializer 1797; AVX1-NEXT: [[TMP50:%.*]] = xor <4 x i1> [[TMP49]], splat (i1 true) 1798; AVX1-NEXT: [[TMP51:%.*]] = select <4 x i1> [[TMP46]], <4 x i1> [[TMP50]], <4 x i1> zeroinitializer 1799; AVX1-NEXT: [[TMP52:%.*]] = getelementptr double, ptr [[OUT]], i64 [[TMP55]] 1800; AVX1-NEXT: [[TMP53:%.*]] = getelementptr double, ptr [[TMP52]], i32 0 1801; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP53]], i32 8, <4 x i1> [[TMP51]]) 1802; AVX1-NEXT: [[INDEX_NEXT13]] = add nuw i64 [[INDEX10]], 4 1803; AVX1-NEXT: [[TMP54:%.*]] = icmp eq i64 [[INDEX_NEXT13]], [[N_VEC9]] 1804; AVX1-NEXT: br i1 [[TMP54]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] 1805; AVX1: vec.epilog.middle.block: 1806; AVX1-NEXT: [[CMP_N14:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC9]] 1807; AVX1-NEXT: br i1 [[CMP_N14]], label [[FOR_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] 1808; AVX1: vec.epilog.scalar.ph: 1809; AVX1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] 1810; AVX1-NEXT: br label [[FOR_BODY1:%.*]] 1811; AVX1: for.body: 1812; AVX1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] 1813; AVX1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[TRIGGER]], i64 [[INDVARS_IV]] 1814; AVX1-NEXT: [[TMP41:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 1815; AVX1-NEXT: [[TMP42:%.*]] = and i8 [[TMP41]], 1 1816; AVX1-NEXT: [[TOBOOL:%.*]] = icmp eq i8 [[TMP42]], 0 1817; AVX1-NEXT: br i1 [[TOBOOL]], label [[FOR_INC]], label [[LAND_LHS_TRUE:%.*]] 1818; AVX1: land.lhs.true: 1819; AVX1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[IN]], i64 [[INDVARS_IV]] 1820; AVX1-NEXT: [[TMP43:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 1821; AVX1-NEXT: [[CMP3:%.*]] = icmp eq ptr [[TMP43]], null 1822; AVX1-NEXT: br i1 [[CMP3]], label [[FOR_INC]], label [[IF_THEN:%.*]] 1823; AVX1: if.then: 1824; AVX1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, ptr [[OUT]], i64 [[INDVARS_IV]] 1825; AVX1-NEXT: store double 5.000000e-01, ptr [[ARRAYIDX5]], align 8 1826; AVX1-NEXT: br label [[FOR_INC]] 1827; AVX1: for.inc: 1828; AVX1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1829; AVX1-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] 1830; AVX1-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY1]], !llvm.loop [[LOOP20:![0-9]+]] 1831; AVX1: for.end.loopexit: 1832; AVX1-NEXT: br label [[FOR_END]] 1833; AVX1: for.end: 1834; AVX1-NEXT: ret void 1835; 1836; AVX2-LABEL: @foo7( 1837; AVX2-NEXT: entry: 1838; AVX2-NEXT: [[CMP5:%.*]] = icmp eq i32 [[SIZE:%.*]], 0 1839; AVX2-NEXT: br i1 [[CMP5]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]] 1840; AVX2: iter.check: 1841; AVX2-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SIZE]] to i64 1842; AVX2-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4 1843; AVX2-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] 1844; AVX2: vector.main.loop.iter.check: 1845; AVX2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 16 1846; AVX2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1847; AVX2: vector.ph: 1848; AVX2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 16 1849; AVX2-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] 1850; AVX2-NEXT: br label [[VECTOR_BODY:%.*]] 1851; AVX2: vector.body: 1852; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1853; AVX2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 1854; AVX2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TRIGGER:%.*]], i64 [[TMP0]] 1855; AVX2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0 1856; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 4 1857; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 8 1858; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 12 1859; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1 1860; AVX2-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 1861; AVX2-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 1862; AVX2-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1 1863; AVX2-NEXT: [[TMP6:%.*]] = and <4 x i8> [[WIDE_LOAD]], splat (i8 1) 1864; AVX2-NEXT: [[TMP7:%.*]] = and <4 x i8> [[WIDE_LOAD1]], splat (i8 1) 1865; AVX2-NEXT: [[TMP8:%.*]] = and <4 x i8> [[WIDE_LOAD2]], splat (i8 1) 1866; AVX2-NEXT: [[TMP9:%.*]] = and <4 x i8> [[WIDE_LOAD3]], splat (i8 1) 1867; AVX2-NEXT: [[TMP10:%.*]] = icmp eq <4 x i8> [[TMP6]], zeroinitializer 1868; AVX2-NEXT: [[TMP11:%.*]] = icmp eq <4 x i8> [[TMP7]], zeroinitializer 1869; AVX2-NEXT: [[TMP12:%.*]] = icmp eq <4 x i8> [[TMP8]], zeroinitializer 1870; AVX2-NEXT: [[TMP13:%.*]] = icmp eq <4 x i8> [[TMP9]], zeroinitializer 1871; AVX2-NEXT: [[TMP14:%.*]] = xor <4 x i1> [[TMP10]], splat (i1 true) 1872; AVX2-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[TMP11]], splat (i1 true) 1873; AVX2-NEXT: [[TMP16:%.*]] = xor <4 x i1> [[TMP12]], splat (i1 true) 1874; AVX2-NEXT: [[TMP17:%.*]] = xor <4 x i1> [[TMP13]], splat (i1 true) 1875; AVX2-NEXT: [[TMP18:%.*]] = getelementptr ptr, ptr [[IN:%.*]], i64 [[TMP0]] 1876; AVX2-NEXT: [[TMP19:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 0 1877; AVX2-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 4 1878; AVX2-NEXT: [[TMP21:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 8 1879; AVX2-NEXT: [[TMP22:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 12 1880; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr [[TMP19]], i32 8, <4 x i1> [[TMP14]], <4 x ptr> poison) 1881; AVX2-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr [[TMP20]], i32 8, <4 x i1> [[TMP15]], <4 x ptr> poison) 1882; AVX2-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr [[TMP21]], i32 8, <4 x i1> [[TMP16]], <4 x ptr> poison) 1883; AVX2-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr [[TMP22]], i32 8, <4 x i1> [[TMP17]], <4 x ptr> poison) 1884; AVX2-NEXT: [[TMP23:%.*]] = icmp eq <4 x ptr> [[WIDE_MASKED_LOAD]], zeroinitializer 1885; AVX2-NEXT: [[TMP24:%.*]] = icmp eq <4 x ptr> [[WIDE_MASKED_LOAD4]], zeroinitializer 1886; AVX2-NEXT: [[TMP25:%.*]] = icmp eq <4 x ptr> [[WIDE_MASKED_LOAD5]], zeroinitializer 1887; AVX2-NEXT: [[TMP26:%.*]] = icmp eq <4 x ptr> [[WIDE_MASKED_LOAD6]], zeroinitializer 1888; AVX2-NEXT: [[TMP27:%.*]] = xor <4 x i1> [[TMP23]], splat (i1 true) 1889; AVX2-NEXT: [[TMP28:%.*]] = xor <4 x i1> [[TMP24]], splat (i1 true) 1890; AVX2-NEXT: [[TMP29:%.*]] = xor <4 x i1> [[TMP25]], splat (i1 true) 1891; AVX2-NEXT: [[TMP30:%.*]] = xor <4 x i1> [[TMP26]], splat (i1 true) 1892; AVX2-NEXT: [[TMP31:%.*]] = select <4 x i1> [[TMP14]], <4 x i1> [[TMP27]], <4 x i1> zeroinitializer 1893; AVX2-NEXT: [[TMP32:%.*]] = select <4 x i1> [[TMP15]], <4 x i1> [[TMP28]], <4 x i1> zeroinitializer 1894; AVX2-NEXT: [[TMP33:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> [[TMP29]], <4 x i1> zeroinitializer 1895; AVX2-NEXT: [[TMP34:%.*]] = select <4 x i1> [[TMP17]], <4 x i1> [[TMP30]], <4 x i1> zeroinitializer 1896; AVX2-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[OUT:%.*]], i64 [[TMP0]] 1897; AVX2-NEXT: [[TMP36:%.*]] = getelementptr double, ptr [[TMP35]], i32 0 1898; AVX2-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i32 4 1899; AVX2-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i32 8 1900; AVX2-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i32 12 1901; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP36]], i32 8, <4 x i1> [[TMP31]]) 1902; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP37]], i32 8, <4 x i1> [[TMP32]]) 1903; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP38]], i32 8, <4 x i1> [[TMP33]]) 1904; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP39]], i32 8, <4 x i1> [[TMP34]]) 1905; AVX2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 1906; AVX2-NEXT: [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1907; AVX2-NEXT: br i1 [[TMP40]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]] 1908; AVX2: middle.block: 1909; AVX2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] 1910; AVX2-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] 1911; AVX2: vec.epilog.iter.check: 1912; AVX2-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] 1913; AVX2-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4 1914; AVX2-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[SCALAR_PH]] 1915; AVX2: vec.epilog.ph: 1916; AVX2-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] 1917; AVX2-NEXT: [[N_MOD_VF8:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4 1918; AVX2-NEXT: [[N_VEC9:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF8]] 1919; AVX2-NEXT: br label [[FOR_BODY:%.*]] 1920; AVX2: vec.epilog.vector.body: 1921; AVX2-NEXT: [[INDEX10:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDEX_NEXT13:%.*]], [[FOR_BODY]] ] 1922; AVX2-NEXT: [[TMP55:%.*]] = add i64 [[INDEX10]], 0 1923; AVX2-NEXT: [[TMP56:%.*]] = getelementptr inbounds i8, ptr [[TRIGGER]], i64 [[TMP55]] 1924; AVX2-NEXT: [[TMP57:%.*]] = getelementptr inbounds i8, ptr [[TMP56]], i32 0 1925; AVX2-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x i8>, ptr [[TMP57]], align 1 1926; AVX2-NEXT: [[TMP44:%.*]] = and <4 x i8> [[WIDE_LOAD11]], splat (i8 1) 1927; AVX2-NEXT: [[TMP45:%.*]] = icmp eq <4 x i8> [[TMP44]], zeroinitializer 1928; AVX2-NEXT: [[TMP46:%.*]] = xor <4 x i1> [[TMP45]], splat (i1 true) 1929; AVX2-NEXT: [[TMP47:%.*]] = getelementptr ptr, ptr [[IN]], i64 [[TMP55]] 1930; AVX2-NEXT: [[TMP48:%.*]] = getelementptr ptr, ptr [[TMP47]], i32 0 1931; AVX2-NEXT: [[WIDE_MASKED_LOAD12:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr [[TMP48]], i32 8, <4 x i1> [[TMP46]], <4 x ptr> poison) 1932; AVX2-NEXT: [[TMP49:%.*]] = icmp eq <4 x ptr> [[WIDE_MASKED_LOAD12]], zeroinitializer 1933; AVX2-NEXT: [[TMP50:%.*]] = xor <4 x i1> [[TMP49]], splat (i1 true) 1934; AVX2-NEXT: [[TMP51:%.*]] = select <4 x i1> [[TMP46]], <4 x i1> [[TMP50]], <4 x i1> zeroinitializer 1935; AVX2-NEXT: [[TMP52:%.*]] = getelementptr double, ptr [[OUT]], i64 [[TMP55]] 1936; AVX2-NEXT: [[TMP53:%.*]] = getelementptr double, ptr [[TMP52]], i32 0 1937; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP53]], i32 8, <4 x i1> [[TMP51]]) 1938; AVX2-NEXT: [[INDEX_NEXT13]] = add nuw i64 [[INDEX10]], 4 1939; AVX2-NEXT: [[TMP54:%.*]] = icmp eq i64 [[INDEX_NEXT13]], [[N_VEC9]] 1940; AVX2-NEXT: br i1 [[TMP54]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] 1941; AVX2: vec.epilog.middle.block: 1942; AVX2-NEXT: [[CMP_N14:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC9]] 1943; AVX2-NEXT: br i1 [[CMP_N14]], label [[FOR_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] 1944; AVX2: vec.epilog.scalar.ph: 1945; AVX2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] 1946; AVX2-NEXT: br label [[FOR_BODY1:%.*]] 1947; AVX2: for.body: 1948; AVX2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] 1949; AVX2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[TRIGGER]], i64 [[INDVARS_IV]] 1950; AVX2-NEXT: [[TMP41:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 1951; AVX2-NEXT: [[TMP42:%.*]] = and i8 [[TMP41]], 1 1952; AVX2-NEXT: [[TOBOOL:%.*]] = icmp eq i8 [[TMP42]], 0 1953; AVX2-NEXT: br i1 [[TOBOOL]], label [[FOR_INC]], label [[LAND_LHS_TRUE:%.*]] 1954; AVX2: land.lhs.true: 1955; AVX2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[IN]], i64 [[INDVARS_IV]] 1956; AVX2-NEXT: [[TMP43:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 1957; AVX2-NEXT: [[CMP3:%.*]] = icmp eq ptr [[TMP43]], null 1958; AVX2-NEXT: br i1 [[CMP3]], label [[FOR_INC]], label [[IF_THEN:%.*]] 1959; AVX2: if.then: 1960; AVX2-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, ptr [[OUT]], i64 [[INDVARS_IV]] 1961; AVX2-NEXT: store double 5.000000e-01, ptr [[ARRAYIDX5]], align 8 1962; AVX2-NEXT: br label [[FOR_INC]] 1963; AVX2: for.inc: 1964; AVX2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1965; AVX2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] 1966; AVX2-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY1]], !llvm.loop [[LOOP33:![0-9]+]] 1967; AVX2: for.end.loopexit: 1968; AVX2-NEXT: br label [[FOR_END]] 1969; AVX2: for.end: 1970; AVX2-NEXT: ret void 1971; 1972; AVX512-LABEL: @foo7( 1973; AVX512-NEXT: entry: 1974; AVX512-NEXT: [[CMP5:%.*]] = icmp eq i32 [[SIZE:%.*]], 0 1975; AVX512-NEXT: br i1 [[CMP5]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]] 1976; AVX512: iter.check: 1977; AVX512-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SIZE]] to i64 1978; AVX512-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 8 1979; AVX512-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] 1980; AVX512: vector.main.loop.iter.check: 1981; AVX512-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 32 1982; AVX512-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1983; AVX512: vector.ph: 1984; AVX512-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 32 1985; AVX512-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] 1986; AVX512-NEXT: br label [[VECTOR_BODY:%.*]] 1987; AVX512: vector.body: 1988; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1989; AVX512-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 1990; AVX512-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TRIGGER:%.*]], i64 [[TMP0]] 1991; AVX512-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0 1992; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 8 1993; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 16 1994; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 24 1995; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1 1996; AVX512-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP3]], align 1 1997; AVX512-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i8>, ptr [[TMP4]], align 1 1998; AVX512-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x i8>, ptr [[TMP5]], align 1 1999; AVX512-NEXT: [[TMP6:%.*]] = and <8 x i8> [[WIDE_LOAD]], splat (i8 1) 2000; AVX512-NEXT: [[TMP7:%.*]] = and <8 x i8> [[WIDE_LOAD1]], splat (i8 1) 2001; AVX512-NEXT: [[TMP8:%.*]] = and <8 x i8> [[WIDE_LOAD2]], splat (i8 1) 2002; AVX512-NEXT: [[TMP9:%.*]] = and <8 x i8> [[WIDE_LOAD3]], splat (i8 1) 2003; AVX512-NEXT: [[TMP10:%.*]] = icmp eq <8 x i8> [[TMP6]], zeroinitializer 2004; AVX512-NEXT: [[TMP11:%.*]] = icmp eq <8 x i8> [[TMP7]], zeroinitializer 2005; AVX512-NEXT: [[TMP12:%.*]] = icmp eq <8 x i8> [[TMP8]], zeroinitializer 2006; AVX512-NEXT: [[TMP13:%.*]] = icmp eq <8 x i8> [[TMP9]], zeroinitializer 2007; AVX512-NEXT: [[TMP14:%.*]] = xor <8 x i1> [[TMP10]], splat (i1 true) 2008; AVX512-NEXT: [[TMP15:%.*]] = xor <8 x i1> [[TMP11]], splat (i1 true) 2009; AVX512-NEXT: [[TMP16:%.*]] = xor <8 x i1> [[TMP12]], splat (i1 true) 2010; AVX512-NEXT: [[TMP17:%.*]] = xor <8 x i1> [[TMP13]], splat (i1 true) 2011; AVX512-NEXT: [[TMP18:%.*]] = getelementptr ptr, ptr [[IN:%.*]], i64 [[TMP0]] 2012; AVX512-NEXT: [[TMP19:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 0 2013; AVX512-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 8 2014; AVX512-NEXT: [[TMP21:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 16 2015; AVX512-NEXT: [[TMP22:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 24 2016; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x ptr> @llvm.masked.load.v8p0.p0(ptr [[TMP19]], i32 8, <8 x i1> [[TMP14]], <8 x ptr> poison) 2017; AVX512-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <8 x ptr> @llvm.masked.load.v8p0.p0(ptr [[TMP20]], i32 8, <8 x i1> [[TMP15]], <8 x ptr> poison) 2018; AVX512-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <8 x ptr> @llvm.masked.load.v8p0.p0(ptr [[TMP21]], i32 8, <8 x i1> [[TMP16]], <8 x ptr> poison) 2019; AVX512-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <8 x ptr> @llvm.masked.load.v8p0.p0(ptr [[TMP22]], i32 8, <8 x i1> [[TMP17]], <8 x ptr> poison) 2020; AVX512-NEXT: [[TMP23:%.*]] = icmp eq <8 x ptr> [[WIDE_MASKED_LOAD]], zeroinitializer 2021; AVX512-NEXT: [[TMP24:%.*]] = icmp eq <8 x ptr> [[WIDE_MASKED_LOAD4]], zeroinitializer 2022; AVX512-NEXT: [[TMP25:%.*]] = icmp eq <8 x ptr> [[WIDE_MASKED_LOAD5]], zeroinitializer 2023; AVX512-NEXT: [[TMP26:%.*]] = icmp eq <8 x ptr> [[WIDE_MASKED_LOAD6]], zeroinitializer 2024; AVX512-NEXT: [[TMP27:%.*]] = xor <8 x i1> [[TMP23]], splat (i1 true) 2025; AVX512-NEXT: [[TMP28:%.*]] = xor <8 x i1> [[TMP24]], splat (i1 true) 2026; AVX512-NEXT: [[TMP29:%.*]] = xor <8 x i1> [[TMP25]], splat (i1 true) 2027; AVX512-NEXT: [[TMP30:%.*]] = xor <8 x i1> [[TMP26]], splat (i1 true) 2028; AVX512-NEXT: [[TMP31:%.*]] = select <8 x i1> [[TMP14]], <8 x i1> [[TMP27]], <8 x i1> zeroinitializer 2029; AVX512-NEXT: [[TMP32:%.*]] = select <8 x i1> [[TMP15]], <8 x i1> [[TMP28]], <8 x i1> zeroinitializer 2030; AVX512-NEXT: [[TMP33:%.*]] = select <8 x i1> [[TMP16]], <8 x i1> [[TMP29]], <8 x i1> zeroinitializer 2031; AVX512-NEXT: [[TMP34:%.*]] = select <8 x i1> [[TMP17]], <8 x i1> [[TMP30]], <8 x i1> zeroinitializer 2032; AVX512-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[OUT:%.*]], i64 [[TMP0]] 2033; AVX512-NEXT: [[TMP36:%.*]] = getelementptr double, ptr [[TMP35]], i32 0 2034; AVX512-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i32 8 2035; AVX512-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i32 16 2036; AVX512-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i32 24 2037; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr [[TMP36]], i32 8, <8 x i1> [[TMP31]]) 2038; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr [[TMP37]], i32 8, <8 x i1> [[TMP32]]) 2039; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr [[TMP38]], i32 8, <8 x i1> [[TMP33]]) 2040; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr [[TMP39]], i32 8, <8 x i1> [[TMP34]]) 2041; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 2042; AVX512-NEXT: [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 2043; AVX512-NEXT: br i1 [[TMP40]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]] 2044; AVX512: middle.block: 2045; AVX512-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] 2046; AVX512-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] 2047; AVX512: vec.epilog.iter.check: 2048; AVX512-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] 2049; AVX512-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 8 2050; AVX512-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[SCALAR_PH]] 2051; AVX512: vec.epilog.ph: 2052; AVX512-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] 2053; AVX512-NEXT: [[N_MOD_VF8:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 8 2054; AVX512-NEXT: [[N_VEC9:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF8]] 2055; AVX512-NEXT: br label [[FOR_BODY:%.*]] 2056; AVX512: vec.epilog.vector.body: 2057; AVX512-NEXT: [[INDEX10:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDEX_NEXT13:%.*]], [[FOR_BODY]] ] 2058; AVX512-NEXT: [[TMP55:%.*]] = add i64 [[INDEX10]], 0 2059; AVX512-NEXT: [[TMP56:%.*]] = getelementptr inbounds i8, ptr [[TRIGGER]], i64 [[TMP55]] 2060; AVX512-NEXT: [[TMP57:%.*]] = getelementptr inbounds i8, ptr [[TMP56]], i32 0 2061; AVX512-NEXT: [[WIDE_LOAD11:%.*]] = load <8 x i8>, ptr [[TMP57]], align 1 2062; AVX512-NEXT: [[TMP44:%.*]] = and <8 x i8> [[WIDE_LOAD11]], splat (i8 1) 2063; AVX512-NEXT: [[TMP45:%.*]] = icmp eq <8 x i8> [[TMP44]], zeroinitializer 2064; AVX512-NEXT: [[TMP46:%.*]] = xor <8 x i1> [[TMP45]], splat (i1 true) 2065; AVX512-NEXT: [[TMP47:%.*]] = getelementptr ptr, ptr [[IN]], i64 [[TMP55]] 2066; AVX512-NEXT: [[TMP48:%.*]] = getelementptr ptr, ptr [[TMP47]], i32 0 2067; AVX512-NEXT: [[WIDE_MASKED_LOAD12:%.*]] = call <8 x ptr> @llvm.masked.load.v8p0.p0(ptr [[TMP48]], i32 8, <8 x i1> [[TMP46]], <8 x ptr> poison) 2068; AVX512-NEXT: [[TMP49:%.*]] = icmp eq <8 x ptr> [[WIDE_MASKED_LOAD12]], zeroinitializer 2069; AVX512-NEXT: [[TMP50:%.*]] = xor <8 x i1> [[TMP49]], splat (i1 true) 2070; AVX512-NEXT: [[TMP51:%.*]] = select <8 x i1> [[TMP46]], <8 x i1> [[TMP50]], <8 x i1> zeroinitializer 2071; AVX512-NEXT: [[TMP52:%.*]] = getelementptr double, ptr [[OUT]], i64 [[TMP55]] 2072; AVX512-NEXT: [[TMP53:%.*]] = getelementptr double, ptr [[TMP52]], i32 0 2073; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr [[TMP53]], i32 8, <8 x i1> [[TMP51]]) 2074; AVX512-NEXT: [[INDEX_NEXT13]] = add nuw i64 [[INDEX10]], 8 2075; AVX512-NEXT: [[TMP54:%.*]] = icmp eq i64 [[INDEX_NEXT13]], [[N_VEC9]] 2076; AVX512-NEXT: br i1 [[TMP54]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP51:![0-9]+]] 2077; AVX512: vec.epilog.middle.block: 2078; AVX512-NEXT: [[CMP_N14:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC9]] 2079; AVX512-NEXT: br i1 [[CMP_N14]], label [[FOR_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] 2080; AVX512: vec.epilog.scalar.ph: 2081; AVX512-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] 2082; AVX512-NEXT: br label [[FOR_BODY1:%.*]] 2083; AVX512: for.body: 2084; AVX512-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] 2085; AVX512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[TRIGGER]], i64 [[INDVARS_IV]] 2086; AVX512-NEXT: [[TMP41:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 2087; AVX512-NEXT: [[TMP42:%.*]] = and i8 [[TMP41]], 1 2088; AVX512-NEXT: [[TOBOOL:%.*]] = icmp eq i8 [[TMP42]], 0 2089; AVX512-NEXT: br i1 [[TOBOOL]], label [[FOR_INC]], label [[LAND_LHS_TRUE:%.*]] 2090; AVX512: land.lhs.true: 2091; AVX512-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[IN]], i64 [[INDVARS_IV]] 2092; AVX512-NEXT: [[TMP43:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 2093; AVX512-NEXT: [[CMP3:%.*]] = icmp eq ptr [[TMP43]], null 2094; AVX512-NEXT: br i1 [[CMP3]], label [[FOR_INC]], label [[IF_THEN:%.*]] 2095; AVX512: if.then: 2096; AVX512-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, ptr [[OUT]], i64 [[INDVARS_IV]] 2097; AVX512-NEXT: store double 5.000000e-01, ptr [[ARRAYIDX5]], align 8 2098; AVX512-NEXT: br label [[FOR_INC]] 2099; AVX512: for.inc: 2100; AVX512-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 2101; AVX512-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] 2102; AVX512-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY1]], !llvm.loop [[LOOP52:![0-9]+]] 2103; AVX512: for.end.loopexit: 2104; AVX512-NEXT: br label [[FOR_END]] 2105; AVX512: for.end: 2106; AVX512-NEXT: ret void 2107; 2108entry: 2109 %cmp5 = icmp eq i32 %size, 0 2110 br i1 %cmp5, label %for.end, label %for.body.preheader 2111 2112for.body.preheader: ; preds = %entry 2113 %wide.trip.count = zext i32 %size to i64 2114 br label %for.body 2115 2116for.body: ; preds = %for.inc, %for.body.preheader 2117 %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ] 2118 %arrayidx = getelementptr inbounds i8, ptr %trigger, i64 %indvars.iv 2119 %0 = load i8, ptr %arrayidx, align 1 2120 %1 = and i8 %0, 1 2121 %tobool = icmp eq i8 %1, 0 2122 br i1 %tobool, label %for.inc, label %land.lhs.true 2123 2124land.lhs.true: ; preds = %for.body 2125 %arrayidx2 = getelementptr inbounds ptr, ptr %in, i64 %indvars.iv 2126 %2 = load ptr, ptr %arrayidx2, align 8 2127 %cmp3 = icmp eq ptr %2, null 2128 br i1 %cmp3, label %for.inc, label %if.then 2129 2130if.then: ; preds = %land.lhs.true 2131 %arrayidx5 = getelementptr inbounds double, ptr %out, i64 %indvars.iv 2132 store double 5.000000e-01, ptr %arrayidx5, align 8 2133 br label %for.inc 2134 2135for.inc: ; preds = %land.lhs.true, %for.body, %if.then 2136 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 2137 %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count 2138 br i1 %exitcond, label %for.end, label %for.body 2139 2140for.end: ; preds = %for.inc, %entry 2141 ret void 2142} 2143 2144;typedef int (*fp)(); 2145;void foo8 (ptr __restrict__ out, fp* __restrict__ in, bool * __restrict__ trigger, unsigned size) { 2146; 2147; for (unsigned i=0; i<size; i++) 2148; if (trigger[i] && (in[i] != 0)) 2149; out[i] = (double) 0.5; 2150;} 2151 2152define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in, ptr noalias nocapture readonly %trigger, i32 %size) local_unnamed_addr #0 { 2153; AVX1-LABEL: @foo8( 2154; AVX1-NEXT: entry: 2155; AVX1-NEXT: [[CMP5:%.*]] = icmp eq i32 [[SIZE:%.*]], 0 2156; AVX1-NEXT: br i1 [[CMP5]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]] 2157; AVX1: iter.check: 2158; AVX1-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SIZE]] to i64 2159; AVX1-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4 2160; AVX1-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] 2161; AVX1: vector.main.loop.iter.check: 2162; AVX1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 16 2163; AVX1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 2164; AVX1: vector.ph: 2165; AVX1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 16 2166; AVX1-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] 2167; AVX1-NEXT: br label [[VECTOR_BODY:%.*]] 2168; AVX1: vector.body: 2169; AVX1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 2170; AVX1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 2171; AVX1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TRIGGER:%.*]], i64 [[TMP0]] 2172; AVX1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0 2173; AVX1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 4 2174; AVX1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 8 2175; AVX1-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 12 2176; AVX1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1 2177; AVX1-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 2178; AVX1-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 2179; AVX1-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1 2180; AVX1-NEXT: [[TMP6:%.*]] = and <4 x i8> [[WIDE_LOAD]], splat (i8 1) 2181; AVX1-NEXT: [[TMP7:%.*]] = and <4 x i8> [[WIDE_LOAD1]], splat (i8 1) 2182; AVX1-NEXT: [[TMP8:%.*]] = and <4 x i8> [[WIDE_LOAD2]], splat (i8 1) 2183; AVX1-NEXT: [[TMP9:%.*]] = and <4 x i8> [[WIDE_LOAD3]], splat (i8 1) 2184; AVX1-NEXT: [[TMP10:%.*]] = icmp eq <4 x i8> [[TMP6]], zeroinitializer 2185; AVX1-NEXT: [[TMP11:%.*]] = icmp eq <4 x i8> [[TMP7]], zeroinitializer 2186; AVX1-NEXT: [[TMP12:%.*]] = icmp eq <4 x i8> [[TMP8]], zeroinitializer 2187; AVX1-NEXT: [[TMP13:%.*]] = icmp eq <4 x i8> [[TMP9]], zeroinitializer 2188; AVX1-NEXT: [[TMP14:%.*]] = xor <4 x i1> [[TMP10]], splat (i1 true) 2189; AVX1-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[TMP11]], splat (i1 true) 2190; AVX1-NEXT: [[TMP16:%.*]] = xor <4 x i1> [[TMP12]], splat (i1 true) 2191; AVX1-NEXT: [[TMP17:%.*]] = xor <4 x i1> [[TMP13]], splat (i1 true) 2192; AVX1-NEXT: [[TMP18:%.*]] = getelementptr ptr, ptr [[IN:%.*]], i64 [[TMP0]] 2193; AVX1-NEXT: [[TMP19:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 0 2194; AVX1-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 4 2195; AVX1-NEXT: [[TMP21:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 8 2196; AVX1-NEXT: [[TMP22:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 12 2197; AVX1-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr [[TMP19]], i32 8, <4 x i1> [[TMP14]], <4 x ptr> poison) 2198; AVX1-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr [[TMP20]], i32 8, <4 x i1> [[TMP15]], <4 x ptr> poison) 2199; AVX1-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr [[TMP21]], i32 8, <4 x i1> [[TMP16]], <4 x ptr> poison) 2200; AVX1-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr [[TMP22]], i32 8, <4 x i1> [[TMP17]], <4 x ptr> poison) 2201; AVX1-NEXT: [[TMP23:%.*]] = icmp eq <4 x ptr> [[WIDE_MASKED_LOAD]], zeroinitializer 2202; AVX1-NEXT: [[TMP24:%.*]] = icmp eq <4 x ptr> [[WIDE_MASKED_LOAD4]], zeroinitializer 2203; AVX1-NEXT: [[TMP25:%.*]] = icmp eq <4 x ptr> [[WIDE_MASKED_LOAD5]], zeroinitializer 2204; AVX1-NEXT: [[TMP26:%.*]] = icmp eq <4 x ptr> [[WIDE_MASKED_LOAD6]], zeroinitializer 2205; AVX1-NEXT: [[TMP27:%.*]] = xor <4 x i1> [[TMP23]], splat (i1 true) 2206; AVX1-NEXT: [[TMP28:%.*]] = xor <4 x i1> [[TMP24]], splat (i1 true) 2207; AVX1-NEXT: [[TMP29:%.*]] = xor <4 x i1> [[TMP25]], splat (i1 true) 2208; AVX1-NEXT: [[TMP30:%.*]] = xor <4 x i1> [[TMP26]], splat (i1 true) 2209; AVX1-NEXT: [[TMP31:%.*]] = select <4 x i1> [[TMP14]], <4 x i1> [[TMP27]], <4 x i1> zeroinitializer 2210; AVX1-NEXT: [[TMP32:%.*]] = select <4 x i1> [[TMP15]], <4 x i1> [[TMP28]], <4 x i1> zeroinitializer 2211; AVX1-NEXT: [[TMP33:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> [[TMP29]], <4 x i1> zeroinitializer 2212; AVX1-NEXT: [[TMP34:%.*]] = select <4 x i1> [[TMP17]], <4 x i1> [[TMP30]], <4 x i1> zeroinitializer 2213; AVX1-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[OUT:%.*]], i64 [[TMP0]] 2214; AVX1-NEXT: [[TMP36:%.*]] = getelementptr double, ptr [[TMP35]], i32 0 2215; AVX1-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i32 4 2216; AVX1-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i32 8 2217; AVX1-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i32 12 2218; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP36]], i32 8, <4 x i1> [[TMP31]]) 2219; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP37]], i32 8, <4 x i1> [[TMP32]]) 2220; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP38]], i32 8, <4 x i1> [[TMP33]]) 2221; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP39]], i32 8, <4 x i1> [[TMP34]]) 2222; AVX1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 2223; AVX1-NEXT: [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 2224; AVX1-NEXT: br i1 [[TMP40]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] 2225; AVX1: middle.block: 2226; AVX1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] 2227; AVX1-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] 2228; AVX1: vec.epilog.iter.check: 2229; AVX1-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] 2230; AVX1-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4 2231; AVX1-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[SCALAR_PH]] 2232; AVX1: vec.epilog.ph: 2233; AVX1-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] 2234; AVX1-NEXT: [[N_MOD_VF8:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4 2235; AVX1-NEXT: [[N_VEC9:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF8]] 2236; AVX1-NEXT: br label [[FOR_BODY:%.*]] 2237; AVX1: vec.epilog.vector.body: 2238; AVX1-NEXT: [[INDEX10:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDEX_NEXT13:%.*]], [[FOR_BODY]] ] 2239; AVX1-NEXT: [[TMP55:%.*]] = add i64 [[INDEX10]], 0 2240; AVX1-NEXT: [[TMP56:%.*]] = getelementptr inbounds i8, ptr [[TRIGGER]], i64 [[TMP55]] 2241; AVX1-NEXT: [[TMP57:%.*]] = getelementptr inbounds i8, ptr [[TMP56]], i32 0 2242; AVX1-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x i8>, ptr [[TMP57]], align 1 2243; AVX1-NEXT: [[TMP44:%.*]] = and <4 x i8> [[WIDE_LOAD11]], splat (i8 1) 2244; AVX1-NEXT: [[TMP45:%.*]] = icmp eq <4 x i8> [[TMP44]], zeroinitializer 2245; AVX1-NEXT: [[TMP46:%.*]] = xor <4 x i1> [[TMP45]], splat (i1 true) 2246; AVX1-NEXT: [[TMP47:%.*]] = getelementptr ptr, ptr [[IN]], i64 [[TMP55]] 2247; AVX1-NEXT: [[TMP48:%.*]] = getelementptr ptr, ptr [[TMP47]], i32 0 2248; AVX1-NEXT: [[WIDE_MASKED_LOAD12:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr [[TMP48]], i32 8, <4 x i1> [[TMP46]], <4 x ptr> poison) 2249; AVX1-NEXT: [[TMP49:%.*]] = icmp eq <4 x ptr> [[WIDE_MASKED_LOAD12]], zeroinitializer 2250; AVX1-NEXT: [[TMP50:%.*]] = xor <4 x i1> [[TMP49]], splat (i1 true) 2251; AVX1-NEXT: [[TMP51:%.*]] = select <4 x i1> [[TMP46]], <4 x i1> [[TMP50]], <4 x i1> zeroinitializer 2252; AVX1-NEXT: [[TMP52:%.*]] = getelementptr double, ptr [[OUT]], i64 [[TMP55]] 2253; AVX1-NEXT: [[TMP53:%.*]] = getelementptr double, ptr [[TMP52]], i32 0 2254; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP53]], i32 8, <4 x i1> [[TMP51]]) 2255; AVX1-NEXT: [[INDEX_NEXT13]] = add nuw i64 [[INDEX10]], 4 2256; AVX1-NEXT: [[TMP54:%.*]] = icmp eq i64 [[INDEX_NEXT13]], [[N_VEC9]] 2257; AVX1-NEXT: br i1 [[TMP54]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] 2258; AVX1: vec.epilog.middle.block: 2259; AVX1-NEXT: [[CMP_N14:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC9]] 2260; AVX1-NEXT: br i1 [[CMP_N14]], label [[FOR_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] 2261; AVX1: vec.epilog.scalar.ph: 2262; AVX1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] 2263; AVX1-NEXT: br label [[FOR_BODY1:%.*]] 2264; AVX1: for.body: 2265; AVX1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] 2266; AVX1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[TRIGGER]], i64 [[INDVARS_IV]] 2267; AVX1-NEXT: [[TMP41:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 2268; AVX1-NEXT: [[TMP42:%.*]] = and i8 [[TMP41]], 1 2269; AVX1-NEXT: [[TOBOOL:%.*]] = icmp eq i8 [[TMP42]], 0 2270; AVX1-NEXT: br i1 [[TOBOOL]], label [[FOR_INC]], label [[LAND_LHS_TRUE:%.*]] 2271; AVX1: land.lhs.true: 2272; AVX1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[IN]], i64 [[INDVARS_IV]] 2273; AVX1-NEXT: [[TMP43:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 2274; AVX1-NEXT: [[CMP3:%.*]] = icmp eq ptr [[TMP43]], null 2275; AVX1-NEXT: br i1 [[CMP3]], label [[FOR_INC]], label [[IF_THEN:%.*]] 2276; AVX1: if.then: 2277; AVX1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, ptr [[OUT]], i64 [[INDVARS_IV]] 2278; AVX1-NEXT: store double 5.000000e-01, ptr [[ARRAYIDX5]], align 8 2279; AVX1-NEXT: br label [[FOR_INC]] 2280; AVX1: for.inc: 2281; AVX1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 2282; AVX1-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] 2283; AVX1-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY1]], !llvm.loop [[LOOP23:![0-9]+]] 2284; AVX1: for.end.loopexit: 2285; AVX1-NEXT: br label [[FOR_END]] 2286; AVX1: for.end: 2287; AVX1-NEXT: ret void 2288; 2289; AVX2-LABEL: @foo8( 2290; AVX2-NEXT: entry: 2291; AVX2-NEXT: [[CMP5:%.*]] = icmp eq i32 [[SIZE:%.*]], 0 2292; AVX2-NEXT: br i1 [[CMP5]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]] 2293; AVX2: iter.check: 2294; AVX2-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SIZE]] to i64 2295; AVX2-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4 2296; AVX2-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] 2297; AVX2: vector.main.loop.iter.check: 2298; AVX2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 16 2299; AVX2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 2300; AVX2: vector.ph: 2301; AVX2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 16 2302; AVX2-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] 2303; AVX2-NEXT: br label [[VECTOR_BODY:%.*]] 2304; AVX2: vector.body: 2305; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 2306; AVX2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 2307; AVX2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TRIGGER:%.*]], i64 [[TMP0]] 2308; AVX2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0 2309; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 4 2310; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 8 2311; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 12 2312; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1 2313; AVX2-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 2314; AVX2-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 2315; AVX2-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1 2316; AVX2-NEXT: [[TMP6:%.*]] = and <4 x i8> [[WIDE_LOAD]], splat (i8 1) 2317; AVX2-NEXT: [[TMP7:%.*]] = and <4 x i8> [[WIDE_LOAD1]], splat (i8 1) 2318; AVX2-NEXT: [[TMP8:%.*]] = and <4 x i8> [[WIDE_LOAD2]], splat (i8 1) 2319; AVX2-NEXT: [[TMP9:%.*]] = and <4 x i8> [[WIDE_LOAD3]], splat (i8 1) 2320; AVX2-NEXT: [[TMP10:%.*]] = icmp eq <4 x i8> [[TMP6]], zeroinitializer 2321; AVX2-NEXT: [[TMP11:%.*]] = icmp eq <4 x i8> [[TMP7]], zeroinitializer 2322; AVX2-NEXT: [[TMP12:%.*]] = icmp eq <4 x i8> [[TMP8]], zeroinitializer 2323; AVX2-NEXT: [[TMP13:%.*]] = icmp eq <4 x i8> [[TMP9]], zeroinitializer 2324; AVX2-NEXT: [[TMP14:%.*]] = xor <4 x i1> [[TMP10]], splat (i1 true) 2325; AVX2-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[TMP11]], splat (i1 true) 2326; AVX2-NEXT: [[TMP16:%.*]] = xor <4 x i1> [[TMP12]], splat (i1 true) 2327; AVX2-NEXT: [[TMP17:%.*]] = xor <4 x i1> [[TMP13]], splat (i1 true) 2328; AVX2-NEXT: [[TMP18:%.*]] = getelementptr ptr, ptr [[IN:%.*]], i64 [[TMP0]] 2329; AVX2-NEXT: [[TMP19:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 0 2330; AVX2-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 4 2331; AVX2-NEXT: [[TMP21:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 8 2332; AVX2-NEXT: [[TMP22:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 12 2333; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr [[TMP19]], i32 8, <4 x i1> [[TMP14]], <4 x ptr> poison) 2334; AVX2-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr [[TMP20]], i32 8, <4 x i1> [[TMP15]], <4 x ptr> poison) 2335; AVX2-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr [[TMP21]], i32 8, <4 x i1> [[TMP16]], <4 x ptr> poison) 2336; AVX2-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr [[TMP22]], i32 8, <4 x i1> [[TMP17]], <4 x ptr> poison) 2337; AVX2-NEXT: [[TMP23:%.*]] = icmp eq <4 x ptr> [[WIDE_MASKED_LOAD]], zeroinitializer 2338; AVX2-NEXT: [[TMP24:%.*]] = icmp eq <4 x ptr> [[WIDE_MASKED_LOAD4]], zeroinitializer 2339; AVX2-NEXT: [[TMP25:%.*]] = icmp eq <4 x ptr> [[WIDE_MASKED_LOAD5]], zeroinitializer 2340; AVX2-NEXT: [[TMP26:%.*]] = icmp eq <4 x ptr> [[WIDE_MASKED_LOAD6]], zeroinitializer 2341; AVX2-NEXT: [[TMP27:%.*]] = xor <4 x i1> [[TMP23]], splat (i1 true) 2342; AVX2-NEXT: [[TMP28:%.*]] = xor <4 x i1> [[TMP24]], splat (i1 true) 2343; AVX2-NEXT: [[TMP29:%.*]] = xor <4 x i1> [[TMP25]], splat (i1 true) 2344; AVX2-NEXT: [[TMP30:%.*]] = xor <4 x i1> [[TMP26]], splat (i1 true) 2345; AVX2-NEXT: [[TMP31:%.*]] = select <4 x i1> [[TMP14]], <4 x i1> [[TMP27]], <4 x i1> zeroinitializer 2346; AVX2-NEXT: [[TMP32:%.*]] = select <4 x i1> [[TMP15]], <4 x i1> [[TMP28]], <4 x i1> zeroinitializer 2347; AVX2-NEXT: [[TMP33:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> [[TMP29]], <4 x i1> zeroinitializer 2348; AVX2-NEXT: [[TMP34:%.*]] = select <4 x i1> [[TMP17]], <4 x i1> [[TMP30]], <4 x i1> zeroinitializer 2349; AVX2-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[OUT:%.*]], i64 [[TMP0]] 2350; AVX2-NEXT: [[TMP36:%.*]] = getelementptr double, ptr [[TMP35]], i32 0 2351; AVX2-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i32 4 2352; AVX2-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i32 8 2353; AVX2-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i32 12 2354; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP36]], i32 8, <4 x i1> [[TMP31]]) 2355; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP37]], i32 8, <4 x i1> [[TMP32]]) 2356; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP38]], i32 8, <4 x i1> [[TMP33]]) 2357; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP39]], i32 8, <4 x i1> [[TMP34]]) 2358; AVX2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 2359; AVX2-NEXT: [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 2360; AVX2-NEXT: br i1 [[TMP40]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] 2361; AVX2: middle.block: 2362; AVX2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] 2363; AVX2-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] 2364; AVX2: vec.epilog.iter.check: 2365; AVX2-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] 2366; AVX2-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4 2367; AVX2-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[SCALAR_PH]] 2368; AVX2: vec.epilog.ph: 2369; AVX2-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] 2370; AVX2-NEXT: [[N_MOD_VF8:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4 2371; AVX2-NEXT: [[N_VEC9:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF8]] 2372; AVX2-NEXT: br label [[FOR_BODY:%.*]] 2373; AVX2: vec.epilog.vector.body: 2374; AVX2-NEXT: [[INDEX10:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDEX_NEXT13:%.*]], [[FOR_BODY]] ] 2375; AVX2-NEXT: [[TMP55:%.*]] = add i64 [[INDEX10]], 0 2376; AVX2-NEXT: [[TMP56:%.*]] = getelementptr inbounds i8, ptr [[TRIGGER]], i64 [[TMP55]] 2377; AVX2-NEXT: [[TMP57:%.*]] = getelementptr inbounds i8, ptr [[TMP56]], i32 0 2378; AVX2-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x i8>, ptr [[TMP57]], align 1 2379; AVX2-NEXT: [[TMP44:%.*]] = and <4 x i8> [[WIDE_LOAD11]], splat (i8 1) 2380; AVX2-NEXT: [[TMP45:%.*]] = icmp eq <4 x i8> [[TMP44]], zeroinitializer 2381; AVX2-NEXT: [[TMP46:%.*]] = xor <4 x i1> [[TMP45]], splat (i1 true) 2382; AVX2-NEXT: [[TMP47:%.*]] = getelementptr ptr, ptr [[IN]], i64 [[TMP55]] 2383; AVX2-NEXT: [[TMP48:%.*]] = getelementptr ptr, ptr [[TMP47]], i32 0 2384; AVX2-NEXT: [[WIDE_MASKED_LOAD12:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr [[TMP48]], i32 8, <4 x i1> [[TMP46]], <4 x ptr> poison) 2385; AVX2-NEXT: [[TMP49:%.*]] = icmp eq <4 x ptr> [[WIDE_MASKED_LOAD12]], zeroinitializer 2386; AVX2-NEXT: [[TMP50:%.*]] = xor <4 x i1> [[TMP49]], splat (i1 true) 2387; AVX2-NEXT: [[TMP51:%.*]] = select <4 x i1> [[TMP46]], <4 x i1> [[TMP50]], <4 x i1> zeroinitializer 2388; AVX2-NEXT: [[TMP52:%.*]] = getelementptr double, ptr [[OUT]], i64 [[TMP55]] 2389; AVX2-NEXT: [[TMP53:%.*]] = getelementptr double, ptr [[TMP52]], i32 0 2390; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP53]], i32 8, <4 x i1> [[TMP51]]) 2391; AVX2-NEXT: [[INDEX_NEXT13]] = add nuw i64 [[INDEX10]], 4 2392; AVX2-NEXT: [[TMP54:%.*]] = icmp eq i64 [[INDEX_NEXT13]], [[N_VEC9]] 2393; AVX2-NEXT: br i1 [[TMP54]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP35:![0-9]+]] 2394; AVX2: vec.epilog.middle.block: 2395; AVX2-NEXT: [[CMP_N14:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC9]] 2396; AVX2-NEXT: br i1 [[CMP_N14]], label [[FOR_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] 2397; AVX2: vec.epilog.scalar.ph: 2398; AVX2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] 2399; AVX2-NEXT: br label [[FOR_BODY1:%.*]] 2400; AVX2: for.body: 2401; AVX2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] 2402; AVX2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[TRIGGER]], i64 [[INDVARS_IV]] 2403; AVX2-NEXT: [[TMP41:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 2404; AVX2-NEXT: [[TMP42:%.*]] = and i8 [[TMP41]], 1 2405; AVX2-NEXT: [[TOBOOL:%.*]] = icmp eq i8 [[TMP42]], 0 2406; AVX2-NEXT: br i1 [[TOBOOL]], label [[FOR_INC]], label [[LAND_LHS_TRUE:%.*]] 2407; AVX2: land.lhs.true: 2408; AVX2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[IN]], i64 [[INDVARS_IV]] 2409; AVX2-NEXT: [[TMP43:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 2410; AVX2-NEXT: [[CMP3:%.*]] = icmp eq ptr [[TMP43]], null 2411; AVX2-NEXT: br i1 [[CMP3]], label [[FOR_INC]], label [[IF_THEN:%.*]] 2412; AVX2: if.then: 2413; AVX2-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, ptr [[OUT]], i64 [[INDVARS_IV]] 2414; AVX2-NEXT: store double 5.000000e-01, ptr [[ARRAYIDX5]], align 8 2415; AVX2-NEXT: br label [[FOR_INC]] 2416; AVX2: for.inc: 2417; AVX2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 2418; AVX2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] 2419; AVX2-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY1]], !llvm.loop [[LOOP36:![0-9]+]] 2420; AVX2: for.end.loopexit: 2421; AVX2-NEXT: br label [[FOR_END]] 2422; AVX2: for.end: 2423; AVX2-NEXT: ret void 2424; 2425; AVX512-LABEL: @foo8( 2426; AVX512-NEXT: entry: 2427; AVX512-NEXT: [[CMP5:%.*]] = icmp eq i32 [[SIZE:%.*]], 0 2428; AVX512-NEXT: br i1 [[CMP5]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]] 2429; AVX512: iter.check: 2430; AVX512-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SIZE]] to i64 2431; AVX512-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 8 2432; AVX512-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] 2433; AVX512: vector.main.loop.iter.check: 2434; AVX512-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 32 2435; AVX512-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 2436; AVX512: vector.ph: 2437; AVX512-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 32 2438; AVX512-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] 2439; AVX512-NEXT: br label [[VECTOR_BODY:%.*]] 2440; AVX512: vector.body: 2441; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 2442; AVX512-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 2443; AVX512-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TRIGGER:%.*]], i64 [[TMP0]] 2444; AVX512-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0 2445; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 8 2446; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 16 2447; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 24 2448; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1 2449; AVX512-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP3]], align 1 2450; AVX512-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i8>, ptr [[TMP4]], align 1 2451; AVX512-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x i8>, ptr [[TMP5]], align 1 2452; AVX512-NEXT: [[TMP6:%.*]] = and <8 x i8> [[WIDE_LOAD]], splat (i8 1) 2453; AVX512-NEXT: [[TMP7:%.*]] = and <8 x i8> [[WIDE_LOAD1]], splat (i8 1) 2454; AVX512-NEXT: [[TMP8:%.*]] = and <8 x i8> [[WIDE_LOAD2]], splat (i8 1) 2455; AVX512-NEXT: [[TMP9:%.*]] = and <8 x i8> [[WIDE_LOAD3]], splat (i8 1) 2456; AVX512-NEXT: [[TMP10:%.*]] = icmp eq <8 x i8> [[TMP6]], zeroinitializer 2457; AVX512-NEXT: [[TMP11:%.*]] = icmp eq <8 x i8> [[TMP7]], zeroinitializer 2458; AVX512-NEXT: [[TMP12:%.*]] = icmp eq <8 x i8> [[TMP8]], zeroinitializer 2459; AVX512-NEXT: [[TMP13:%.*]] = icmp eq <8 x i8> [[TMP9]], zeroinitializer 2460; AVX512-NEXT: [[TMP14:%.*]] = xor <8 x i1> [[TMP10]], splat (i1 true) 2461; AVX512-NEXT: [[TMP15:%.*]] = xor <8 x i1> [[TMP11]], splat (i1 true) 2462; AVX512-NEXT: [[TMP16:%.*]] = xor <8 x i1> [[TMP12]], splat (i1 true) 2463; AVX512-NEXT: [[TMP17:%.*]] = xor <8 x i1> [[TMP13]], splat (i1 true) 2464; AVX512-NEXT: [[TMP18:%.*]] = getelementptr ptr, ptr [[IN:%.*]], i64 [[TMP0]] 2465; AVX512-NEXT: [[TMP19:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 0 2466; AVX512-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 8 2467; AVX512-NEXT: [[TMP21:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 16 2468; AVX512-NEXT: [[TMP22:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 24 2469; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x ptr> @llvm.masked.load.v8p0.p0(ptr [[TMP19]], i32 8, <8 x i1> [[TMP14]], <8 x ptr> poison) 2470; AVX512-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <8 x ptr> @llvm.masked.load.v8p0.p0(ptr [[TMP20]], i32 8, <8 x i1> [[TMP15]], <8 x ptr> poison) 2471; AVX512-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <8 x ptr> @llvm.masked.load.v8p0.p0(ptr [[TMP21]], i32 8, <8 x i1> [[TMP16]], <8 x ptr> poison) 2472; AVX512-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <8 x ptr> @llvm.masked.load.v8p0.p0(ptr [[TMP22]], i32 8, <8 x i1> [[TMP17]], <8 x ptr> poison) 2473; AVX512-NEXT: [[TMP23:%.*]] = icmp eq <8 x ptr> [[WIDE_MASKED_LOAD]], zeroinitializer 2474; AVX512-NEXT: [[TMP24:%.*]] = icmp eq <8 x ptr> [[WIDE_MASKED_LOAD4]], zeroinitializer 2475; AVX512-NEXT: [[TMP25:%.*]] = icmp eq <8 x ptr> [[WIDE_MASKED_LOAD5]], zeroinitializer 2476; AVX512-NEXT: [[TMP26:%.*]] = icmp eq <8 x ptr> [[WIDE_MASKED_LOAD6]], zeroinitializer 2477; AVX512-NEXT: [[TMP27:%.*]] = xor <8 x i1> [[TMP23]], splat (i1 true) 2478; AVX512-NEXT: [[TMP28:%.*]] = xor <8 x i1> [[TMP24]], splat (i1 true) 2479; AVX512-NEXT: [[TMP29:%.*]] = xor <8 x i1> [[TMP25]], splat (i1 true) 2480; AVX512-NEXT: [[TMP30:%.*]] = xor <8 x i1> [[TMP26]], splat (i1 true) 2481; AVX512-NEXT: [[TMP31:%.*]] = select <8 x i1> [[TMP14]], <8 x i1> [[TMP27]], <8 x i1> zeroinitializer 2482; AVX512-NEXT: [[TMP32:%.*]] = select <8 x i1> [[TMP15]], <8 x i1> [[TMP28]], <8 x i1> zeroinitializer 2483; AVX512-NEXT: [[TMP33:%.*]] = select <8 x i1> [[TMP16]], <8 x i1> [[TMP29]], <8 x i1> zeroinitializer 2484; AVX512-NEXT: [[TMP34:%.*]] = select <8 x i1> [[TMP17]], <8 x i1> [[TMP30]], <8 x i1> zeroinitializer 2485; AVX512-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[OUT:%.*]], i64 [[TMP0]] 2486; AVX512-NEXT: [[TMP36:%.*]] = getelementptr double, ptr [[TMP35]], i32 0 2487; AVX512-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i32 8 2488; AVX512-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i32 16 2489; AVX512-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i32 24 2490; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr [[TMP36]], i32 8, <8 x i1> [[TMP31]]) 2491; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr [[TMP37]], i32 8, <8 x i1> [[TMP32]]) 2492; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr [[TMP38]], i32 8, <8 x i1> [[TMP33]]) 2493; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr [[TMP39]], i32 8, <8 x i1> [[TMP34]]) 2494; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 2495; AVX512-NEXT: [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 2496; AVX512-NEXT: br i1 [[TMP40]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP53:![0-9]+]] 2497; AVX512: middle.block: 2498; AVX512-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] 2499; AVX512-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] 2500; AVX512: vec.epilog.iter.check: 2501; AVX512-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] 2502; AVX512-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 8 2503; AVX512-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[SCALAR_PH]] 2504; AVX512: vec.epilog.ph: 2505; AVX512-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] 2506; AVX512-NEXT: [[N_MOD_VF8:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 8 2507; AVX512-NEXT: [[N_VEC9:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF8]] 2508; AVX512-NEXT: br label [[FOR_BODY:%.*]] 2509; AVX512: vec.epilog.vector.body: 2510; AVX512-NEXT: [[INDEX10:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDEX_NEXT13:%.*]], [[FOR_BODY]] ] 2511; AVX512-NEXT: [[TMP55:%.*]] = add i64 [[INDEX10]], 0 2512; AVX512-NEXT: [[TMP56:%.*]] = getelementptr inbounds i8, ptr [[TRIGGER]], i64 [[TMP55]] 2513; AVX512-NEXT: [[TMP57:%.*]] = getelementptr inbounds i8, ptr [[TMP56]], i32 0 2514; AVX512-NEXT: [[WIDE_LOAD11:%.*]] = load <8 x i8>, ptr [[TMP57]], align 1 2515; AVX512-NEXT: [[TMP44:%.*]] = and <8 x i8> [[WIDE_LOAD11]], splat (i8 1) 2516; AVX512-NEXT: [[TMP45:%.*]] = icmp eq <8 x i8> [[TMP44]], zeroinitializer 2517; AVX512-NEXT: [[TMP46:%.*]] = xor <8 x i1> [[TMP45]], splat (i1 true) 2518; AVX512-NEXT: [[TMP47:%.*]] = getelementptr ptr, ptr [[IN]], i64 [[TMP55]] 2519; AVX512-NEXT: [[TMP48:%.*]] = getelementptr ptr, ptr [[TMP47]], i32 0 2520; AVX512-NEXT: [[WIDE_MASKED_LOAD12:%.*]] = call <8 x ptr> @llvm.masked.load.v8p0.p0(ptr [[TMP48]], i32 8, <8 x i1> [[TMP46]], <8 x ptr> poison) 2521; AVX512-NEXT: [[TMP49:%.*]] = icmp eq <8 x ptr> [[WIDE_MASKED_LOAD12]], zeroinitializer 2522; AVX512-NEXT: [[TMP50:%.*]] = xor <8 x i1> [[TMP49]], splat (i1 true) 2523; AVX512-NEXT: [[TMP51:%.*]] = select <8 x i1> [[TMP46]], <8 x i1> [[TMP50]], <8 x i1> zeroinitializer 2524; AVX512-NEXT: [[TMP52:%.*]] = getelementptr double, ptr [[OUT]], i64 [[TMP55]] 2525; AVX512-NEXT: [[TMP53:%.*]] = getelementptr double, ptr [[TMP52]], i32 0 2526; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr [[TMP53]], i32 8, <8 x i1> [[TMP51]]) 2527; AVX512-NEXT: [[INDEX_NEXT13]] = add nuw i64 [[INDEX10]], 8 2528; AVX512-NEXT: [[TMP54:%.*]] = icmp eq i64 [[INDEX_NEXT13]], [[N_VEC9]] 2529; AVX512-NEXT: br i1 [[TMP54]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP54:![0-9]+]] 2530; AVX512: vec.epilog.middle.block: 2531; AVX512-NEXT: [[CMP_N14:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC9]] 2532; AVX512-NEXT: br i1 [[CMP_N14]], label [[FOR_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] 2533; AVX512: vec.epilog.scalar.ph: 2534; AVX512-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] 2535; AVX512-NEXT: br label [[FOR_BODY1:%.*]] 2536; AVX512: for.body: 2537; AVX512-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] 2538; AVX512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[TRIGGER]], i64 [[INDVARS_IV]] 2539; AVX512-NEXT: [[TMP41:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 2540; AVX512-NEXT: [[TMP42:%.*]] = and i8 [[TMP41]], 1 2541; AVX512-NEXT: [[TOBOOL:%.*]] = icmp eq i8 [[TMP42]], 0 2542; AVX512-NEXT: br i1 [[TOBOOL]], label [[FOR_INC]], label [[LAND_LHS_TRUE:%.*]] 2543; AVX512: land.lhs.true: 2544; AVX512-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[IN]], i64 [[INDVARS_IV]] 2545; AVX512-NEXT: [[TMP43:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 2546; AVX512-NEXT: [[CMP3:%.*]] = icmp eq ptr [[TMP43]], null 2547; AVX512-NEXT: br i1 [[CMP3]], label [[FOR_INC]], label [[IF_THEN:%.*]] 2548; AVX512: if.then: 2549; AVX512-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, ptr [[OUT]], i64 [[INDVARS_IV]] 2550; AVX512-NEXT: store double 5.000000e-01, ptr [[ARRAYIDX5]], align 8 2551; AVX512-NEXT: br label [[FOR_INC]] 2552; AVX512: for.inc: 2553; AVX512-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 2554; AVX512-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] 2555; AVX512-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY1]], !llvm.loop [[LOOP55:![0-9]+]] 2556; AVX512: for.end.loopexit: 2557; AVX512-NEXT: br label [[FOR_END]] 2558; AVX512: for.end: 2559; AVX512-NEXT: ret void 2560; 2561entry: 2562 %cmp5 = icmp eq i32 %size, 0 2563 br i1 %cmp5, label %for.end, label %for.body.preheader 2564 2565for.body.preheader: ; preds = %entry 2566 %wide.trip.count = zext i32 %size to i64 2567 br label %for.body 2568 2569for.body: ; preds = %for.inc, %for.body.preheader 2570 %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ] 2571 %arrayidx = getelementptr inbounds i8, ptr %trigger, i64 %indvars.iv 2572 %0 = load i8, ptr %arrayidx, align 1 2573 %1 = and i8 %0, 1 2574 %tobool = icmp eq i8 %1, 0 2575 br i1 %tobool, label %for.inc, label %land.lhs.true 2576 2577land.lhs.true: ; preds = %for.body 2578 %arrayidx2 = getelementptr inbounds ptr, ptr %in, i64 %indvars.iv 2579 %2 = load ptr, ptr %arrayidx2, align 8 2580 %cmp3 = icmp eq ptr %2, null 2581 br i1 %cmp3, label %for.inc, label %if.then 2582 2583if.then: ; preds = %land.lhs.true 2584 %arrayidx5 = getelementptr inbounds double, ptr %out, i64 %indvars.iv 2585 store double 5.000000e-01, ptr %arrayidx5, align 8 2586 br label %for.inc 2587 2588for.inc: ; preds = %land.lhs.true, %for.body, %if.then 2589 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 2590 %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count 2591 br i1 %exitcond, label %for.end, label %for.body 2592 2593for.end: ; preds = %for.inc, %entry 2594 ret void 2595} 2596 2597attributes #0 = { norecurse nounwind } 2598