1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 2; RUN: opt < %s -passes=loop-vectorize,instsimplify -force-vector-interleave=1 -S | FileCheck %s --check-prefixes=TFNONE 3; RUN: opt < %s -passes=loop-vectorize,instsimplify,simplifycfg -force-vector-interleave=1 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S | FileCheck %s --check-prefixes=TFCOMMON,TFALWAYS 4; RUN: opt < %s -passes=loop-vectorize,instsimplify,simplifycfg -force-vector-interleave=1 -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue -S | FileCheck %s --check-prefixes=TFCOMMON,TFFALLBACK 5; RUN: opt < %s -passes=loop-vectorize,instsimplify,simplifycfg -force-vector-interleave=2 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S | FileCheck %s --check-prefixes=TFA_INTERLEAVE 6 7target triple = "aarch64-unknown-linux-gnu" 8 9; A call whose argument must be widened. We check that tail folding uses the 10; primary mask, and that without tail folding we synthesize an all-true mask. 11define void @test_widen(ptr noalias %a, ptr readnone %b) #4 { 12; TFNONE-LABEL: @test_widen( 13; TFNONE-NEXT: entry: 14; TFNONE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 15; TFNONE: vector.ph: 16; TFNONE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 17; TFNONE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 18; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] 19; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] 20; TFNONE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 21; TFNONE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 22; TFNONE-NEXT: br label [[VECTOR_BODY:%.*]] 23; TFNONE: vector.body: 24; TFNONE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 25; TFNONE-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDEX]] 26; TFNONE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP6]], align 8 27; TFNONE-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_LOAD]], <vscale x 2 x i1> splat (i1 true)) 28; TFNONE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] 29; TFNONE-NEXT: store <vscale x 2 x i64> [[TMP7]], ptr [[TMP8]], align 8 30; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 31; TFNONE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 32; TFNONE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] 33; TFNONE: middle.block: 34; TFNONE-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] 35; TFNONE: scalar.ph: 36; TFNONE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 37; TFNONE-NEXT: br label [[FOR_BODY:%.*]] 38; TFNONE: for.body: 39; TFNONE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 40; TFNONE-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]] 41; TFNONE-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 8 42; TFNONE-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR3:[0-9]+]] 43; TFNONE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] 44; TFNONE-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 8 45; TFNONE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 46; TFNONE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1025 47; TFNONE-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] 48; TFNONE: for.cond.cleanup: 49; TFNONE-NEXT: ret void 50; 51; TFCOMMON-LABEL: @test_widen( 52; TFCOMMON-NEXT: entry: 53; TFCOMMON-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 54; TFCOMMON-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 55; TFCOMMON-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 56; TFCOMMON-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]] 57; TFCOMMON-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] 58; TFCOMMON-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] 59; TFCOMMON-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() 60; TFCOMMON-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 61; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) 62; TFCOMMON-NEXT: br label [[VECTOR_BODY:%.*]] 63; TFCOMMON: vector.body: 64; TFCOMMON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 65; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] 66; TFCOMMON-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDEX]] 67; TFCOMMON-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr [[TMP5]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> poison) 68; TFCOMMON-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) 69; TFCOMMON-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] 70; TFCOMMON-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP6]], ptr [[TMP7]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) 71; TFCOMMON-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] 72; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) 73; TFCOMMON-NEXT: [[TMP8:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) 74; TFCOMMON-NEXT: [[TMP9:%.*]] = extractelement <vscale x 2 x i1> [[TMP8]], i32 0 75; TFCOMMON-NEXT: br i1 [[TMP9]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] 76; TFCOMMON: for.cond.cleanup: 77; TFCOMMON-NEXT: ret void 78; 79; TFA_INTERLEAVE-LABEL: @test_widen( 80; TFA_INTERLEAVE-NEXT: entry: 81; TFA_INTERLEAVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 82; TFA_INTERLEAVE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 83; TFA_INTERLEAVE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 84; TFA_INTERLEAVE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]] 85; TFA_INTERLEAVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] 86; TFA_INTERLEAVE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] 87; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() 88; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 89; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() 90; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 91; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) 92; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 1025) 93; TFA_INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]] 94; TFA_INTERLEAVE: vector.body: 95; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 96; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] 97; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY1]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT4:%.*]], [[VECTOR_BODY]] ] 98; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDEX]] 99; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() 100; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 101; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[TMP7]], i64 [[TMP9]] 102; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> poison) 103; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr [[TMP10]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]], <vscale x 2 x i64> poison) 104; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) 105; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_MASKED_LOAD3]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]]) 106; TFA_INTERLEAVE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] 107; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() 108; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 2 109; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i64 [[TMP15]] 110; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP11]], ptr [[TMP13]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) 111; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP12]], ptr [[TMP16]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]]) 112; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] 113; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() 114; TFA_INTERLEAVE-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 2 115; TFA_INTERLEAVE-NEXT: [[TMP19:%.*]] = add i64 [[INDEX_NEXT]], [[TMP18]] 116; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) 117; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT4]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP19]], i64 1025) 118; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) 119; TFA_INTERLEAVE-NEXT: [[TMP21:%.*]] = extractelement <vscale x 2 x i1> [[TMP20]], i32 0 120; TFA_INTERLEAVE-NEXT: br i1 [[TMP21]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] 121; TFA_INTERLEAVE: for.cond.cleanup: 122; TFA_INTERLEAVE-NEXT: ret void 123; 124entry: 125 br label %for.body 126 127for.body: 128 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] 129 %gep = getelementptr i64, ptr %b, i64 %indvars.iv 130 %load = load i64, ptr %gep 131 %call = call i64 @foo(i64 %load) #1 132 %arrayidx = getelementptr inbounds i64, ptr %a, i64 %indvars.iv 133 store i64 %call, ptr %arrayidx 134 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 135 %exitcond = icmp eq i64 %indvars.iv.next, 1025 136 br i1 %exitcond, label %for.cond.cleanup, label %for.body 137 138for.cond.cleanup: 139 ret void 140} 141 142; Check that a simple conditional call can be vectorized. 143define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 { 144; TFNONE-LABEL: @test_if_then( 145; TFNONE-NEXT: entry: 146; TFNONE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 147; TFNONE: vector.ph: 148; TFNONE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 149; TFNONE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 150; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] 151; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] 152; TFNONE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 153; TFNONE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 154; TFNONE-NEXT: br label [[VECTOR_BODY:%.*]] 155; TFNONE: vector.body: 156; TFNONE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 157; TFNONE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] 158; TFNONE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP6]], align 8 159; TFNONE-NEXT: [[TMP7:%.*]] = icmp ugt <vscale x 2 x i64> [[WIDE_LOAD]], splat (i64 50) 160; TFNONE-NEXT: [[TMP8:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_LOAD]], <vscale x 2 x i1> [[TMP7]]) 161; TFNONE-NEXT: [[TMP9:%.*]] = xor <vscale x 2 x i1> [[TMP7]], splat (i1 true) 162; TFNONE-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP9]], <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> [[TMP8]] 163; TFNONE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]] 164; TFNONE-NEXT: store <vscale x 2 x i64> [[PREDPHI]], ptr [[TMP10]], align 8 165; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 166; TFNONE-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 167; TFNONE-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] 168; TFNONE: middle.block: 169; TFNONE-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] 170; TFNONE: scalar.ph: 171; TFNONE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 172; TFNONE-NEXT: br label [[FOR_BODY:%.*]] 173; TFNONE: for.body: 174; TFNONE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[IF_END:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 175; TFNONE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] 176; TFNONE-NEXT: [[TMP12:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 177; TFNONE-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP12]], 50 178; TFNONE-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END]] 179; TFNONE: if.then: 180; TFNONE-NEXT: [[TMP13:%.*]] = call i64 @foo(i64 [[TMP12]]) #[[ATTR3]] 181; TFNONE-NEXT: br label [[IF_END]] 182; TFNONE: if.end: 183; TFNONE-NEXT: [[TMP14:%.*]] = phi i64 [ [[TMP13]], [[IF_THEN]] ], [ 0, [[FOR_BODY]] ] 184; TFNONE-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDVARS_IV]] 185; TFNONE-NEXT: store i64 [[TMP14]], ptr [[ARRAYIDX1]], align 8 186; TFNONE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 187; TFNONE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1025 188; TFNONE-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] 189; TFNONE: for.cond.cleanup: 190; TFNONE-NEXT: ret void 191; 192; TFCOMMON-LABEL: @test_if_then( 193; TFCOMMON-NEXT: entry: 194; TFCOMMON-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 195; TFCOMMON-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 196; TFCOMMON-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 197; TFCOMMON-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]] 198; TFCOMMON-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] 199; TFCOMMON-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] 200; TFCOMMON-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() 201; TFCOMMON-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 202; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) 203; TFCOMMON-NEXT: br label [[VECTOR_BODY:%.*]] 204; TFCOMMON: vector.body: 205; TFCOMMON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 206; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] 207; TFCOMMON-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] 208; TFCOMMON-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr [[TMP5]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> poison) 209; TFCOMMON-NEXT: [[TMP6:%.*]] = icmp ugt <vscale x 2 x i64> [[WIDE_MASKED_LOAD]], splat (i64 50) 210; TFCOMMON-NEXT: [[TMP7:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> [[TMP6]], <vscale x 2 x i1> zeroinitializer 211; TFCOMMON-NEXT: [[TMP8:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[TMP7]]) 212; TFCOMMON-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP7]], <vscale x 2 x i64> [[TMP8]], <vscale x 2 x i64> zeroinitializer 213; TFCOMMON-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]] 214; TFCOMMON-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI]], ptr [[TMP9]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) 215; TFCOMMON-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] 216; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) 217; TFCOMMON-NEXT: [[TMP10:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) 218; TFCOMMON-NEXT: [[TMP11:%.*]] = extractelement <vscale x 2 x i1> [[TMP10]], i32 0 219; TFCOMMON-NEXT: br i1 [[TMP11]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] 220; TFCOMMON: for.cond.cleanup: 221; TFCOMMON-NEXT: ret void 222; 223; TFA_INTERLEAVE-LABEL: @test_if_then( 224; TFA_INTERLEAVE-NEXT: entry: 225; TFA_INTERLEAVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 226; TFA_INTERLEAVE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 227; TFA_INTERLEAVE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 228; TFA_INTERLEAVE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]] 229; TFA_INTERLEAVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] 230; TFA_INTERLEAVE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] 231; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() 232; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 233; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() 234; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 235; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) 236; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 1025) 237; TFA_INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]] 238; TFA_INTERLEAVE: vector.body: 239; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 240; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] 241; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY1]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT5:%.*]], [[VECTOR_BODY]] ] 242; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] 243; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() 244; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 245; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i64 [[TMP9]] 246; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> poison) 247; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr [[TMP10]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]], <vscale x 2 x i64> poison) 248; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = icmp ugt <vscale x 2 x i64> [[WIDE_MASKED_LOAD]], splat (i64 50) 249; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = icmp ugt <vscale x 2 x i64> [[WIDE_MASKED_LOAD3]], splat (i64 50) 250; TFA_INTERLEAVE-NEXT: [[TMP13:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> [[TMP11]], <vscale x 2 x i1> zeroinitializer 251; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]], <vscale x 2 x i1> [[TMP12]], <vscale x 2 x i1> zeroinitializer 252; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[TMP13]]) 253; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_MASKED_LOAD3]], <vscale x 2 x i1> [[TMP14]]) 254; TFA_INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP13]], <vscale x 2 x i64> [[TMP15]], <vscale x 2 x i64> zeroinitializer 255; TFA_INTERLEAVE-NEXT: [[PREDPHI4:%.*]] = select <vscale x 2 x i1> [[TMP14]], <vscale x 2 x i64> [[TMP16]], <vscale x 2 x i64> zeroinitializer 256; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]] 257; TFA_INTERLEAVE-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() 258; TFA_INTERLEAVE-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 2 259; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[TMP17]], i64 [[TMP19]] 260; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI]], ptr [[TMP17]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) 261; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI4]], ptr [[TMP20]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]]) 262; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] 263; TFA_INTERLEAVE-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64() 264; TFA_INTERLEAVE-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 2 265; TFA_INTERLEAVE-NEXT: [[TMP23:%.*]] = add i64 [[INDEX_NEXT]], [[TMP22]] 266; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) 267; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT5]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP23]], i64 1025) 268; TFA_INTERLEAVE-NEXT: [[TMP24:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) 269; TFA_INTERLEAVE-NEXT: [[TMP25:%.*]] = extractelement <vscale x 2 x i1> [[TMP24]], i32 0 270; TFA_INTERLEAVE-NEXT: br i1 [[TMP25]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] 271; TFA_INTERLEAVE: for.cond.cleanup: 272; TFA_INTERLEAVE-NEXT: ret void 273; 274entry: 275 br label %for.body 276 277for.body: 278 %indvars.iv = phi i64 [ %indvars.iv.next, %if.end ], [ 0, %entry ] 279 %arrayidx = getelementptr inbounds i64, ptr %a, i64 %indvars.iv 280 %0 = load i64, ptr %arrayidx, align 8 281 %cmp = icmp ugt i64 %0, 50 282 br i1 %cmp, label %if.then, label %if.end 283 284if.then: 285 %1 = call i64 @foo(i64 %0) #1 286 br label %if.end 287 288if.end: 289 %2 = phi i64 [%1, %if.then], [0, %for.body] 290 %arrayidx1 = getelementptr inbounds i64, ptr %b, i64 %indvars.iv 291 store i64 %2, ptr %arrayidx1, align 8 292 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 293 %exitcond = icmp eq i64 %indvars.iv.next, 1025 294 br i1 %exitcond, label %for.cond.cleanup, label %for.body 295 296for.cond.cleanup: 297 ret void 298} 299 300; This checks the ability to handle masking of an if-then-else CFG with 301; calls inside the conditional blocks. Although one of the calls has a 302; uniform parameter and the metadata lists a uniform variant, right now 303; we just see a splat of the parameter instead. More work needed. 304define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 { 305; TFNONE-LABEL: @test_widen_if_then_else( 306; TFNONE-NEXT: entry: 307; TFNONE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 308; TFNONE: vector.ph: 309; TFNONE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 310; TFNONE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 311; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] 312; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] 313; TFNONE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 314; TFNONE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 315; TFNONE-NEXT: br label [[VECTOR_BODY:%.*]] 316; TFNONE: vector.body: 317; TFNONE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 318; TFNONE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] 319; TFNONE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP6]], align 8 320; TFNONE-NEXT: [[TMP7:%.*]] = icmp ugt <vscale x 2 x i64> [[WIDE_LOAD]], splat (i64 50) 321; TFNONE-NEXT: [[TMP8:%.*]] = xor <vscale x 2 x i1> [[TMP7]], splat (i1 true) 322; TFNONE-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP8]]) 323; TFNONE-NEXT: [[TMP10:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_LOAD]], <vscale x 2 x i1> [[TMP7]]) 324; TFNONE-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP8]], <vscale x 2 x i64> [[TMP9]], <vscale x 2 x i64> [[TMP10]] 325; TFNONE-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]] 326; TFNONE-NEXT: store <vscale x 2 x i64> [[PREDPHI]], ptr [[TMP11]], align 8 327; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 328; TFNONE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 329; TFNONE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] 330; TFNONE: middle.block: 331; TFNONE-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] 332; TFNONE: scalar.ph: 333; TFNONE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 334; TFNONE-NEXT: br label [[FOR_BODY:%.*]] 335; TFNONE: for.body: 336; TFNONE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[IF_END:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 337; TFNONE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] 338; TFNONE-NEXT: [[TMP13:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 339; TFNONE-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP13]], 50 340; TFNONE-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] 341; TFNONE: if.then: 342; TFNONE-NEXT: [[TMP14:%.*]] = call i64 @foo(i64 [[TMP13]]) #[[ATTR4:[0-9]+]] 343; TFNONE-NEXT: br label [[IF_END]] 344; TFNONE: if.else: 345; TFNONE-NEXT: [[TMP15:%.*]] = call i64 @foo(i64 0) #[[ATTR4]] 346; TFNONE-NEXT: br label [[IF_END]] 347; TFNONE: if.end: 348; TFNONE-NEXT: [[TMP16:%.*]] = phi i64 [ [[TMP14]], [[IF_THEN]] ], [ [[TMP15]], [[IF_ELSE]] ] 349; TFNONE-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDVARS_IV]] 350; TFNONE-NEXT: store i64 [[TMP16]], ptr [[ARRAYIDX1]], align 8 351; TFNONE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 352; TFNONE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1025 353; TFNONE-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] 354; TFNONE: for.cond.cleanup: 355; TFNONE-NEXT: ret void 356; 357; TFCOMMON-LABEL: @test_widen_if_then_else( 358; TFCOMMON-NEXT: entry: 359; TFCOMMON-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 360; TFCOMMON-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 361; TFCOMMON-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 362; TFCOMMON-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]] 363; TFCOMMON-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] 364; TFCOMMON-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] 365; TFCOMMON-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() 366; TFCOMMON-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 367; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) 368; TFCOMMON-NEXT: br label [[VECTOR_BODY:%.*]] 369; TFCOMMON: vector.body: 370; TFCOMMON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 371; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] 372; TFCOMMON-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] 373; TFCOMMON-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr [[TMP5]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> poison) 374; TFCOMMON-NEXT: [[TMP6:%.*]] = icmp ugt <vscale x 2 x i64> [[WIDE_MASKED_LOAD]], splat (i64 50) 375; TFCOMMON-NEXT: [[TMP7:%.*]] = xor <vscale x 2 x i1> [[TMP6]], splat (i1 true) 376; TFCOMMON-NEXT: [[TMP8:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> [[TMP7]], <vscale x 2 x i1> zeroinitializer 377; TFCOMMON-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP8]]) 378; TFCOMMON-NEXT: [[TMP10:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> [[TMP6]], <vscale x 2 x i1> zeroinitializer 379; TFCOMMON-NEXT: [[TMP11:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[TMP10]]) 380; TFCOMMON-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP8]], <vscale x 2 x i64> [[TMP9]], <vscale x 2 x i64> [[TMP11]] 381; TFCOMMON-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]] 382; TFCOMMON-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI]], ptr [[TMP12]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) 383; TFCOMMON-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] 384; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) 385; TFCOMMON-NEXT: [[TMP13:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) 386; TFCOMMON-NEXT: [[TMP14:%.*]] = extractelement <vscale x 2 x i1> [[TMP13]], i32 0 387; TFCOMMON-NEXT: br i1 [[TMP14]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] 388; TFCOMMON: for.cond.cleanup: 389; TFCOMMON-NEXT: ret void 390; 391; TFA_INTERLEAVE-LABEL: @test_widen_if_then_else( 392; TFA_INTERLEAVE-NEXT: entry: 393; TFA_INTERLEAVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 394; TFA_INTERLEAVE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 395; TFA_INTERLEAVE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 396; TFA_INTERLEAVE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]] 397; TFA_INTERLEAVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] 398; TFA_INTERLEAVE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] 399; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() 400; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 401; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() 402; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 403; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) 404; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 1025) 405; TFA_INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]] 406; TFA_INTERLEAVE: vector.body: 407; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 408; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] 409; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY1]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT5:%.*]], [[VECTOR_BODY]] ] 410; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] 411; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() 412; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 413; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i64 [[TMP9]] 414; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> poison) 415; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr [[TMP10]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]], <vscale x 2 x i64> poison) 416; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = icmp ugt <vscale x 2 x i64> [[WIDE_MASKED_LOAD]], splat (i64 50) 417; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = icmp ugt <vscale x 2 x i64> [[WIDE_MASKED_LOAD3]], splat (i64 50) 418; TFA_INTERLEAVE-NEXT: [[TMP13:%.*]] = xor <vscale x 2 x i1> [[TMP11]], splat (i1 true) 419; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = xor <vscale x 2 x i1> [[TMP12]], splat (i1 true) 420; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> [[TMP13]], <vscale x 2 x i1> zeroinitializer 421; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]], <vscale x 2 x i1> [[TMP14]], <vscale x 2 x i1> zeroinitializer 422; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP15]]) 423; TFA_INTERLEAVE-NEXT: [[TMP18:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP16]]) 424; TFA_INTERLEAVE-NEXT: [[TMP19:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> [[TMP11]], <vscale x 2 x i1> zeroinitializer 425; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]], <vscale x 2 x i1> [[TMP12]], <vscale x 2 x i1> zeroinitializer 426; TFA_INTERLEAVE-NEXT: [[TMP21:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[TMP19]]) 427; TFA_INTERLEAVE-NEXT: [[TMP22:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_MASKED_LOAD3]], <vscale x 2 x i1> [[TMP20]]) 428; TFA_INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP15]], <vscale x 2 x i64> [[TMP17]], <vscale x 2 x i64> [[TMP21]] 429; TFA_INTERLEAVE-NEXT: [[PREDPHI4:%.*]] = select <vscale x 2 x i1> [[TMP16]], <vscale x 2 x i64> [[TMP18]], <vscale x 2 x i64> [[TMP22]] 430; TFA_INTERLEAVE-NEXT: [[TMP23:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]] 431; TFA_INTERLEAVE-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64() 432; TFA_INTERLEAVE-NEXT: [[TMP25:%.*]] = mul i64 [[TMP24]], 2 433; TFA_INTERLEAVE-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[TMP23]], i64 [[TMP25]] 434; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI]], ptr [[TMP23]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) 435; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI4]], ptr [[TMP26]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]]) 436; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] 437; TFA_INTERLEAVE-NEXT: [[TMP27:%.*]] = call i64 @llvm.vscale.i64() 438; TFA_INTERLEAVE-NEXT: [[TMP28:%.*]] = mul i64 [[TMP27]], 2 439; TFA_INTERLEAVE-NEXT: [[TMP29:%.*]] = add i64 [[INDEX_NEXT]], [[TMP28]] 440; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) 441; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT5]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP29]], i64 1025) 442; TFA_INTERLEAVE-NEXT: [[TMP30:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) 443; TFA_INTERLEAVE-NEXT: [[TMP31:%.*]] = extractelement <vscale x 2 x i1> [[TMP30]], i32 0 444; TFA_INTERLEAVE-NEXT: br i1 [[TMP31]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] 445; TFA_INTERLEAVE: for.cond.cleanup: 446; TFA_INTERLEAVE-NEXT: ret void 447; 448entry: 449 br label %for.body 450 451for.body: 452 %indvars.iv = phi i64 [ %indvars.iv.next, %if.end ], [ 0, %entry ] 453 %arrayidx = getelementptr inbounds i64, ptr %a, i64 %indvars.iv 454 %0 = load i64, ptr %arrayidx, align 8 455 %cmp = icmp ugt i64 %0, 50 456 br i1 %cmp, label %if.then, label %if.else 457 458if.then: 459 %1 = call i64 @foo(i64 %0) #0 460 br label %if.end 461 462if.else: 463 %2 = call i64 @foo(i64 0) #0 464 br label %if.end 465 466if.end: 467 %3 = phi i64 [%1, %if.then], [%2, %if.else] 468 %arrayidx1 = getelementptr inbounds i64, ptr %b, i64 %indvars.iv 469 store i64 %3, ptr %arrayidx1, align 8 470 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 471 %exitcond = icmp eq i64 %indvars.iv.next, 1025 472 br i1 %exitcond, label %for.cond.cleanup, label %for.body 473 474for.cond.cleanup: 475 ret void 476} 477 478; A call whose argument must be widened, where the vector variant does not have 479; a mask. Forcing tail folding results in no vectorized call, whereas an 480; unpredicated body with scalar tail can use the unmasked variant. 481define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 { 482; TFNONE-LABEL: @test_widen_nomask( 483; TFNONE-NEXT: entry: 484; TFNONE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 485; TFNONE: vector.ph: 486; TFNONE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 487; TFNONE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 488; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] 489; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] 490; TFNONE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 491; TFNONE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 492; TFNONE-NEXT: br label [[VECTOR_BODY:%.*]] 493; TFNONE: vector.body: 494; TFNONE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 495; TFNONE-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDEX]] 496; TFNONE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP6]], align 8 497; TFNONE-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i64> @foo_vector_nomask(<vscale x 2 x i64> [[WIDE_LOAD]]) 498; TFNONE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] 499; TFNONE-NEXT: store <vscale x 2 x i64> [[TMP7]], ptr [[TMP8]], align 8 500; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 501; TFNONE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 502; TFNONE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] 503; TFNONE: middle.block: 504; TFNONE-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] 505; TFNONE: scalar.ph: 506; TFNONE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 507; TFNONE-NEXT: br label [[FOR_BODY:%.*]] 508; TFNONE: for.body: 509; TFNONE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 510; TFNONE-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]] 511; TFNONE-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 8 512; TFNONE-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR5:[0-9]+]] 513; TFNONE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] 514; TFNONE-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 8 515; TFNONE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 516; TFNONE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1025 517; TFNONE-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] 518; TFNONE: for.cond.cleanup: 519; TFNONE-NEXT: ret void 520; 521; TFALWAYS-LABEL: @test_widen_nomask( 522; TFALWAYS-NEXT: entry: 523; TFALWAYS-NEXT: br label [[FOR_BODY:%.*]] 524; TFALWAYS: for.body: 525; TFALWAYS-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 526; TFALWAYS-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDVARS_IV]] 527; TFALWAYS-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 8 528; TFALWAYS-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR5:[0-9]+]] 529; TFALWAYS-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDVARS_IV]] 530; TFALWAYS-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 8 531; TFALWAYS-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 532; TFALWAYS-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1025 533; TFALWAYS-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] 534; TFALWAYS: for.cond.cleanup: 535; TFALWAYS-NEXT: ret void 536; 537; TFFALLBACK-LABEL: @test_widen_nomask( 538; TFFALLBACK-NEXT: entry: 539; TFFALLBACK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 540; TFFALLBACK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 541; TFFALLBACK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] 542; TFFALLBACK-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] 543; TFFALLBACK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 544; TFFALLBACK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 545; TFFALLBACK-NEXT: br label [[VECTOR_BODY:%.*]] 546; TFFALLBACK: vector.body: 547; TFFALLBACK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 548; TFFALLBACK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDEX]] 549; TFFALLBACK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP6]], align 8 550; TFFALLBACK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i64> @foo_vector_nomask(<vscale x 2 x i64> [[WIDE_LOAD]]) 551; TFFALLBACK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] 552; TFFALLBACK-NEXT: store <vscale x 2 x i64> [[TMP7]], ptr [[TMP8]], align 8 553; TFFALLBACK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 554; TFFALLBACK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 555; TFFALLBACK-NEXT: br i1 [[TMP9]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] 556; TFFALLBACK: for.body: 557; TFFALLBACK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[VECTOR_BODY]] ] 558; TFFALLBACK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]] 559; TFFALLBACK-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 8 560; TFFALLBACK-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR5:[0-9]+]] 561; TFFALLBACK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] 562; TFFALLBACK-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 8 563; TFFALLBACK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 564; TFFALLBACK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1025 565; TFFALLBACK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] 566; TFFALLBACK: for.cond.cleanup: 567; TFFALLBACK-NEXT: ret void 568; 569; TFA_INTERLEAVE-LABEL: @test_widen_nomask( 570; TFA_INTERLEAVE-NEXT: entry: 571; TFA_INTERLEAVE-NEXT: br label [[FOR_BODY:%.*]] 572; TFA_INTERLEAVE: for.body: 573; TFA_INTERLEAVE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 574; TFA_INTERLEAVE-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDVARS_IV]] 575; TFA_INTERLEAVE-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 8 576; TFA_INTERLEAVE-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR5:[0-9]+]] 577; TFA_INTERLEAVE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDVARS_IV]] 578; TFA_INTERLEAVE-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 8 579; TFA_INTERLEAVE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 580; TFA_INTERLEAVE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1025 581; TFA_INTERLEAVE-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] 582; TFA_INTERLEAVE: for.cond.cleanup: 583; TFA_INTERLEAVE-NEXT: ret void 584; 585entry: 586 br label %for.body 587 588for.body: 589 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] 590 %gep = getelementptr i64, ptr %b, i64 %indvars.iv 591 %load = load i64, ptr %gep 592 %call = call i64 @foo(i64 %load) #2 593 %arrayidx = getelementptr inbounds i64, ptr %a, i64 %indvars.iv 594 store i64 %call, ptr %arrayidx 595 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 596 %exitcond = icmp eq i64 %indvars.iv.next, 1025 597 br i1 %exitcond, label %for.cond.cleanup, label %for.body 598 599for.cond.cleanup: 600 ret void 601} 602 603; If both masked and unmasked options are present, we expect to see tail folding 604; use the masked version and unpredicated body with scalar tail use the unmasked 605; version. 606define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 { 607; TFNONE-LABEL: @test_widen_optmask( 608; TFNONE-NEXT: entry: 609; TFNONE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 610; TFNONE: vector.ph: 611; TFNONE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 612; TFNONE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 613; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] 614; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] 615; TFNONE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 616; TFNONE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 617; TFNONE-NEXT: br label [[VECTOR_BODY:%.*]] 618; TFNONE: vector.body: 619; TFNONE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 620; TFNONE-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDEX]] 621; TFNONE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP6]], align 8 622; TFNONE-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i64> @foo_vector_nomask(<vscale x 2 x i64> [[WIDE_LOAD]]) 623; TFNONE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] 624; TFNONE-NEXT: store <vscale x 2 x i64> [[TMP7]], ptr [[TMP8]], align 8 625; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 626; TFNONE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 627; TFNONE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] 628; TFNONE: middle.block: 629; TFNONE-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] 630; TFNONE: scalar.ph: 631; TFNONE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 632; TFNONE-NEXT: br label [[FOR_BODY:%.*]] 633; TFNONE: for.body: 634; TFNONE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 635; TFNONE-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]] 636; TFNONE-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 8 637; TFNONE-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR6:[0-9]+]] 638; TFNONE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] 639; TFNONE-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 8 640; TFNONE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 641; TFNONE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1025 642; TFNONE-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] 643; TFNONE: for.cond.cleanup: 644; TFNONE-NEXT: ret void 645; 646; TFALWAYS-LABEL: @test_widen_optmask( 647; TFALWAYS-NEXT: entry: 648; TFALWAYS-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 649; TFALWAYS-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 650; TFALWAYS-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 651; TFALWAYS-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]] 652; TFALWAYS-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] 653; TFALWAYS-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] 654; TFALWAYS-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() 655; TFALWAYS-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 656; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) 657; TFALWAYS-NEXT: br label [[VECTOR_BODY:%.*]] 658; TFALWAYS: vector.body: 659; TFALWAYS-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 660; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] 661; TFALWAYS-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDEX]] 662; TFALWAYS-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr [[TMP5]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> poison) 663; TFALWAYS-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) 664; TFALWAYS-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] 665; TFALWAYS-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP6]], ptr [[TMP7]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) 666; TFALWAYS-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] 667; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) 668; TFALWAYS-NEXT: [[TMP8:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) 669; TFALWAYS-NEXT: [[TMP9:%.*]] = extractelement <vscale x 2 x i1> [[TMP8]], i32 0 670; TFALWAYS-NEXT: br i1 [[TMP9]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] 671; TFALWAYS: for.cond.cleanup: 672; TFALWAYS-NEXT: ret void 673; 674; TFFALLBACK-LABEL: @test_widen_optmask( 675; TFFALLBACK-NEXT: entry: 676; TFFALLBACK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 677; TFFALLBACK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 678; TFFALLBACK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 679; TFFALLBACK-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]] 680; TFFALLBACK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] 681; TFFALLBACK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] 682; TFFALLBACK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() 683; TFFALLBACK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 684; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) 685; TFFALLBACK-NEXT: br label [[VECTOR_BODY:%.*]] 686; TFFALLBACK: vector.body: 687; TFFALLBACK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 688; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] 689; TFFALLBACK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDEX]] 690; TFFALLBACK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr [[TMP5]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> poison) 691; TFFALLBACK-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) 692; TFFALLBACK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] 693; TFFALLBACK-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP6]], ptr [[TMP7]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) 694; TFFALLBACK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] 695; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) 696; TFFALLBACK-NEXT: [[TMP8:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) 697; TFFALLBACK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 2 x i1> [[TMP8]], i32 0 698; TFFALLBACK-NEXT: br i1 [[TMP9]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] 699; TFFALLBACK: for.cond.cleanup: 700; TFFALLBACK-NEXT: ret void 701; 702; TFA_INTERLEAVE-LABEL: @test_widen_optmask( 703; TFA_INTERLEAVE-NEXT: entry: 704; TFA_INTERLEAVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 705; TFA_INTERLEAVE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 706; TFA_INTERLEAVE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 707; TFA_INTERLEAVE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]] 708; TFA_INTERLEAVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] 709; TFA_INTERLEAVE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] 710; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() 711; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 712; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() 713; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 714; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) 715; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 1025) 716; TFA_INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]] 717; TFA_INTERLEAVE: vector.body: 718; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 719; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] 720; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY1]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT4:%.*]], [[VECTOR_BODY]] ] 721; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDEX]] 722; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() 723; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 724; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[TMP7]], i64 [[TMP9]] 725; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> poison) 726; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr [[TMP10]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]], <vscale x 2 x i64> poison) 727; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) 728; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_MASKED_LOAD3]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]]) 729; TFA_INTERLEAVE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] 730; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() 731; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 2 732; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i64 [[TMP15]] 733; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP11]], ptr [[TMP13]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) 734; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP12]], ptr [[TMP16]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]]) 735; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] 736; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() 737; TFA_INTERLEAVE-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 2 738; TFA_INTERLEAVE-NEXT: [[TMP19:%.*]] = add i64 [[INDEX_NEXT]], [[TMP18]] 739; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) 740; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT4]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP19]], i64 1025) 741; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) 742; TFA_INTERLEAVE-NEXT: [[TMP21:%.*]] = extractelement <vscale x 2 x i1> [[TMP20]], i32 0 743; TFA_INTERLEAVE-NEXT: br i1 [[TMP21]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] 744; TFA_INTERLEAVE: for.cond.cleanup: 745; TFA_INTERLEAVE-NEXT: ret void 746; 747entry: 748 br label %for.body 749 750for.body: 751 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] 752 %gep = getelementptr i64, ptr %b, i64 %indvars.iv 753 %load = load i64, ptr %gep 754 %call = call i64 @foo(i64 %load) #3 755 %arrayidx = getelementptr inbounds i64, ptr %a, i64 %indvars.iv 756 store i64 %call, ptr %arrayidx 757 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 758 %exitcond = icmp eq i64 %indvars.iv.next, 1025 759 br i1 %exitcond, label %for.cond.cleanup, label %for.body 760 761for.cond.cleanup: 762 ret void 763} 764 765 766; An fmuladd intrinsic followed by a call; we want to make sure we correctly 767; pick up the second call and assign a vector variant to it. 768define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, double %m) #4 { 769; TFNONE-LABEL: @test_widen_fmuladd_and_call( 770; TFNONE-NEXT: entry: 771; TFNONE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 772; TFNONE: vector.ph: 773; TFNONE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 774; TFNONE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 775; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] 776; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] 777; TFNONE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 778; TFNONE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 779; TFNONE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[M:%.*]], i64 0 780; TFNONE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x double> [[BROADCAST_SPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer 781; TFNONE-NEXT: br label [[VECTOR_BODY:%.*]] 782; TFNONE: vector.body: 783; TFNONE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 784; TFNONE-NEXT: [[VEC_PHI:%.*]] = phi double [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] 785; TFNONE-NEXT: [[TMP6:%.*]] = getelementptr double, ptr [[B:%.*]], i64 [[INDEX]] 786; TFNONE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x double>, ptr [[TMP6]], align 8 787; TFNONE-NEXT: [[TMP7:%.*]] = fmul <vscale x 2 x double> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] 788; TFNONE-NEXT: [[TMP8:%.*]] = fptoui <vscale x 2 x double> [[WIDE_LOAD]] to <vscale x 2 x i64> 789; TFNONE-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[TMP8]], <vscale x 2 x i1> splat (i1 true)) 790; TFNONE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] 791; TFNONE-NEXT: store <vscale x 2 x i64> [[TMP9]], ptr [[TMP10]], align 8 792; TFNONE-NEXT: [[TMP11]] = call double @llvm.vector.reduce.fadd.nxv2f64(double [[VEC_PHI]], <vscale x 2 x double> [[TMP7]]) 793; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 794; TFNONE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 795; TFNONE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] 796; TFNONE: middle.block: 797; TFNONE-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] 798; TFNONE: scalar.ph: 799; TFNONE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 800; TFNONE-NEXT: [[BC_MERGE_RDX:%.*]] = phi double [ [[TMP11]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] 801; TFNONE-NEXT: br label [[FOR_BODY:%.*]] 802; TFNONE: for.body: 803; TFNONE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 804; TFNONE-NEXT: [[FMA_SUM:%.*]] = phi double [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MULADD:%.*]], [[FOR_BODY]] ] 805; TFNONE-NEXT: [[GEP:%.*]] = getelementptr double, ptr [[B]], i64 [[INDVARS_IV]] 806; TFNONE-NEXT: [[LOAD:%.*]] = load double, ptr [[GEP]], align 8 807; TFNONE-NEXT: [[MULADD]] = tail call double @llvm.fmuladd.f64(double [[LOAD]], double [[M]], double [[FMA_SUM]]) 808; TFNONE-NEXT: [[TOINT:%.*]] = fptoui double [[LOAD]] to i64 809; TFNONE-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[TOINT]]) #[[ATTR3]] 810; TFNONE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] 811; TFNONE-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 8 812; TFNONE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 813; TFNONE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1025 814; TFNONE-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] 815; TFNONE: for.cond.cleanup: 816; TFNONE-NEXT: [[MULADD_LCSSA:%.*]] = phi double [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] 817; TFNONE-NEXT: ret double [[MULADD_LCSSA]] 818; 819; TFALWAYS-LABEL: @test_widen_fmuladd_and_call( 820; TFALWAYS-NEXT: entry: 821; TFALWAYS-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 822; TFALWAYS-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 823; TFALWAYS-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 824; TFALWAYS-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]] 825; TFALWAYS-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] 826; TFALWAYS-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] 827; TFALWAYS-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() 828; TFALWAYS-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 829; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) 830; TFALWAYS-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[M:%.*]], i64 0 831; TFALWAYS-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x double> [[BROADCAST_SPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer 832; TFALWAYS-NEXT: br label [[VECTOR_BODY:%.*]] 833; TFALWAYS: vector.body: 834; TFALWAYS-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 835; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] 836; TFALWAYS-NEXT: [[VEC_PHI:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] 837; TFALWAYS-NEXT: [[TMP5:%.*]] = getelementptr double, ptr [[B:%.*]], i64 [[INDEX]] 838; TFALWAYS-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr [[TMP5]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x double> poison) 839; TFALWAYS-NEXT: [[TMP6:%.*]] = fmul <vscale x 2 x double> [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]] 840; TFALWAYS-NEXT: [[TMP7:%.*]] = fptoui <vscale x 2 x double> [[WIDE_MASKED_LOAD]] to <vscale x 2 x i64> 841; TFALWAYS-NEXT: [[TMP8:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[TMP7]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) 842; TFALWAYS-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] 843; TFALWAYS-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP8]], ptr [[TMP9]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) 844; TFALWAYS-NEXT: [[TMP10:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x double> [[TMP6]], <vscale x 2 x double> splat (double -0.000000e+00) 845; TFALWAYS-NEXT: [[TMP11]] = call double @llvm.vector.reduce.fadd.nxv2f64(double [[VEC_PHI]], <vscale x 2 x double> [[TMP10]]) 846; TFALWAYS-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] 847; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) 848; TFALWAYS-NEXT: [[TMP12:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) 849; TFALWAYS-NEXT: [[TMP13:%.*]] = extractelement <vscale x 2 x i1> [[TMP12]], i32 0 850; TFALWAYS-NEXT: br i1 [[TMP13]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] 851; TFALWAYS: for.cond.cleanup: 852; TFALWAYS-NEXT: ret double [[TMP11]] 853; 854; TFFALLBACK-LABEL: @test_widen_fmuladd_and_call( 855; TFFALLBACK-NEXT: entry: 856; TFFALLBACK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 857; TFFALLBACK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 858; TFFALLBACK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 859; TFFALLBACK-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]] 860; TFFALLBACK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] 861; TFFALLBACK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] 862; TFFALLBACK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() 863; TFFALLBACK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 864; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) 865; TFFALLBACK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[M:%.*]], i64 0 866; TFFALLBACK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x double> [[BROADCAST_SPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer 867; TFFALLBACK-NEXT: br label [[VECTOR_BODY:%.*]] 868; TFFALLBACK: vector.body: 869; TFFALLBACK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 870; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] 871; TFFALLBACK-NEXT: [[VEC_PHI:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] 872; TFFALLBACK-NEXT: [[TMP5:%.*]] = getelementptr double, ptr [[B:%.*]], i64 [[INDEX]] 873; TFFALLBACK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr [[TMP5]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x double> poison) 874; TFFALLBACK-NEXT: [[TMP6:%.*]] = fmul <vscale x 2 x double> [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]] 875; TFFALLBACK-NEXT: [[TMP7:%.*]] = fptoui <vscale x 2 x double> [[WIDE_MASKED_LOAD]] to <vscale x 2 x i64> 876; TFFALLBACK-NEXT: [[TMP8:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[TMP7]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) 877; TFFALLBACK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] 878; TFFALLBACK-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP8]], ptr [[TMP9]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) 879; TFFALLBACK-NEXT: [[TMP10:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x double> [[TMP6]], <vscale x 2 x double> splat (double -0.000000e+00) 880; TFFALLBACK-NEXT: [[TMP11]] = call double @llvm.vector.reduce.fadd.nxv2f64(double [[VEC_PHI]], <vscale x 2 x double> [[TMP10]]) 881; TFFALLBACK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] 882; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) 883; TFFALLBACK-NEXT: [[TMP12:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) 884; TFFALLBACK-NEXT: [[TMP13:%.*]] = extractelement <vscale x 2 x i1> [[TMP12]], i32 0 885; TFFALLBACK-NEXT: br i1 [[TMP13]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] 886; TFFALLBACK: for.cond.cleanup: 887; TFFALLBACK-NEXT: ret double [[TMP11]] 888; 889; TFA_INTERLEAVE-LABEL: @test_widen_fmuladd_and_call( 890; TFA_INTERLEAVE-NEXT: entry: 891; TFA_INTERLEAVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 892; TFA_INTERLEAVE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 893; TFA_INTERLEAVE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 894; TFA_INTERLEAVE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]] 895; TFA_INTERLEAVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] 896; TFA_INTERLEAVE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] 897; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() 898; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 899; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() 900; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 901; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) 902; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 1025) 903; TFA_INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[M:%.*]], i64 0 904; TFA_INTERLEAVE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x double> [[BROADCAST_SPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer 905; TFA_INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]] 906; TFA_INTERLEAVE: vector.body: 907; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 908; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] 909; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY1]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT4:%.*]], [[VECTOR_BODY]] ] 910; TFA_INTERLEAVE-NEXT: [[VEC_PHI:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[TMP24:%.*]], [[VECTOR_BODY]] ] 911; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr double, ptr [[B:%.*]], i64 [[INDEX]] 912; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() 913; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 914; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = getelementptr double, ptr [[TMP7]], i64 [[TMP9]] 915; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr [[TMP7]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x double> poison) 916; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr [[TMP10]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]], <vscale x 2 x double> poison) 917; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = fmul <vscale x 2 x double> [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]] 918; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = fmul <vscale x 2 x double> [[WIDE_MASKED_LOAD3]], [[BROADCAST_SPLAT]] 919; TFA_INTERLEAVE-NEXT: [[TMP13:%.*]] = fptoui <vscale x 2 x double> [[WIDE_MASKED_LOAD]] to <vscale x 2 x i64> 920; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = fptoui <vscale x 2 x double> [[WIDE_MASKED_LOAD3]] to <vscale x 2 x i64> 921; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[TMP13]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) 922; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[TMP14]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]]) 923; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] 924; TFA_INTERLEAVE-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() 925; TFA_INTERLEAVE-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 2 926; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[TMP17]], i64 [[TMP19]] 927; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP15]], ptr [[TMP17]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) 928; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP16]], ptr [[TMP20]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]]) 929; TFA_INTERLEAVE-NEXT: [[TMP21:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x double> [[TMP11]], <vscale x 2 x double> splat (double -0.000000e+00) 930; TFA_INTERLEAVE-NEXT: [[TMP22:%.*]] = call double @llvm.vector.reduce.fadd.nxv2f64(double [[VEC_PHI]], <vscale x 2 x double> [[TMP21]]) 931; TFA_INTERLEAVE-NEXT: [[TMP23:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]], <vscale x 2 x double> [[TMP12]], <vscale x 2 x double> splat (double -0.000000e+00) 932; TFA_INTERLEAVE-NEXT: [[TMP24]] = call double @llvm.vector.reduce.fadd.nxv2f64(double [[TMP22]], <vscale x 2 x double> [[TMP23]]) 933; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] 934; TFA_INTERLEAVE-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64() 935; TFA_INTERLEAVE-NEXT: [[TMP26:%.*]] = mul i64 [[TMP25]], 2 936; TFA_INTERLEAVE-NEXT: [[TMP27:%.*]] = add i64 [[INDEX_NEXT]], [[TMP26]] 937; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) 938; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT4]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP27]], i64 1025) 939; TFA_INTERLEAVE-NEXT: [[TMP28:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) 940; TFA_INTERLEAVE-NEXT: [[TMP29:%.*]] = extractelement <vscale x 2 x i1> [[TMP28]], i32 0 941; TFA_INTERLEAVE-NEXT: br i1 [[TMP29]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] 942; TFA_INTERLEAVE: for.cond.cleanup: 943; TFA_INTERLEAVE-NEXT: ret double [[TMP24]] 944; 945entry: 946 br label %for.body 947 948for.body: 949 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] 950 %fma_sum = phi double [ 0.000000e+00, %entry ], [ %muladd, %for.body ] 951 %gep = getelementptr double, ptr %b, i64 %indvars.iv 952 %load = load double, ptr %gep 953 %muladd = tail call double @llvm.fmuladd.f64(double %load, double %m, double %fma_sum) 954 %toint = fptoui double %load to i64 955 %call = call i64 @foo(i64 %toint) #1 956 %arrayidx = getelementptr inbounds i64, ptr %a, i64 %indvars.iv 957 store i64 %call, ptr %arrayidx 958 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 959 %exitcond = icmp eq i64 %indvars.iv.next, 1025 960 br i1 %exitcond, label %for.cond.cleanup, label %for.body 961 962for.cond.cleanup: 963 ret double %muladd 964} 965 966declare i64 @foo(i64) 967declare double @llvm.fmuladd.f64(double, double, double) 968 969;; scalable vector variants of foo 970declare <vscale x 2 x i64> @foo_uniform(i64, <vscale x 2 x i1>) 971declare <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64>, <vscale x 2 x i1>) 972declare <vscale x 2 x i64> @foo_vector_nomask(<vscale x 2 x i64>) 973 974attributes #0 = { nounwind "vector-function-abi-variant"="_ZGVsMxv_foo(foo_vector),_ZGVsMxu_foo(foo_uniform)" } 975attributes #1 = { nounwind "vector-function-abi-variant"="_ZGVsMxv_foo(foo_vector)" } 976attributes #2 = { nounwind "vector-function-abi-variant"="_ZGVsNxv_foo(foo_vector_nomask)" } 977attributes #3 = { nounwind "vector-function-abi-variant"="_ZGVsNxv_foo(foo_vector_nomask),_ZGVsMxv_foo(foo_vector)" } 978attributes #4 = { "target-features"="+sve" vscale_range(2,16) "no-trapping-math"="false" } 979