; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=loop-vectorize -scalable-vectorization=on -riscv-v-vector-bits-min=0 -mtriple riscv64-linux-gnu -mattr=+v,+f -S 2>%t | FileCheck %s -check-prefix=SCALABLE
; RUN: opt < %s -passes=loop-vectorize -scalable-vectorization=off -riscv-v-vector-bits-min=-1 -mtriple riscv64-linux-gnu -mattr=+v,+f -S 2>%t | FileCheck %s -check-prefix=FIXEDLEN
; RUN: opt < %s -passes=loop-vectorize -scalable-vectorization=on -riscv-v-vector-bits-min=0 -prefer-predicate-over-epilogue=predicate-dont-vectorize -mtriple riscv64-linux-gnu -mattr=+v,+f -S 2>%t | FileCheck %s -check-prefix=TF-SCALABLE
; RUN: opt < %s -passes=loop-vectorize -scalable-vectorization=off -riscv-v-vector-bits-min=-1 -prefer-predicate-over-epilogue=predicate-dont-vectorize -mtriple riscv64-linux-gnu -mattr=+v,+f -S 2>%t | FileCheck %s -check-prefix=TF-FIXEDLEN

target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
target triple = "riscv64"

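; A load from a loop-invariant address. In every configuration below the
; load stays scalar inside the vector body and the loaded value is splat
; across the vector lanes for the wide store to %a.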
define void @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %n) {
; SCALABLE-LABEL: @uniform_load(
; SCALABLE-NEXT: entry:
; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]]
; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SCALABLE: vector.ph:
; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]]
; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALABLE: vector.body:
; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SCALABLE-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0
; SCALABLE-NEXT: [[TMP7:%.*]] = load i64, ptr [[B:%.*]], align 8
; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP7]], i64 0
; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; SCALABLE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]]
; SCALABLE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0
; SCALABLE-NEXT: store <vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP9]], align 8
; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; SCALABLE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; SCALABLE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; SCALABLE: middle.block:
; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]]
; SCALABLE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; SCALABLE: scalar.ph:
; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; SCALABLE-NEXT: br label [[FOR_BODY:%.*]]
; SCALABLE: for.body:
; SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; SCALABLE-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 8
; SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; SCALABLE: for.end:
; SCALABLE-NEXT: ret void
;
; FIXEDLEN-LABEL: @uniform_load(
; FIXEDLEN-NEXT: entry:
; FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; FIXEDLEN: vector.ph:
; FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]]
; FIXEDLEN: vector.body:
; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; FIXEDLEN-NEXT: [[TMP1:%.*]] = load i64, ptr [[B:%.*]], align 8
; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP1]], i64 0
; FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
; FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4
; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8
; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8
; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; FIXEDLEN-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; FIXEDLEN-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; FIXEDLEN: middle.block:
; FIXEDLEN-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; FIXEDLEN: scalar.ph:
; FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; FIXEDLEN-NEXT: br label [[FOR_BODY:%.*]]
; FIXEDLEN: for.body:
; FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; FIXEDLEN-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 8
; FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; FIXEDLEN: for.end:
; FIXEDLEN-NEXT: ret void
;
; TF-SCALABLE-LABEL: @uniform_load(
; TF-SCALABLE-NEXT: entry:
; TF-SCALABLE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; TF-SCALABLE: vector.ph:
; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; TF-SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; TF-SCALABLE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]]
; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; TF-SCALABLE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2
; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; TF-SCALABLE: vector.body:
; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; TF-SCALABLE-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0
; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1025)
; TF-SCALABLE-NEXT: [[TMP6:%.*]] = load i64, ptr [[B:%.*]], align 8
; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP6]], i64 0
; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; TF-SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]]
; TF-SCALABLE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0
; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP8]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
; TF-SCALABLE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; TF-SCALABLE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; TF-SCALABLE: middle.block:
; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; TF-SCALABLE: scalar.ph:
; TF-SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; TF-SCALABLE-NEXT: br label [[FOR_BODY:%.*]]
; TF-SCALABLE: for.body:
; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; TF-SCALABLE-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 8
; TF-SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; TF-SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; TF-SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
; TF-SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; TF-SCALABLE: for.end:
; TF-SCALABLE-NEXT: ret void
;
; TF-FIXEDLEN-LABEL: @uniform_load(
; TF-FIXEDLEN-NEXT: entry:
; TF-FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; TF-FIXEDLEN: vector.ph:
; TF-FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]]
; TF-FIXEDLEN: vector.body:
; TF-FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; TF-FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; TF-FIXEDLEN-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[TMP0]], i64 1025)
; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = load i64, ptr [[B:%.*]], align 8
; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP1]], i64 0
; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], i32 8, <4 x i1> [[ACTIVE_LANE_MASK]])
; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028
; TF-FIXEDLEN-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; TF-FIXEDLEN: middle.block:
; TF-FIXEDLEN-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; TF-FIXEDLEN: scalar.ph:
; TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1028, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; TF-FIXEDLEN-NEXT: br label [[FOR_BODY:%.*]]
; TF-FIXEDLEN: for.body:
; TF-FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; TF-FIXEDLEN-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 8
; TF-FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; TF-FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; TF-FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; TF-FIXEDLEN: for.end:
; TF-FIXEDLEN-NEXT: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %v = load i64, ptr %b, align 8
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
  store i64 %v, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1025
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}

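; As above, but the loaded value is also used outside the loop. The
; non-tail-folded configurations vectorize and feed the load into the LCSSA
; phi in for.end; the tail-folded configurations currently leave the loop
; scalar rather than compute the value of the final active lane.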
define i64 @uniform_load_outside_use(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %n) {
; SCALABLE-LABEL: @uniform_load_outside_use(
; SCALABLE-NEXT: entry:
; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]]
; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SCALABLE: vector.ph:
; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]]
; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALABLE: vector.body:
; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SCALABLE-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0
; SCALABLE-NEXT: [[TMP7:%.*]] = load i64, ptr [[B:%.*]], align 8
; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP7]], i64 0
; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; SCALABLE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]]
; SCALABLE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0
; SCALABLE-NEXT: store <vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP9]], align 8
; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; SCALABLE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; SCALABLE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; SCALABLE: middle.block:
; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]]
; SCALABLE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; SCALABLE: scalar.ph:
; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; SCALABLE-NEXT: br label [[FOR_BODY:%.*]]
; SCALABLE: for.body:
; SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; SCALABLE-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 8
; SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; SCALABLE: for.end:
; SCALABLE-NEXT: [[V_LCSSA:%.*]] = phi i64 [ [[V]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
; SCALABLE-NEXT: ret i64 [[V_LCSSA]]
;
; FIXEDLEN-LABEL: @uniform_load_outside_use(
; FIXEDLEN-NEXT: entry:
; FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; FIXEDLEN: vector.ph:
; FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]]
; FIXEDLEN: vector.body:
; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; FIXEDLEN-NEXT: [[TMP1:%.*]] = load i64, ptr [[B:%.*]], align 8
; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP1]], i64 0
; FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
; FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4
; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8
; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8
; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; FIXEDLEN-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; FIXEDLEN-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; FIXEDLEN: middle.block:
; FIXEDLEN-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; FIXEDLEN: scalar.ph:
; FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; FIXEDLEN-NEXT: br label [[FOR_BODY:%.*]]
; FIXEDLEN: for.body:
; FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; FIXEDLEN-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 8
; FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; FIXEDLEN: for.end:
; FIXEDLEN-NEXT: [[V_LCSSA:%.*]] = phi i64 [ [[V]], [[FOR_BODY]] ], [ [[TMP1]], [[MIDDLE_BLOCK]] ]
; FIXEDLEN-NEXT: ret i64 [[V_LCSSA]]
;
; TF-SCALABLE-LABEL: @uniform_load_outside_use(
; TF-SCALABLE-NEXT: entry:
; TF-SCALABLE-NEXT: br label [[FOR_BODY:%.*]]
; TF-SCALABLE: for.body:
; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; TF-SCALABLE-NEXT: [[V:%.*]] = load i64, ptr [[B:%.*]], align 8
; TF-SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[IV]]
; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; TF-SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; TF-SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
; TF-SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; TF-SCALABLE: for.end:
; TF-SCALABLE-NEXT: [[V_LCSSA:%.*]] = phi i64 [ [[V]], [[FOR_BODY]] ]
; TF-SCALABLE-NEXT: ret i64 [[V_LCSSA]]
;
; TF-FIXEDLEN-LABEL: @uniform_load_outside_use(
; TF-FIXEDLEN-NEXT: entry:
; TF-FIXEDLEN-NEXT: br label [[FOR_BODY:%.*]]
; TF-FIXEDLEN: for.body:
; TF-FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; TF-FIXEDLEN-NEXT: [[V:%.*]] = load i64, ptr [[B:%.*]], align 8
; TF-FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[IV]]
; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; TF-FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; TF-FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; TF-FIXEDLEN: for.end:
; TF-FIXEDLEN-NEXT: [[V_LCSSA:%.*]] = phi i64 [ [[V]], [[FOR_BODY]] ]
; TF-FIXEDLEN-NEXT: ret i64 [[V_LCSSA]]
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %v = load i64, ptr %b, align 8
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
  store i64 %v, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1025
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret i64 %v
}

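; The uniform load is conditional on iv > 10, so it must be predicated when
; vectorizing: the vector bodies below use a masked gather of the splatted
; pointer and select the gathered lanes against zero for the masked-off
; iterations.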
define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %n) {
; SCALABLE-LABEL: @conditional_uniform_load(
; SCALABLE-NEXT: entry:
; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]]
; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SCALABLE: vector.ph:
; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]]
; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
; SCALABLE-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; SCALABLE-NEXT: [[TMP8:%.*]] = mul <vscale x 2 x i64> [[TMP6]], splat (i64 1)
; SCALABLE-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP8]]
; SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP5]]
; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP9]], i64 0
; SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[B:%.*]], i64 0
; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALABLE: vector.body:
; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SCALABLE-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; SCALABLE-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0
; SCALABLE-NEXT: [[TMP11:%.*]] = icmp ugt <vscale x 2 x i64> [[VEC_IND]], splat (i64 10)
; SCALABLE-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[BROADCAST_SPLAT]], i32 8, <vscale x 2 x i1> [[TMP11]], <vscale x 2 x i64> poison)
; SCALABLE-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP11]], <vscale x 2 x i64> [[WIDE_MASKED_GATHER]], <vscale x 2 x i64> zeroinitializer
; SCALABLE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP10]]
; SCALABLE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 0
; SCALABLE-NEXT: store <vscale x 2 x i64> [[PREDPHI]], ptr [[TMP13]], align 8
; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; SCALABLE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
; SCALABLE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; SCALABLE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; SCALABLE: middle.block:
; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]]
; SCALABLE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; SCALABLE: scalar.ph:
; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; SCALABLE-NEXT: br label [[FOR_BODY:%.*]]
; SCALABLE: for.body:
; SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
; SCALABLE-NEXT: [[CMP:%.*]] = icmp ugt i64 [[IV]], 10
; SCALABLE-NEXT: br i1 [[CMP]], label [[DO_LOAD:%.*]], label [[LATCH]]
; SCALABLE: do_load:
; SCALABLE-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 8
; SCALABLE-NEXT: br label [[LATCH]]
; SCALABLE: latch:
; SCALABLE-NEXT: [[PHI:%.*]] = phi i64 [ 0, [[FOR_BODY]] ], [ [[V]], [[DO_LOAD]] ]
; SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; SCALABLE-NEXT: store i64 [[PHI]], ptr [[ARRAYIDX]], align 8
; SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; SCALABLE: for.end:
; SCALABLE-NEXT: ret void
;
; FIXEDLEN-LABEL: @conditional_uniform_load(
; FIXEDLEN-NEXT: entry:
; FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; FIXEDLEN: vector.ph:
; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[B:%.*]], i64 0
; FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer
; FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]]
; FIXEDLEN: vector.body:
; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; FIXEDLEN-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; FIXEDLEN-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; FIXEDLEN-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i64> [[VEC_IND]], splat (i64 10)
; FIXEDLEN-NEXT: [[TMP2:%.*]] = icmp ugt <4 x i64> [[STEP_ADD]], splat (i64 10)
; FIXEDLEN-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[BROADCAST_SPLAT]], i32 8, <4 x i1> [[TMP1]], <4 x i64> poison)
; FIXEDLEN-NEXT: [[WIDE_MASKED_GATHER1:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[BROADCAST_SPLAT]], i32 8, <4 x i1> [[TMP2]], <4 x i64> poison)
; FIXEDLEN-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> [[WIDE_MASKED_GATHER]], <4 x i64> zeroinitializer
; FIXEDLEN-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP2]], <4 x i64> [[WIDE_MASKED_GATHER1]], <4 x i64> zeroinitializer
; FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
; FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 4
; FIXEDLEN-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP4]], align 8
; FIXEDLEN-NEXT: store <4 x i64> [[PREDPHI2]], ptr [[TMP5]], align 8
; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], splat (i64 4)
; FIXEDLEN-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; FIXEDLEN-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; FIXEDLEN: middle.block:
; FIXEDLEN-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; FIXEDLEN: scalar.ph:
; FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; FIXEDLEN-NEXT: br label [[FOR_BODY:%.*]]
; FIXEDLEN: for.body:
; FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
; FIXEDLEN-NEXT: [[CMP:%.*]] = icmp ugt i64 [[IV]], 10
; FIXEDLEN-NEXT: br i1 [[CMP]], label [[DO_LOAD:%.*]], label [[LATCH]]
; FIXEDLEN: do_load:
; FIXEDLEN-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 8
; FIXEDLEN-NEXT: br label [[LATCH]]
; FIXEDLEN: latch:
; FIXEDLEN-NEXT: [[PHI:%.*]] = phi i64 [ 0, [[FOR_BODY]] ], [ [[V]], [[DO_LOAD]] ]
; FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; FIXEDLEN-NEXT: store i64 [[PHI]], ptr [[ARRAYIDX]], align 8
; FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; FIXEDLEN: for.end:
; FIXEDLEN-NEXT: ret void
;
; TF-SCALABLE-LABEL: @conditional_uniform_load(
; TF-SCALABLE-NEXT: entry:
; TF-SCALABLE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; TF-SCALABLE: vector.ph:
; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; TF-SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; TF-SCALABLE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]]
; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; TF-SCALABLE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2
; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul <vscale x 2 x i64> [[TMP5]], splat (i64 1)
; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP7]]
; TF-SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP4]]
; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP8]], i64 0
; TF-SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[B:%.*]], i64 0
; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; TF-SCALABLE: vector.body:
; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; TF-SCALABLE-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; TF-SCALABLE-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0
; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP9]], i64 1025)
; TF-SCALABLE-NEXT: [[TMP10:%.*]] = icmp ugt <vscale x 2 x i64> [[VEC_IND]], splat (i64 10)
; TF-SCALABLE-NEXT: [[TMP11:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> [[TMP10]], <vscale x 2 x i1> zeroinitializer
; TF-SCALABLE-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[BROADCAST_SPLAT]], i32 8, <vscale x 2 x i1> [[TMP11]], <vscale x 2 x i64> poison)
; TF-SCALABLE-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP11]], <vscale x 2 x i64> [[WIDE_MASKED_GATHER]], <vscale x 2 x i64> zeroinitializer
; TF-SCALABLE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP9]]
; TF-SCALABLE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 0
; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI]], ptr [[TMP13]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
; TF-SCALABLE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; TF-SCALABLE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; TF-SCALABLE: middle.block:
; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; TF-SCALABLE: scalar.ph:
; TF-SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; TF-SCALABLE-NEXT: br label [[FOR_BODY:%.*]]
; TF-SCALABLE: for.body:
; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
; TF-SCALABLE-NEXT: [[CMP:%.*]] = icmp ugt i64 [[IV]], 10
; TF-SCALABLE-NEXT: br i1 [[CMP]], label [[DO_LOAD:%.*]], label [[LATCH]]
; TF-SCALABLE: do_load:
; TF-SCALABLE-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 8
; TF-SCALABLE-NEXT: br label [[LATCH]]
; TF-SCALABLE: latch:
; TF-SCALABLE-NEXT: [[PHI:%.*]] = phi i64 [ 0, [[FOR_BODY]] ], [ [[V]], [[DO_LOAD]] ]
; TF-SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; TF-SCALABLE-NEXT: store i64 [[PHI]], ptr [[ARRAYIDX]], align 8
; TF-SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; TF-SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
; TF-SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; TF-SCALABLE: for.end:
; TF-SCALABLE-NEXT: ret void
;
; TF-FIXEDLEN-LABEL: @conditional_uniform_load(
; TF-FIXEDLEN-NEXT: entry:
; TF-FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; TF-FIXEDLEN: vector.ph:
; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[B:%.*]], i64 0
; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer
; TF-FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]]
; TF-FIXEDLEN: vector.body:
; TF-FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; TF-FIXEDLEN-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; TF-FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; TF-FIXEDLEN-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[TMP0]], i64 1025)
; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i64> [[VEC_IND]], splat (i64 10)
; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i1> [[TMP1]], <4 x i1> zeroinitializer
; TF-FIXEDLEN-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[BROADCAST_SPLAT]], i32 8, <4 x i1> [[TMP2]], <4 x i64> poison)
; TF-FIXEDLEN-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP2]], <4 x i64> [[WIDE_MASKED_GATHER]], <4 x i64> zeroinitializer
; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[PREDPHI]], ptr [[TMP4]], i32 8, <4 x i1> [[ACTIVE_LANE_MASK]])
; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; TF-FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; TF-FIXEDLEN-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028
; TF-FIXEDLEN-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; TF-FIXEDLEN: middle.block:
; TF-FIXEDLEN-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; TF-FIXEDLEN: scalar.ph:
; TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1028, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; TF-FIXEDLEN-NEXT: br label [[FOR_BODY:%.*]]
; TF-FIXEDLEN: for.body:
; TF-FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
; TF-FIXEDLEN-NEXT: [[CMP:%.*]] = icmp ugt i64 [[IV]], 10
; TF-FIXEDLEN-NEXT: br i1 [[CMP]], label [[DO_LOAD:%.*]], label [[LATCH]]
; TF-FIXEDLEN: do_load:
; TF-FIXEDLEN-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 8
; TF-FIXEDLEN-NEXT: br label [[LATCH]]
; TF-FIXEDLEN: latch:
; TF-FIXEDLEN-NEXT: [[PHI:%.*]] = phi i64 [ 0, [[FOR_BODY]] ], [ [[V]], [[DO_LOAD]] ]
; TF-FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; TF-FIXEDLEN-NEXT: store i64 [[PHI]], ptr [[ARRAYIDX]], align 8
; TF-FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; TF-FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; TF-FIXEDLEN: for.end:
; TF-FIXEDLEN-NEXT: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
  %cmp = icmp ugt i64 %iv, 10
  br i1 %cmp, label %do_load, label %latch
do_load:
  %v = load i64, ptr %b, align 8
  br label %latch

latch:
  %phi = phi i64 [0, %for.body], [%v, %do_load]
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
  store i64 %phi, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1025
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}

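; Same as @uniform_load, except the scalar load is only align 1. The lower
; alignment is preserved on the load and does not block vectorization in any
; configuration.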
define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %n) {
; SCALABLE-LABEL: @uniform_load_unaligned(
; SCALABLE-NEXT: entry:
; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]]
; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SCALABLE: vector.ph:
; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]]
; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALABLE: vector.body:
; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SCALABLE-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0
; SCALABLE-NEXT: [[TMP7:%.*]] = load i64, ptr [[B:%.*]], align 1
; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP7]], i64 0
; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; SCALABLE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]]
; SCALABLE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0
; SCALABLE-NEXT: store <vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP9]], align 8
; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; SCALABLE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; SCALABLE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; SCALABLE: middle.block:
; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]]
; SCALABLE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; SCALABLE: scalar.ph:
; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; SCALABLE-NEXT: br label [[FOR_BODY:%.*]]
; SCALABLE: for.body:
; SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; SCALABLE-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 1
; SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; SCALABLE: for.end:
; SCALABLE-NEXT: ret void
;
; FIXEDLEN-LABEL: @uniform_load_unaligned(
; FIXEDLEN-NEXT: entry:
; FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; FIXEDLEN: vector.ph:
; FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]]
; FIXEDLEN: vector.body:
; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; FIXEDLEN-NEXT: [[TMP1:%.*]] = load i64, ptr [[B:%.*]], align 1
; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP1]], i64 0
; FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
; FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4
; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8
; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8
; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; FIXEDLEN-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; FIXEDLEN-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; FIXEDLEN: middle.block:
; FIXEDLEN-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; FIXEDLEN: scalar.ph:
; FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; FIXEDLEN-NEXT: br label [[FOR_BODY:%.*]]
; FIXEDLEN: for.body:
; FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; FIXEDLEN-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 1
; FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; FIXEDLEN: for.end:
; FIXEDLEN-NEXT: ret void
;
; TF-SCALABLE-LABEL: @uniform_load_unaligned(
; TF-SCALABLE-NEXT: entry:
; TF-SCALABLE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; TF-SCALABLE: vector.ph:
; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; TF-SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; TF-SCALABLE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]]
; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; TF-SCALABLE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2
; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; TF-SCALABLE: vector.body:
; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; TF-SCALABLE-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0
; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1025)
; TF-SCALABLE-NEXT: [[TMP6:%.*]] = load i64, ptr [[B:%.*]], align 1
; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP6]], i64 0
; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; TF-SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]]
; TF-SCALABLE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0
; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP8]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
; TF-SCALABLE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; TF-SCALABLE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; TF-SCALABLE: middle.block:
; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; TF-SCALABLE: scalar.ph:
; TF-SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; TF-SCALABLE-NEXT: br label [[FOR_BODY:%.*]]
; TF-SCALABLE: for.body:
; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; TF-SCALABLE-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 1
; TF-SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; TF-SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; TF-SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
; TF-SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; TF-SCALABLE: for.end:
; TF-SCALABLE-NEXT: ret void
;
; TF-FIXEDLEN-LABEL: @uniform_load_unaligned(
; TF-FIXEDLEN-NEXT: entry:
; TF-FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; TF-FIXEDLEN: vector.ph:
; TF-FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]]
; TF-FIXEDLEN: vector.body:
; TF-FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; TF-FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; TF-FIXEDLEN-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[TMP0]], i64 1025)
; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = load i64, ptr [[B:%.*]], align 1
; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP1]], i64 0
; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], i32 8, <4 x i1> [[ACTIVE_LANE_MASK]])
; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028
; TF-FIXEDLEN-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; TF-FIXEDLEN: middle.block:
; TF-FIXEDLEN-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; TF-FIXEDLEN: scalar.ph:
; TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1028, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; TF-FIXEDLEN-NEXT: br label [[FOR_BODY:%.*]]
; TF-FIXEDLEN: for.body:
; TF-FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; TF-FIXEDLEN-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 1
; TF-FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; TF-FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; TF-FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; TF-FIXEDLEN: for.end:
; TF-FIXEDLEN-NEXT: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %v = load i64, ptr %b, align 1
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
  store i64 %v, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1025
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}

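; A store of loop-invariant %v to the loop-invariant address %b, next to the
; contiguous store to %a. The invariant store is kept as a scalar store
; inside the vector body in every configuration.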
define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) {
; SCALABLE-LABEL: @uniform_store(
; SCALABLE-NEXT: entry:
; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]]
; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SCALABLE: vector.ph:
; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]]
; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALABLE: vector.body:
; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SCALABLE-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0
; SCALABLE-NEXT: store i64 [[V]], ptr [[B:%.*]], align 8
; SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]]
; SCALABLE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0
; SCALABLE-NEXT: store <vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP8]], align 8
; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; SCALABLE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; SCALABLE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; SCALABLE: middle.block:
; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]]
; SCALABLE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; SCALABLE: scalar.ph:
; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; SCALABLE-NEXT: br label [[FOR_BODY:%.*]]
; SCALABLE: for.body:
; SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; SCALABLE-NEXT: store i64 [[V]], ptr [[B]], align 8
; SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; SCALABLE: for.end:
; SCALABLE-NEXT: ret void
;
; FIXEDLEN-LABEL: @uniform_store(
; FIXEDLEN-NEXT: entry:
; FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; FIXEDLEN: vector.ph:
; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V:%.*]], i64 0
; FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]]
; FIXEDLEN: vector.body:
; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; FIXEDLEN-NEXT: store i64 [[V]], ptr [[B:%.*]], align 8
; FIXEDLEN-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0
; FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 4
; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP2]], align 8
; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8
; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; FIXEDLEN-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; FIXEDLEN-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; FIXEDLEN: middle.block:
; FIXEDLEN-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; FIXEDLEN: scalar.ph:
; FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; FIXEDLEN-NEXT: br label [[FOR_BODY:%.*]]
; FIXEDLEN: for.body:
; FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; FIXEDLEN-NEXT: store i64 [[V]], ptr [[B]], align 8
; FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; FIXEDLEN: for.end:
; FIXEDLEN-NEXT: ret void
;
; TF-SCALABLE-LABEL: @uniform_store(
; TF-SCALABLE-NEXT: entry:
; TF-SCALABLE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; TF-SCALABLE: vector.ph:
; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; TF-SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; TF-SCALABLE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]]
; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; TF-SCALABLE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2
; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; TF-SCALABLE: vector.body:
; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; TF-SCALABLE-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0
; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1025)
; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[B:%.*]], align 8
; TF-SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]]
; TF-SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0
; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP7]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
; TF-SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; TF-SCALABLE-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; TF-SCALABLE: middle.block:
; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; TF-SCALABLE: scalar.ph:
; TF-SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; TF-SCALABLE-NEXT: br label [[FOR_BODY:%.*]]
; TF-SCALABLE: for.body:
; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[B]], align 8
; TF-SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; TF-SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; TF-SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
; TF-SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; TF-SCALABLE: for.end:
; TF-SCALABLE-NEXT: ret void
;
; TF-FIXEDLEN-LABEL: @uniform_store(
; TF-FIXEDLEN-NEXT: entry:
; TF-FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; TF-FIXEDLEN: vector.ph:
; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V:%.*]], i64 0
; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; TF-FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]]
; TF-FIXEDLEN: vector.body:
; TF-FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; TF-FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; TF-FIXEDLEN-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[TMP0]], i64 1025)
; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[B:%.*]], align 8
; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0
; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP2]], i32 8, <4 x i1> [[ACTIVE_LANE_MASK]])
; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028
; TF-FIXEDLEN-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; TF-FIXEDLEN: middle.block:
; TF-FIXEDLEN-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; TF-FIXEDLEN: scalar.ph:
; TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1028, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; TF-FIXEDLEN-NEXT: br label [[FOR_BODY:%.*]]
; TF-FIXEDLEN: for.body:
; TF-FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[B]], align 8
; TF-FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; TF-FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; TF-FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; TF-FIXEDLEN: for.end:
; TF-FIXEDLEN-NEXT: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  store i64 %v, ptr %b, align 8
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
  store i64 %v, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1025
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}

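; Stores a loop-varying value (the induction variable) to a loop-invariant
; address, so the vectorized loops must store the value of the last lane of
; each vector iteration: the SCALABLE checks extract lane vscale*2-1 of the
; step vector, while FIXEDLEN stores index+7, the last of its eight lanes.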
define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) {
; SCALABLE-LABEL: @uniform_store_of_loop_varying(
; SCALABLE-NEXT: entry:
; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]]
; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SCALABLE: vector.ph:
; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]]
; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALABLE: vector.body:
; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SCALABLE-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[INDEX]], i64 0
; SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; SCALABLE-NEXT: [[TMP7:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP6]]
; SCALABLE-NEXT: [[TMP8:%.*]] = mul <vscale x 2 x i64> [[TMP7]], splat (i64 1)
; SCALABLE-NEXT: [[TMP9:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[TMP8]]
; SCALABLE-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0
; SCALABLE-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1
; SCALABLE-NEXT: [[TMP12:%.*]] = call i32 @llvm.vscale.i32()
; SCALABLE-NEXT: [[TMP13:%.*]] = mul i32 [[TMP12]], 2
; SCALABLE-NEXT: [[TMP14:%.*]] = sub i32 [[TMP13]], 1
; SCALABLE-NEXT: [[TMP15:%.*]] = extractelement <vscale x 2 x i64> [[TMP9]], i32 [[TMP14]]
; SCALABLE-NEXT: store i64 [[TMP15]], ptr [[B:%.*]], align 8
; SCALABLE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP10]]
; SCALABLE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[TMP16]], i32 0
; SCALABLE-NEXT: store <vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP17]], align 8
; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; SCALABLE-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; SCALABLE-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; SCALABLE: middle.block:
; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]]
; SCALABLE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; SCALABLE: scalar.ph:
; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; SCALABLE-NEXT: br label [[FOR_BODY:%.*]]
; SCALABLE: for.body:
; SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; SCALABLE-NEXT: store i64 [[IV]], ptr [[B]], align 8
; SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; SCALABLE: for.end:
; SCALABLE-NEXT: ret void
;
; FIXEDLEN-LABEL: @uniform_store_of_loop_varying(
; FIXEDLEN-NEXT: entry:
; FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; FIXEDLEN: vector.ph:
; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V:%.*]], i64 0
; FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]]
; FIXEDLEN: vector.body:
; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4
; FIXEDLEN-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 5
; FIXEDLEN-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 6
; FIXEDLEN-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 7
; FIXEDLEN-NEXT: store i64 [[TMP4]], ptr [[B:%.*]], align 8
; FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; FIXEDLEN-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
; FIXEDLEN-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 4
; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8
; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP7]], align 8
; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; FIXEDLEN-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; FIXEDLEN-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; FIXEDLEN: middle.block:
; FIXEDLEN-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; FIXEDLEN: scalar.ph:
; FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; FIXEDLEN-NEXT: br label [[FOR_BODY:%.*]]
; FIXEDLEN: for.body:
; FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; FIXEDLEN-NEXT: store i64 [[IV]], ptr [[B]], align 8
; FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; FIXEDLEN: for.end:
; FIXEDLEN-NEXT: ret void
;
; TF-SCALABLE-LABEL: @uniform_store_of_loop_varying(
; TF-SCALABLE-NEXT: entry:
; TF-SCALABLE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; TF-SCALABLE: vector.ph:
; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; TF-SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; TF-SCALABLE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]]
; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; TF-SCALABLE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2
; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul <vscale x 2 x i64> [[TMP5]], splat (i64 1)
; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP7]]
; TF-SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP4]]
; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP8]], i64 0
; TF-SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[B:%.*]], i64 0
; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; TF-SCALABLE: vector.body:
; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; TF-SCALABLE-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; TF-SCALABLE-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0
; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP9]], i64 1025)
; TF-SCALABLE-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[VEC_IND]], <vscale x 2 x ptr> [[BROADCAST_SPLAT]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; TF-SCALABLE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP9]]
; TF-SCALABLE-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 0
; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP11]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
; TF-SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; TF-SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; TF-SCALABLE: middle.block:
; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; TF-SCALABLE: scalar.ph:
; TF-SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; TF-SCALABLE-NEXT: br label [[FOR_BODY:%.*]]
; TF-SCALABLE: for.body:
; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; TF-SCALABLE-NEXT: store i64 [[IV]], ptr [[B]], align 8
; TF-SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; TF-SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; TF-SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
; TF-SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; TF-SCALABLE: for.end:
; TF-SCALABLE-NEXT: ret void
;
; TF-FIXEDLEN-LABEL: @uniform_store_of_loop_varying(
; TF-FIXEDLEN-NEXT: entry:
; TF-FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; TF-FIXEDLEN: vector.ph:
; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V:%.*]], i64 0
; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; TF-FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]]
; TF-FIXEDLEN: vector.body:
; TF-FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
; TF-FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; TF-FIXEDLEN-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[TMP0]], i64 1025)
; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 0
; TF-FIXEDLEN-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; TF-FIXEDLEN: pred.store.if:
; TF-FIXEDLEN-NEXT: store i64 [[TMP0]], ptr [[B:%.*]], align 8
; TF-FIXEDLEN-NEXT: br label [[PRED_STORE_CONTINUE]]
; TF-FIXEDLEN: pred.store.continue:
; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 1
; TF-FIXEDLEN-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
; TF-FIXEDLEN: pred.store.if1:
; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 1
; TF-FIXEDLEN-NEXT: store i64 [[TMP3]], ptr [[B]], align 8
; TF-FIXEDLEN-NEXT: br label [[PRED_STORE_CONTINUE2]]
; TF-FIXEDLEN: pred.store.continue2:
; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 2
; TF-FIXEDLEN-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
; TF-FIXEDLEN: pred.store.if3:
; TF-FIXEDLEN-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 2
; TF-FIXEDLEN-NEXT: store i64 [[TMP5]], ptr [[B]], align 8
; TF-FIXEDLEN-NEXT: br label [[PRED_STORE_CONTINUE4]]
; TF-FIXEDLEN: pred.store.continue4:
; TF-FIXEDLEN-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 3
; TF-FIXEDLEN-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
; TF-FIXEDLEN: pred.store.if5:
; TF-FIXEDLEN-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 3
; TF-FIXEDLEN-NEXT: store i64 [[TMP7]], ptr [[B]], align 8
; TF-FIXEDLEN-NEXT: br label [[PRED_STORE_CONTINUE6]]
; TF-FIXEDLEN: pred.store.continue6:
; TF-FIXEDLEN-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; TF-FIXEDLEN-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0
; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP9]], i32 8, <4 x i1> [[ACTIVE_LANE_MASK]])
; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; TF-FIXEDLEN-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028
; TF-FIXEDLEN-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; TF-FIXEDLEN: middle.block:
; TF-FIXEDLEN-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; TF-FIXEDLEN: scalar.ph:
; TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1028, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; TF-FIXEDLEN-NEXT: br label [[FOR_BODY:%.*]]
; TF-FIXEDLEN: for.body:
; TF-FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; TF-FIXEDLEN-NEXT: store i64 [[IV]], ptr [[B]], align 8
; TF-FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; TF-FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; TF-FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; TF-FIXEDLEN: for.end:
; TF-FIXEDLEN-NEXT: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  store i64 %iv, ptr %b, align 8
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
  store i64 %v, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1025
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}

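; Here the store to the uniform address %b only executes when iv > 10. Each
; run lowers it to a masked scatter through a splat of %b, using the compare
; as the mask; the tail-folded runs first combine the compare with the
; active-lane mask via a select.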
define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) {
; SCALABLE-LABEL: @conditional_uniform_store(
; SCALABLE-NEXT: entry:
; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]]
; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SCALABLE: vector.ph:
; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]]
; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
; SCALABLE-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; SCALABLE-NEXT: [[TMP8:%.*]] = mul <vscale x 2 x i64> [[TMP6]], splat (i64 1)
; SCALABLE-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP8]]
; SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP5]]
; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP9]], i64 0
; SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[B:%.*]], i64 0
; SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALABLE: vector.body:
; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SCALABLE-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; SCALABLE-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0
; SCALABLE-NEXT: [[TMP11:%.*]] = icmp ugt <vscale x 2 x i64> [[VEC_IND]], splat (i64 10)
; SCALABLE-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], <vscale x 2 x ptr> [[BROADCAST_SPLAT2]], i32 8, <vscale x 2 x i1> [[TMP11]])
; SCALABLE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP10]]
; SCALABLE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 0
; SCALABLE-NEXT: store <vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP13]], align 8
; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; SCALABLE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
; SCALABLE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; SCALABLE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; SCALABLE: middle.block:
; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]]
; SCALABLE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; SCALABLE: scalar.ph:
; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; SCALABLE-NEXT: br label [[FOR_BODY:%.*]]
; SCALABLE: for.body:
; SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
; SCALABLE-NEXT: [[CMP:%.*]] = icmp ugt i64 [[IV]], 10
; SCALABLE-NEXT: br i1 [[CMP]], label [[DO_STORE:%.*]], label [[LATCH]]
; SCALABLE: do_store:
; SCALABLE-NEXT: store i64 [[V]], ptr [[B]], align 8
; SCALABLE-NEXT: br label [[LATCH]]
; SCALABLE: latch:
; SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; SCALABLE: for.end:
; SCALABLE-NEXT: ret void
;
; FIXEDLEN-LABEL: @conditional_uniform_store(
; FIXEDLEN-NEXT: entry:
; FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; FIXEDLEN: vector.ph:
; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V:%.*]], i64 0
; FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x ptr> poison, ptr [[B:%.*]], i64 0
; FIXEDLEN-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT1]], <4 x ptr> poison, <4 x i32> zeroinitializer
; FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]]
; FIXEDLEN: vector.body:
; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; FIXEDLEN-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; FIXEDLEN-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; FIXEDLEN-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i64> [[VEC_IND]], splat (i64 10)
; FIXEDLEN-NEXT: [[TMP2:%.*]] = icmp ugt <4 x i64> [[STEP_ADD]], splat (i64 10)
; FIXEDLEN-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[BROADCAST_SPLAT]], <4 x ptr> [[BROADCAST_SPLAT2]], i32 8, <4 x i1> [[TMP1]])
; FIXEDLEN-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[BROADCAST_SPLAT]], <4 x ptr> [[BROADCAST_SPLAT2]], i32 8, <4 x i1> [[TMP2]])
; FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
; FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 4
; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8
; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP5]], align 8
; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], splat (i64 4)
; FIXEDLEN-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; FIXEDLEN-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; FIXEDLEN: middle.block:
; FIXEDLEN-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; FIXEDLEN: scalar.ph:
; FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; FIXEDLEN-NEXT: br label [[FOR_BODY:%.*]]
; FIXEDLEN: for.body:
; FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
; FIXEDLEN-NEXT: [[CMP:%.*]] = icmp ugt i64 [[IV]], 10
; FIXEDLEN-NEXT: br i1 [[CMP]], label [[DO_STORE:%.*]], label [[LATCH]]
; FIXEDLEN: do_store:
; FIXEDLEN-NEXT: store i64 [[V]], ptr [[B]], align 8
; FIXEDLEN-NEXT: br label [[LATCH]]
; FIXEDLEN: latch:
; FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; FIXEDLEN: for.end:
; FIXEDLEN-NEXT: ret void
;
; TF-SCALABLE-LABEL: @conditional_uniform_store(
; TF-SCALABLE-NEXT: entry:
; TF-SCALABLE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; TF-SCALABLE: vector.ph:
; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; TF-SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; TF-SCALABLE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]]
; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; TF-SCALABLE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2
; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul <vscale x 2 x i64> [[TMP5]], splat (i64 1)
; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP7]]
; TF-SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP4]]
; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP8]], i64 0
; TF-SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[B:%.*]], i64 0
; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; TF-SCALABLE: vector.body:
; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; TF-SCALABLE-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; TF-SCALABLE-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0
; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP9]], i64 1025)
; TF-SCALABLE-NEXT: [[TMP10:%.*]] = icmp ugt <vscale x 2 x i64> [[VEC_IND]], splat (i64 10)
; TF-SCALABLE-NEXT: [[TMP11:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> [[TMP10]], <vscale x 2 x i1> zeroinitializer
; TF-SCALABLE-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], <vscale x 2 x ptr> [[BROADCAST_SPLAT2]], i32 8, <vscale x 2 x i1> [[TMP11]])
; TF-SCALABLE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP9]]
; TF-SCALABLE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 0
; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP13]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
; TF-SCALABLE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; TF-SCALABLE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; TF-SCALABLE: middle.block:
; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; TF-SCALABLE: scalar.ph:
; TF-SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; TF-SCALABLE-NEXT: br label [[FOR_BODY:%.*]]
; TF-SCALABLE: for.body:
; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
; TF-SCALABLE-NEXT: [[CMP:%.*]] = icmp ugt i64 [[IV]], 10
; TF-SCALABLE-NEXT: br i1 [[CMP]], label [[DO_STORE:%.*]], label [[LATCH]]
; TF-SCALABLE: do_store:
; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[B]], align 8
; TF-SCALABLE-NEXT: br label [[LATCH]]
; TF-SCALABLE: latch:
; TF-SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; TF-SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; TF-SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
; TF-SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; TF-SCALABLE: for.end:
; TF-SCALABLE-NEXT: ret void
;
; TF-FIXEDLEN-LABEL: @conditional_uniform_store(
; TF-FIXEDLEN-NEXT: entry:
; TF-FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; TF-FIXEDLEN: vector.ph:
; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V:%.*]], i64 0
; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x ptr> poison, ptr [[B:%.*]], i64 0
; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT1]], <4 x ptr> poison, <4 x i32> zeroinitializer
; TF-FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]]
; TF-FIXEDLEN: vector.body:
; TF-FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; TF-FIXEDLEN-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; TF-FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; TF-FIXEDLEN-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[TMP0]], i64 1025)
; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i64> [[VEC_IND]], splat (i64 10)
; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i1> [[TMP1]], <4 x i1> zeroinitializer
; TF-FIXEDLEN-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[BROADCAST_SPLAT]], <4 x ptr> [[BROADCAST_SPLAT2]], i32 8, <4 x i1> [[TMP2]])
; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], i32 8, <4 x i1> [[ACTIVE_LANE_MASK]])
; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; TF-FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; TF-FIXEDLEN-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028
; TF-FIXEDLEN-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; TF-FIXEDLEN: middle.block:
; TF-FIXEDLEN-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; TF-FIXEDLEN: scalar.ph:
; TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1028, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; TF-FIXEDLEN-NEXT: br label [[FOR_BODY:%.*]]
; TF-FIXEDLEN: for.body:
; TF-FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
; TF-FIXEDLEN-NEXT: [[CMP:%.*]] = icmp ugt i64 [[IV]], 10
; TF-FIXEDLEN-NEXT: br i1 [[CMP]], label [[DO_STORE:%.*]], label [[LATCH]]
; TF-FIXEDLEN: do_store:
; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[B]], align 8
; TF-FIXEDLEN-NEXT: br label [[LATCH]]
; TF-FIXEDLEN: latch:
; TF-FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; TF-FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; TF-FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; TF-FIXEDLEN: for.end:
; TF-FIXEDLEN-NEXT: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
  %cmp = icmp ugt i64 %iv, 10
  br i1 %cmp, label %do_store, label %latch
do_store:
  store i64 %v, ptr %b, align 8
  br label %latch
latch:
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
  store i64 %v, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1025
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}


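; The last test repeats the unconditional uniform store, but %b is only known
; to be 1-byte aligned. In every run the store to %b stays a single scalar
; store (align 1) inside the vector body.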
define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) {
; SCALABLE-LABEL: @uniform_store_unaligned(
; SCALABLE-NEXT: entry:
; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]]
; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SCALABLE: vector.ph:
; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]]
; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALABLE: vector.body:
; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SCALABLE-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0
; SCALABLE-NEXT: store i64 [[V]], ptr [[B:%.*]], align 1
; SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]]
; SCALABLE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0
; SCALABLE-NEXT: store <vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP8]], align 8
; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; SCALABLE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; SCALABLE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; SCALABLE: middle.block:
; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]]
; SCALABLE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; SCALABLE: scalar.ph:
; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; SCALABLE-NEXT: br label [[FOR_BODY:%.*]]
; SCALABLE: for.body:
; SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; SCALABLE-NEXT: store i64 [[V]], ptr [[B]], align 1
; SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; SCALABLE: for.end:
; SCALABLE-NEXT: ret void
;
; FIXEDLEN-LABEL: @uniform_store_unaligned(
; FIXEDLEN-NEXT: entry:
; FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; FIXEDLEN: vector.ph:
; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V:%.*]], i64 0
; FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]]
; FIXEDLEN: vector.body:
; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; FIXEDLEN-NEXT: store i64 [[V]], ptr [[B:%.*]], align 1
; FIXEDLEN-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0
; FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 4
; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP2]], align 8
; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8
; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; FIXEDLEN-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; FIXEDLEN-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; FIXEDLEN: middle.block:
; FIXEDLEN-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; FIXEDLEN: scalar.ph:
; FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; FIXEDLEN-NEXT: br label [[FOR_BODY:%.*]]
; FIXEDLEN: for.body:
; FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; FIXEDLEN-NEXT: store i64 [[V]], ptr [[B]], align 1
; FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; FIXEDLEN: for.end:
; FIXEDLEN-NEXT: ret void
;
; TF-SCALABLE-LABEL: @uniform_store_unaligned(
; TF-SCALABLE-NEXT: entry:
; TF-SCALABLE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; TF-SCALABLE: vector.ph:
; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; TF-SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; TF-SCALABLE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]]
; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; TF-SCALABLE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2
; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; TF-SCALABLE: vector.body:
; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; TF-SCALABLE-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0
; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1025)
; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[B:%.*]], align 1
; TF-SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]]
; TF-SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0
; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP7]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
; TF-SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; TF-SCALABLE-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; TF-SCALABLE: middle.block:
; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; TF-SCALABLE: scalar.ph:
; TF-SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; TF-SCALABLE-NEXT: br label [[FOR_BODY:%.*]]
; TF-SCALABLE: for.body:
; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[B]], align 1
; TF-SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; TF-SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; TF-SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
; TF-SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; TF-SCALABLE: for.end:
; TF-SCALABLE-NEXT: ret void
;
; TF-FIXEDLEN-LABEL: @uniform_store_unaligned(
; TF-FIXEDLEN-NEXT: entry:
; TF-FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; TF-FIXEDLEN: vector.ph:
; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V:%.*]], i64 0
; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; TF-FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]]
; TF-FIXEDLEN: vector.body:
; TF-FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; TF-FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; TF-FIXEDLEN-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[TMP0]], i64 1025)
; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[B:%.*]], align 1
; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0
; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP2]], i32 8, <4 x i1> [[ACTIVE_LANE_MASK]])
; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028
; TF-FIXEDLEN-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; TF-FIXEDLEN: middle.block:
; TF-FIXEDLEN-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; TF-FIXEDLEN: scalar.ph:
; TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1028, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; TF-FIXEDLEN-NEXT: br label [[FOR_BODY:%.*]]
; TF-FIXEDLEN: for.body:
; TF-FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[B]], align 1
; TF-FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; TF-FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; TF-FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; TF-FIXEDLEN: for.end:
; TF-FIXEDLEN-NEXT: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  store i64 %v, ptr %b, align 1
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
  store i64 %v, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1025
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}