; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple riscv64-linux-gnu -mattr=+v,+f -S 2>%t | FileCheck %s -check-prefix=VLENUNK
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -riscv-v-vector-bits-min=-1 -mtriple riscv64-linux-gnu -mattr=+v,+f -S 2>%t | FileCheck %s -check-prefix=VLEN128

; Note: +v implies a Zvl128b (i.e. a minimal VLEN of 128), but as can be seen, we're currently
; not using that information unless an explicit vector width is set. (FIXME)

; A collection of fairly basic functional tests when both fixed and scalable vectorization is
; allowed. The primary goal of this is to check for crashes during cost modeling, but it also
; exercises the default heuristics in a useful way.

target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
target triple = "riscv64"

define void @vector_add(ptr noalias nocapture %a, i64 %v, i64 %n) {
; VLENUNK-LABEL: @vector_add(
; VLENUNK-NEXT: entry:
; VLENUNK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]]
; VLENUNK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VLENUNK: vector.ph:
; VLENUNK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i32 0
; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]]
; VLENUNK: vector.body:
; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLENUNK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
; VLENUNK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]]
; VLENUNK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
; VLENUNK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 1 x i64>, ptr [[TMP4]], align 8
; VLENUNK-NEXT: [[TMP5:%.*]] = add <vscale x 1 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; VLENUNK-NEXT: store <vscale x 1 x i64> [[TMP5]], ptr [[TMP4]], align 8
; VLENUNK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
; VLENUNK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VLENUNK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; VLENUNK: middle.block:
; VLENUNK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; VLENUNK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VLENUNK: scalar.ph:
; VLENUNK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VLENUNK-NEXT: br label [[FOR_BODY:%.*]]
; VLENUNK: for.body:
; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLENUNK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; VLENUNK-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; VLENUNK-NEXT: [[ADD:%.*]] = add i64 [[ELEM]], [[V]]
; VLENUNK-NEXT: store i64 [[ADD]], ptr [[ARRAYIDX]], align 8
; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
; VLENUNK: for.end:
; VLENUNK-NEXT: ret void
;
; VLEN128-LABEL: @vector_add(
; VLEN128-NEXT: entry:
; VLEN128-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]]
; VLEN128-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VLEN128: vector.ph:
; VLEN128-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i32 0
; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]]
; VLEN128: vector.body:
; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLEN128-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
; VLEN128-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]]
; VLEN128-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 1 x i64>, ptr [[TMP4]], align 8
; VLEN128-NEXT: [[TMP5:%.*]] = add <vscale x 1 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; VLEN128-NEXT: store <vscale x 1 x i64> [[TMP5]], ptr [[TMP4]], align 8
; VLEN128-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
; VLEN128-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VLEN128-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; VLEN128: middle.block:
; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VLEN128: scalar.ph:
; VLEN128-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VLEN128-NEXT: br label [[FOR_BODY:%.*]]
; VLEN128: for.body:
; VLEN128-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLEN128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; VLEN128-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; VLEN128-NEXT: [[ADD:%.*]] = add i64 [[ELEM]], [[V]]
; VLEN128-NEXT: store i64 [[ADD]], ptr [[ARRAYIDX]], align 8
; VLEN128-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLEN128-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLEN128-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
; VLEN128: for.end:
; VLEN128-NEXT: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
  %elem = load i64, ptr %arrayidx
  %add = add i64 %elem, %v
  store i64 %add, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}

; Same as above, but with an op type of i32. We currently have a bug around
; etype=ELEN profitability in the vectorizer, and having a smaller element
; width test allows us to highlight different aspects of codegen.
define void @vector_add_i32(ptr noalias nocapture %a, i32 %v, i64 %n) {
; VLENUNK-LABEL: @vector_add_i32(
; VLENUNK-NEXT: entry:
; VLENUNK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
; VLENUNK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VLENUNK: vector.ph:
; VLENUNK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[V:%.*]], i32 0
; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[V]], i32 0
; VLENUNK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT2]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]]
; VLENUNK: vector.body:
; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLENUNK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
; VLENUNK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2
; VLENUNK-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 0
; VLENUNK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 1
; VLENUNK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], [[TMP8]]
; VLENUNK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP4]]
; VLENUNK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP9]]
; VLENUNK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 0
; VLENUNK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i32>, ptr [[TMP12]], align 4
; VLENUNK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vscale.i32()
; VLENUNK-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 2
; VLENUNK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP14]]
; VLENUNK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 2 x i32>, ptr [[TMP15]], align 4
; VLENUNK-NEXT: [[TMP16:%.*]] = add <vscale x 2 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; VLENUNK-NEXT: [[TMP17:%.*]] = add <vscale x 2 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT3]]
; VLENUNK-NEXT: store <vscale x 2 x i32> [[TMP16]], ptr [[TMP12]], align 4
; VLENUNK-NEXT: [[TMP18:%.*]] = call i32 @llvm.vscale.i32()
; VLENUNK-NEXT: [[TMP19:%.*]] = mul i32 [[TMP18]], 2
; VLENUNK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP19]]
; VLENUNK-NEXT: store <vscale x 2 x i32> [[TMP17]], ptr [[TMP20]], align 4
; VLENUNK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 4
; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP22]]
; VLENUNK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VLENUNK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; VLENUNK: middle.block:
; VLENUNK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; VLENUNK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VLENUNK: scalar.ph:
; VLENUNK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VLENUNK-NEXT: br label [[FOR_BODY:%.*]]
; VLENUNK: for.body:
; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLENUNK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; VLENUNK-NEXT: [[ELEM:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; VLENUNK-NEXT: [[ADD:%.*]] = add i32 [[ELEM]], [[V]]
; VLENUNK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4
; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; VLENUNK: for.end:
; VLENUNK-NEXT: ret void
;
; VLEN128-LABEL: @vector_add_i32(
; VLEN128-NEXT: entry:
; VLEN128-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
; VLEN128-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VLEN128: vector.ph:
; VLEN128-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[V:%.*]], i32 0
; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
; VLEN128-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[V]], i32 0
; VLEN128-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT2]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]]
; VLEN128: vector.body:
; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLEN128-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
; VLEN128-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2
; VLEN128-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 0
; VLEN128-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 1
; VLEN128-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], [[TMP8]]
; VLEN128-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP4]]
; VLEN128-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP9]]
; VLEN128-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 0
; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i32>, ptr [[TMP12]], align 4
; VLEN128-NEXT: [[TMP13:%.*]] = call i32 @llvm.vscale.i32()
; VLEN128-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 2
; VLEN128-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP14]]
; VLEN128-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 2 x i32>, ptr [[TMP15]], align 4
; VLEN128-NEXT: [[TMP16:%.*]] = add <vscale x 2 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; VLEN128-NEXT: [[TMP17:%.*]] = add <vscale x 2 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT3]]
; VLEN128-NEXT: store <vscale x 2 x i32> [[TMP16]], ptr [[TMP12]], align 4
; VLEN128-NEXT: [[TMP18:%.*]] = call i32 @llvm.vscale.i32()
; VLEN128-NEXT: [[TMP19:%.*]] = mul i32 [[TMP18]], 2
; VLEN128-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP19]]
; VLEN128-NEXT: store <vscale x 2 x i32> [[TMP17]], ptr [[TMP20]], align 4
; VLEN128-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 4
; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP22]]
; VLEN128-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VLEN128-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; VLEN128: middle.block:
; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VLEN128: scalar.ph:
; VLEN128-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VLEN128-NEXT: br label [[FOR_BODY:%.*]]
; VLEN128: for.body:
; VLEN128-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLEN128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; VLEN128-NEXT: [[ELEM:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; VLEN128-NEXT: [[ADD:%.*]] = add i32 [[ELEM]], [[V]]
; VLEN128-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4
; VLEN128-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLEN128-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLEN128-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; VLEN128: for.end:
; VLEN128-NEXT: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
  %elem = load i32, ptr %arrayidx
  %add = add i32 %elem, %v
  store i32 %add, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}


; a[b[i]] += v, mostly to exercise scatter/gather costing
; TODO: Currently fails to vectorize due to a memory conflict
define void @indexed_add(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) {
; VLENUNK-LABEL: @indexed_add(
; VLENUNK-NEXT: entry:
; VLENUNK-NEXT: br label [[FOR_BODY:%.*]]
; VLENUNK: for.body:
; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLENUNK-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[IV]]
; VLENUNK-NEXT: [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8
; VLENUNK-NEXT: [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[AIDX]]
; VLENUNK-NEXT: [[ELEM:%.*]] = load i64, ptr [[AADDR]], align 8
; VLENUNK-NEXT: [[ADD:%.*]] = add i64 [[ELEM]], [[V:%.*]]
; VLENUNK-NEXT: store i64 [[ADD]], ptr [[AADDR]], align 8
; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; VLENUNK: for.end:
; VLENUNK-NEXT: ret void
;
; VLEN128-LABEL: @indexed_add(
; VLEN128-NEXT: entry:
; VLEN128-NEXT: br label [[FOR_BODY:%.*]]
; VLEN128: for.body:
; VLEN128-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLEN128-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[IV]]
; VLEN128-NEXT: [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8
; VLEN128-NEXT: [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[AIDX]]
; VLEN128-NEXT: [[ELEM:%.*]] = load i64, ptr [[AADDR]], align 8
; VLEN128-NEXT: [[ADD:%.*]] = add i64 [[ELEM]], [[V:%.*]]
; VLEN128-NEXT: store i64 [[ADD]], ptr [[AADDR]], align 8
; VLEN128-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLEN128-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLEN128-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; VLEN128: for.end:
; VLEN128-NEXT: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %baddr = getelementptr inbounds i64, ptr %b, i64 %iv
  %aidx = load i64, ptr %baddr
  %aaddr = getelementptr inbounds i64, ptr %a, i64 %aidx
  %elem = load i64, ptr %aaddr
  %add = add i64 %elem, %v
  store i64 %add, ptr %aaddr
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}

; a[b[i]] = v, exercise scatter support
define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) {
; VLENUNK-LABEL: @indexed_store(
; VLENUNK-NEXT: entry:
; VLENUNK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]]
; VLENUNK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VLENUNK: vector.ph:
; VLENUNK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i32 0
; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]]
; VLENUNK: vector.body:
; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLENUNK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
; VLENUNK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP2]]
; VLENUNK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
; VLENUNK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 1 x i64>, ptr [[TMP4]], align 8
; VLENUNK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], <vscale x 1 x i64> [[WIDE_LOAD]]
; VLENUNK-NEXT: call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> [[BROADCAST_SPLAT]], <vscale x 1 x ptr> [[TMP5]], i32 8, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i32 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer))
; VLENUNK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
; VLENUNK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VLENUNK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; VLENUNK: middle.block:
; VLENUNK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; VLENUNK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VLENUNK: scalar.ph:
; VLENUNK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VLENUNK-NEXT: br label [[FOR_BODY:%.*]]
; VLENUNK: for.body:
; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLENUNK-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
; VLENUNK-NEXT: [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8
; VLENUNK-NEXT: [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[AIDX]]
; VLENUNK-NEXT: store i64 [[V]], ptr [[AADDR]], align 8
; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; VLENUNK: for.end:
; VLENUNK-NEXT: ret void
;
; VLEN128-LABEL: @indexed_store(
; VLEN128-NEXT: entry:
; VLEN128-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]]
; VLEN128-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VLEN128: vector.ph:
; VLEN128-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i32 0
; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]]
; VLEN128: vector.body:
; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLEN128-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
; VLEN128-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP2]]
; VLEN128-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 1 x i64>, ptr [[TMP4]], align 8
; VLEN128-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], <vscale x 1 x i64> [[WIDE_LOAD]]
; VLEN128-NEXT: call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> [[BROADCAST_SPLAT]], <vscale x 1 x ptr> [[TMP5]], i32 8, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i32 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer))
; VLEN128-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
; VLEN128-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VLEN128-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; VLEN128: middle.block:
; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VLEN128: scalar.ph:
; VLEN128-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VLEN128-NEXT: br label [[FOR_BODY:%.*]]
; VLEN128: for.body:
; VLEN128-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLEN128-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
; VLEN128-NEXT: [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8
; VLEN128-NEXT: [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[AIDX]]
; VLEN128-NEXT: store i64 [[V]], ptr [[AADDR]], align 8
; VLEN128-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLEN128-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLEN128-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; VLEN128: for.end:
; VLEN128-NEXT: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %baddr = getelementptr inbounds i64, ptr %b, i64 %iv
  %aidx = load i64, ptr %baddr
  %aaddr = getelementptr inbounds i64, ptr %a, i64 %aidx
  store i64 %v, ptr %aaddr
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}

define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) {
; VLENUNK-LABEL: @indexed_load(
; VLENUNK-NEXT: entry:
; VLENUNK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]]
; VLENUNK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VLENUNK: vector.ph:
; VLENUNK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]]
; VLENUNK: vector.body:
; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLENUNK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 1 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
; VLENUNK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
; VLENUNK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP2]]
; VLENUNK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
; VLENUNK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 1 x i64>, ptr [[TMP4]], align 8
; VLENUNK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], <vscale x 1 x i64> [[WIDE_LOAD]]
; VLENUNK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> [[TMP5]], i32 8, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i32 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i64> poison)
; VLENUNK-NEXT: [[TMP6]] = add <vscale x 1 x i64> [[VEC_PHI]], [[WIDE_MASKED_GATHER]]
; VLENUNK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
; VLENUNK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VLENUNK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; VLENUNK: middle.block:
; VLENUNK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> [[TMP6]])
; VLENUNK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; VLENUNK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VLENUNK: scalar.ph:
; VLENUNK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VLENUNK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ]
; VLENUNK-NEXT: br label [[FOR_BODY:%.*]]
; VLENUNK: for.body:
; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLENUNK-NEXT: [[SUM:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SUM_NEXT:%.*]], [[FOR_BODY]] ]
; VLENUNK-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
; VLENUNK-NEXT: [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8
; VLENUNK-NEXT: [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[AIDX]]
; VLENUNK-NEXT: [[ELEM:%.*]] = load i64, ptr [[AADDR]], align 8
; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLENUNK-NEXT: [[SUM_NEXT]] = add i64 [[SUM]], [[ELEM]]
; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; VLENUNK: for.end:
; VLENUNK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT]], [[FOR_BODY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ]
; VLENUNK-NEXT: ret i64 [[SUM_NEXT_LCSSA]]
;
; VLEN128-LABEL: @indexed_load(
; VLEN128-NEXT: entry:
; VLEN128-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]]
; VLEN128-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VLEN128: vector.ph:
; VLEN128-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]]
; VLEN128: vector.body:
; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLEN128-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 1 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
; VLEN128-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
; VLEN128-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP2]]
; VLEN128-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 1 x i64>, ptr [[TMP4]], align 8
; VLEN128-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], <vscale x 1 x i64> [[WIDE_LOAD]]
; VLEN128-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> [[TMP5]], i32 8, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i32 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i64> poison)
; VLEN128-NEXT: [[TMP6]] = add <vscale x 1 x i64> [[VEC_PHI]], [[WIDE_MASKED_GATHER]]
; VLEN128-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
; VLEN128-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VLEN128-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; VLEN128: middle.block:
; VLEN128-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> [[TMP6]])
; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VLEN128: scalar.ph:
; VLEN128-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VLEN128-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ]
; VLEN128-NEXT: br label [[FOR_BODY:%.*]]
; VLEN128: for.body:
; VLEN128-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLEN128-NEXT: [[SUM:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SUM_NEXT:%.*]], [[FOR_BODY]] ]
; VLEN128-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
; VLEN128-NEXT: [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8
; VLEN128-NEXT: [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[AIDX]]
; VLEN128-NEXT: [[ELEM:%.*]] = load i64, ptr [[AADDR]], align 8
; VLEN128-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLEN128-NEXT: [[SUM_NEXT]] = add i64 [[SUM]], [[ELEM]]
; VLEN128-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLEN128-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; VLEN128: for.end:
; VLEN128-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT]], [[FOR_BODY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ]
; VLEN128-NEXT: ret i64 [[SUM_NEXT_LCSSA]]
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %sum = phi i64 [0, %entry], [%sum.next, %for.body]
  %baddr = getelementptr inbounds i64, ptr %b, i64 %iv
  %aidx = load i64, ptr %baddr
  %aaddr = getelementptr inbounds i64, ptr %a, i64 %aidx
  %elem = load i64, ptr %aaddr
  %iv.next = add nuw nsw i64 %iv, 1
  %sum.next = add i64 %sum, %elem
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret i64 %sum.next
}

define void @splat_int(ptr noalias nocapture %a, i64 %v, i64 %n) {
; VLENUNK-LABEL: @splat_int(
; VLENUNK-NEXT: entry:
; VLENUNK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]]
; VLENUNK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VLENUNK: vector.ph:
; VLENUNK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i32 0
; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]]
; VLENUNK: vector.body:
; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLENUNK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
; VLENUNK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]]
; VLENUNK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
; VLENUNK-NEXT: store <vscale x 1 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8
; VLENUNK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; VLENUNK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VLENUNK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; VLENUNK: middle.block:
; VLENUNK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; VLENUNK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VLENUNK: scalar.ph:
; VLENUNK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VLENUNK-NEXT: br label [[FOR_BODY:%.*]]
; VLENUNK: for.body:
; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLENUNK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; VLENUNK-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; VLENUNK: for.end:
; VLENUNK-NEXT: ret void
;
; VLEN128-LABEL: @splat_int(
; VLEN128-NEXT: entry:
; VLEN128-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]]
; VLEN128-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VLEN128: vector.ph:
; VLEN128-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i32 0
; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]]
; VLEN128: vector.body:
; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLEN128-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
; VLEN128-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]]
; VLEN128-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
; VLEN128-NEXT: store <vscale x 1 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8
; VLEN128-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; VLEN128-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VLEN128-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; VLEN128: middle.block:
; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VLEN128: scalar.ph:
; VLEN128-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VLEN128-NEXT: br label [[FOR_BODY:%.*]]
; VLEN128: for.body:
; VLEN128-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLEN128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; VLEN128-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; VLEN128-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLEN128-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLEN128-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; VLEN128: for.end:
; VLEN128-NEXT: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
  store i64 %v, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}

define void @splat_ptr(ptr noalias nocapture %a, ptr %v, i64 %n) {
; VLENUNK-LABEL: @splat_ptr(
; VLENUNK-NEXT: entry:
; VLENUNK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]]
; VLENUNK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VLENUNK: vector.ph:
; VLENUNK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x ptr> poison, ptr [[V:%.*]], i32 0
; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 1 x ptr> poison, <vscale x 1 x i32> zeroinitializer
; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]]
; VLENUNK: vector.body:
; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLENUNK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
; VLENUNK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]]
; VLENUNK-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i32 0
; VLENUNK-NEXT: store <vscale x 1 x ptr> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8
; VLENUNK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; VLENUNK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VLENUNK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; VLENUNK: middle.block:
; VLENUNK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; VLENUNK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VLENUNK: scalar.ph:
; VLENUNK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VLENUNK-NEXT: br label [[FOR_BODY:%.*]]
; VLENUNK: for.body:
; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLENUNK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; VLENUNK-NEXT: store ptr [[V]], ptr [[ARRAYIDX]], align 8
; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; VLENUNK: for.end:
; VLENUNK-NEXT: ret void
;
; VLEN128-LABEL: @splat_ptr(
; VLEN128-NEXT: entry:
; VLEN128-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]]
; VLEN128-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VLEN128: vector.ph:
; VLEN128-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x ptr> poison, ptr [[V:%.*]], i32 0
; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 1 x ptr> poison, <vscale x 1 x i32> zeroinitializer
; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]]
; VLEN128: vector.body:
; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLEN128-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
; VLEN128-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]]
; VLEN128-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i32 0
; VLEN128-NEXT: store <vscale x 1 x ptr> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8
; VLEN128-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; VLEN128-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VLEN128-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; VLEN128: middle.block:
; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VLEN128: scalar.ph:
; VLEN128-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VLEN128-NEXT: br label [[FOR_BODY:%.*]]
; VLEN128: for.body:
; VLEN128-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLEN128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; VLEN128-NEXT: store ptr [[V]], ptr [[ARRAYIDX]], align 8
; VLEN128-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLEN128-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLEN128-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; VLEN128: for.end:
; VLEN128-NEXT: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
  store ptr %v, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}