12cf320d4SPhilip Reames; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 2be51fa45SRoman Lebedev; RUN: opt < %s -passes=loop-vectorize -scalable-vectorization=on -mtriple riscv64-linux-gnu -mattr=+v,+f -S 2>%t | FileCheck %s -check-prefix=VLENUNK 3be51fa45SRoman Lebedev; RUN: opt < %s -passes=loop-vectorize -scalable-vectorization=on -riscv-v-vector-bits-min=-1 -mtriple riscv64-linux-gnu -mattr=+v,+f -S 2>%t | FileCheck %s -check-prefix=VLEN128 4056d6393SPhilip Reames 5056d6393SPhilip Reames; Note: +v implies a Zvl128b (i.e. minimal VLEN of 128), but as can be seen, we're currently 6056d6393SPhilip Reames; not using that information unless an explicit vector width is set. (FIXME) 72cf320d4SPhilip Reames 82cf320d4SPhilip Reames; A collection of fairly basic functional tests when both fixed and scalable vectorization is 92cf320d4SPhilip Reames; allowed. The primary goal of this is check for crashes during cost modeling, but it also 102cf320d4SPhilip Reames; exercises the default heuristics in a useful way. 112cf320d4SPhilip Reames 122cf320d4SPhilip Reamestarget datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" 132cf320d4SPhilip Reamestarget triple = "riscv64" 142cf320d4SPhilip Reames 152cf320d4SPhilip Reamesdefine void @vector_add(ptr noalias nocapture %a, i64 %v, i64 %n) { 16056d6393SPhilip Reames; VLENUNK-LABEL: @vector_add( 17056d6393SPhilip Reames; VLENUNK-NEXT: entry: 1820dd3297SPhilip Reames; VLENUNK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 198d16c680SLuke Lau; VLENUNK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 208d16c680SLuke Lau; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] 2120dd3297SPhilip Reames; VLENUNK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 2220dd3297SPhilip Reames; VLENUNK: vector.ph: 238d16c680SLuke Lau; VLENUNK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 248d16c680SLuke Lau; VLENUNK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 258d16c680SLuke Lau; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] 2620dd3297SPhilip Reames; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] 275ea6a3fcSFlorian Hahn; VLENUNK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() 285ea6a3fcSFlorian Hahn; VLENUNK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 29a5891fa4SFlorian Hahn; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0 30a5891fa4SFlorian Hahn; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer 3120dd3297SPhilip Reames; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]] 3220dd3297SPhilip Reames; VLENUNK: vector.body: 3320dd3297SPhilip Reames; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 348d16c680SLuke Lau; VLENUNK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 358d16c680SLuke Lau; VLENUNK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] 368d16c680SLuke Lau; VLENUNK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 378d16c680SLuke Lau; VLENUNK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP6]], align 8 388d16c680SLuke Lau; VLENUNK-NEXT: [[TMP7:%.*]] = add <vscale x 2 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] 398d16c680SLuke Lau; VLENUNK-NEXT: store <vscale x 2 x i64> [[TMP7]], ptr [[TMP6]], align 8 408d16c680SLuke Lau; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] 418d16c680SLuke Lau; VLENUNK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 428d16c680SLuke Lau; VLENUNK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] 4320dd3297SPhilip Reames; VLENUNK: middle.block: 4420dd3297SPhilip Reames; VLENUNK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] 4520dd3297SPhilip Reames; VLENUNK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 4620dd3297SPhilip Reames; VLENUNK: scalar.ph: 4720dd3297SPhilip Reames; VLENUNK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 48056d6393SPhilip Reames; VLENUNK-NEXT: br label [[FOR_BODY:%.*]] 49056d6393SPhilip Reames; VLENUNK: for.body: 5020dd3297SPhilip Reames; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 5120dd3297SPhilip Reames; VLENUNK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] 52056d6393SPhilip Reames; VLENUNK-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 5320dd3297SPhilip Reames; VLENUNK-NEXT: [[ADD:%.*]] = add i64 [[ELEM]], [[V]] 54056d6393SPhilip Reames; VLENUNK-NEXT: store i64 [[ADD]], ptr [[ARRAYIDX]], align 8 55056d6393SPhilip Reames; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 56056d6393SPhilip Reames; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 575a115452SSander de Smalen; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] 58056d6393SPhilip Reames; VLENUNK: for.end: 59056d6393SPhilip Reames; VLENUNK-NEXT: ret void 60056d6393SPhilip Reames; 61056d6393SPhilip Reames; VLEN128-LABEL: @vector_add( 62056d6393SPhilip Reames; VLEN128-NEXT: entry: 6320dd3297SPhilip Reames; VLEN128-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 648d16c680SLuke Lau; VLEN128-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 658d16c680SLuke Lau; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] 6620dd3297SPhilip Reames; VLEN128-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 67056d6393SPhilip Reames; VLEN128: vector.ph: 688d16c680SLuke Lau; VLEN128-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 698d16c680SLuke Lau; VLEN128-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 708d16c680SLuke Lau; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] 7120dd3297SPhilip Reames; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] 725ea6a3fcSFlorian Hahn; VLEN128-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() 735ea6a3fcSFlorian Hahn; VLEN128-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 74a5891fa4SFlorian Hahn; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0 75a5891fa4SFlorian Hahn; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer 76056d6393SPhilip Reames; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]] 77056d6393SPhilip Reames; VLEN128: vector.body: 78056d6393SPhilip Reames; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 798d16c680SLuke Lau; VLEN128-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 808d16c680SLuke Lau; VLEN128-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] 818d16c680SLuke Lau; VLEN128-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 828d16c680SLuke Lau; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP6]], align 8 838d16c680SLuke Lau; VLEN128-NEXT: [[TMP7:%.*]] = add <vscale x 2 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] 848d16c680SLuke Lau; VLEN128-NEXT: store <vscale x 2 x i64> [[TMP7]], ptr [[TMP6]], align 8 858d16c680SLuke Lau; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] 868d16c680SLuke Lau; VLEN128-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 878d16c680SLuke Lau; VLEN128-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] 88056d6393SPhilip Reames; VLEN128: middle.block: 8920dd3297SPhilip Reames; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] 90056d6393SPhilip Reames; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 91056d6393SPhilip Reames; VLEN128: scalar.ph: 9220dd3297SPhilip Reames; VLEN128-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 93056d6393SPhilip Reames; VLEN128-NEXT: br label [[FOR_BODY:%.*]] 94056d6393SPhilip Reames; VLEN128: for.body: 95056d6393SPhilip Reames; VLEN128-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 96056d6393SPhilip Reames; VLEN128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] 97056d6393SPhilip Reames; VLEN128-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 98056d6393SPhilip Reames; VLEN128-NEXT: [[ADD:%.*]] = add i64 [[ELEM]], [[V]] 99056d6393SPhilip Reames; VLEN128-NEXT: store i64 [[ADD]], ptr [[ARRAYIDX]], align 8 100056d6393SPhilip Reames; VLEN128-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 101056d6393SPhilip Reames; VLEN128-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 1025a115452SSander de Smalen; VLEN128-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] 103056d6393SPhilip Reames; VLEN128: for.end: 104056d6393SPhilip Reames; VLEN128-NEXT: ret void 1052cf320d4SPhilip Reames; 1062cf320d4SPhilip Reamesentry: 1072cf320d4SPhilip Reames br label %for.body 1082cf320d4SPhilip Reames 1092cf320d4SPhilip Reamesfor.body: 1102cf320d4SPhilip Reames %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 1112cf320d4SPhilip Reames %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv 1122cf320d4SPhilip Reames %elem = load i64, ptr %arrayidx 1132cf320d4SPhilip Reames %add = add i64 %elem, %v 1142cf320d4SPhilip Reames store i64 %add, ptr %arrayidx 1152cf320d4SPhilip Reames %iv.next = add nuw nsw i64 %iv, 1 1162cf320d4SPhilip Reames %exitcond.not = icmp eq i64 %iv.next, 1024 1172cf320d4SPhilip Reames br i1 %exitcond.not, label %for.end, label %for.body 1182cf320d4SPhilip Reames 1192cf320d4SPhilip Reamesfor.end: 1202cf320d4SPhilip Reames ret void 1212cf320d4SPhilip Reames} 1222cf320d4SPhilip Reames 123ae8fac6fSPhilip Reames; Same as above, but with op type of i32. We currently have a bug around 124ae8fac6fSPhilip Reames; etype=ELEN profitability in the vectorizer, and having a smaller element 125ae8fac6fSPhilip Reames; width test allows us to highlight different aspects of codegen. 126ae8fac6fSPhilip Reamesdefine void @vector_add_i32(ptr noalias nocapture %a, i32 %v, i64 %n) { 127ae8fac6fSPhilip Reames; VLENUNK-LABEL: @vector_add_i32( 128ae8fac6fSPhilip Reames; VLENUNK-NEXT: entry: 129ae8fac6fSPhilip Reames; VLENUNK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 1308d16c680SLuke Lau; VLENUNK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 131ae8fac6fSPhilip Reames; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] 132ae8fac6fSPhilip Reames; VLENUNK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 133ae8fac6fSPhilip Reames; VLENUNK: vector.ph: 134ae8fac6fSPhilip Reames; VLENUNK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 1358d16c680SLuke Lau; VLENUNK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 136ae8fac6fSPhilip Reames; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] 137ae8fac6fSPhilip Reames; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] 1385ea6a3fcSFlorian Hahn; VLENUNK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() 1395ea6a3fcSFlorian Hahn; VLENUNK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4 140a5891fa4SFlorian Hahn; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[V:%.*]], i64 0 141a5891fa4SFlorian Hahn; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer 142ae8fac6fSPhilip Reames; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]] 143ae8fac6fSPhilip Reames; VLENUNK: vector.body: 144ae8fac6fSPhilip Reames; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 145ae8fac6fSPhilip Reames; VLENUNK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 14615f9cf16SLuke Lau; VLENUNK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP4]] 14715f9cf16SLuke Lau; VLENUNK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 1488d16c680SLuke Lau; VLENUNK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP6]], align 4 1498d16c680SLuke Lau; VLENUNK-NEXT: [[TMP7:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] 1508d16c680SLuke Lau; VLENUNK-NEXT: store <vscale x 4 x i32> [[TMP7]], ptr [[TMP6]], align 4 15115f9cf16SLuke Lau; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] 15215f9cf16SLuke Lau; VLENUNK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 15315f9cf16SLuke Lau; VLENUNK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] 154ae8fac6fSPhilip Reames; VLENUNK: middle.block: 155ae8fac6fSPhilip Reames; VLENUNK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] 156ae8fac6fSPhilip Reames; VLENUNK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 157ae8fac6fSPhilip Reames; VLENUNK: scalar.ph: 158ae8fac6fSPhilip Reames; VLENUNK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 159ae8fac6fSPhilip Reames; VLENUNK-NEXT: br label [[FOR_BODY:%.*]] 160ae8fac6fSPhilip Reames; VLENUNK: for.body: 161ae8fac6fSPhilip Reames; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 162ae8fac6fSPhilip Reames; VLENUNK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 163ae8fac6fSPhilip Reames; VLENUNK-NEXT: [[ELEM:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 164ae8fac6fSPhilip Reames; VLENUNK-NEXT: [[ADD:%.*]] = add i32 [[ELEM]], [[V]] 165ae8fac6fSPhilip Reames; VLENUNK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4 166ae8fac6fSPhilip Reames; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 167ae8fac6fSPhilip Reames; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 16820dd3297SPhilip Reames; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] 169ae8fac6fSPhilip Reames; VLENUNK: for.end: 170ae8fac6fSPhilip Reames; VLENUNK-NEXT: ret void 171ae8fac6fSPhilip Reames; 172ae8fac6fSPhilip Reames; VLEN128-LABEL: @vector_add_i32( 173ae8fac6fSPhilip Reames; VLEN128-NEXT: entry: 1749803b0d1SPhilip Reames; VLEN128-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 1758d16c680SLuke Lau; VLEN128-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 1769803b0d1SPhilip Reames; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] 1779803b0d1SPhilip Reames; VLEN128-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 178ae8fac6fSPhilip Reames; VLEN128: vector.ph: 1799803b0d1SPhilip Reames; VLEN128-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 1808d16c680SLuke Lau; VLEN128-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 1819803b0d1SPhilip Reames; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] 1829803b0d1SPhilip Reames; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] 1835ea6a3fcSFlorian Hahn; VLEN128-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() 1845ea6a3fcSFlorian Hahn; VLEN128-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4 185a5891fa4SFlorian Hahn; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[V:%.*]], i64 0 186a5891fa4SFlorian Hahn; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer 187ae8fac6fSPhilip Reames; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]] 188ae8fac6fSPhilip Reames; VLEN128: vector.body: 189ae8fac6fSPhilip Reames; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1909803b0d1SPhilip Reames; VLEN128-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 19115f9cf16SLuke Lau; VLEN128-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP4]] 19215f9cf16SLuke Lau; VLEN128-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 1938d16c680SLuke Lau; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP6]], align 4 1948d16c680SLuke Lau; VLEN128-NEXT: [[TMP7:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] 1958d16c680SLuke Lau; VLEN128-NEXT: store <vscale x 4 x i32> [[TMP7]], ptr [[TMP6]], align 4 19615f9cf16SLuke Lau; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] 19715f9cf16SLuke Lau; VLEN128-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 19815f9cf16SLuke Lau; VLEN128-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] 199ae8fac6fSPhilip Reames; VLEN128: middle.block: 2009803b0d1SPhilip Reames; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] 201ae8fac6fSPhilip Reames; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 202ae8fac6fSPhilip Reames; VLEN128: scalar.ph: 2039803b0d1SPhilip Reames; VLEN128-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 204ae8fac6fSPhilip Reames; VLEN128-NEXT: br label [[FOR_BODY:%.*]] 205ae8fac6fSPhilip Reames; VLEN128: for.body: 206ae8fac6fSPhilip Reames; VLEN128-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 207ae8fac6fSPhilip Reames; VLEN128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 208ae8fac6fSPhilip Reames; VLEN128-NEXT: [[ELEM:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 209ae8fac6fSPhilip Reames; VLEN128-NEXT: [[ADD:%.*]] = add i32 [[ELEM]], [[V]] 210ae8fac6fSPhilip Reames; VLEN128-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4 211ae8fac6fSPhilip Reames; VLEN128-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 212ae8fac6fSPhilip Reames; VLEN128-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 213ae8fac6fSPhilip Reames; VLEN128-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] 214ae8fac6fSPhilip Reames; VLEN128: for.end: 215ae8fac6fSPhilip Reames; VLEN128-NEXT: ret void 216ae8fac6fSPhilip Reames; 217ae8fac6fSPhilip Reamesentry: 218ae8fac6fSPhilip Reames br label %for.body 219ae8fac6fSPhilip Reames 220ae8fac6fSPhilip Reamesfor.body: 221ae8fac6fSPhilip Reames %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 222ae8fac6fSPhilip Reames %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv 223ae8fac6fSPhilip Reames %elem = load i32, ptr %arrayidx 224ae8fac6fSPhilip Reames %add = add i32 %elem, %v 225ae8fac6fSPhilip Reames store i32 %add, ptr %arrayidx 226ae8fac6fSPhilip Reames %iv.next = add nuw nsw i64 %iv, 1 227ae8fac6fSPhilip Reames %exitcond.not = icmp eq i64 %iv.next, 1024 228ae8fac6fSPhilip Reames br i1 %exitcond.not, label %for.end, label %for.body 229ae8fac6fSPhilip Reames 230ae8fac6fSPhilip Reamesfor.end: 231ae8fac6fSPhilip Reames ret void 232ae8fac6fSPhilip Reames} 233ae8fac6fSPhilip Reames 234ae8fac6fSPhilip Reames 2358ae06642SPhilip Reames; a[b[i]] += v, mostly to exercise scatter/gather costing 2368ae06642SPhilip Reames; TODO: Currently fails to vectorize due to a memory conflict 2378ae06642SPhilip Reamesdefine void @indexed_add(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) { 238056d6393SPhilip Reames; VLENUNK-LABEL: @indexed_add( 239056d6393SPhilip Reames; VLENUNK-NEXT: entry: 240056d6393SPhilip Reames; VLENUNK-NEXT: br label [[FOR_BODY:%.*]] 241056d6393SPhilip Reames; VLENUNK: for.body: 242056d6393SPhilip Reames; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 243056d6393SPhilip Reames; VLENUNK-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[IV]] 244056d6393SPhilip Reames; VLENUNK-NEXT: [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8 245056d6393SPhilip Reames; VLENUNK-NEXT: [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[AIDX]] 246056d6393SPhilip Reames; VLENUNK-NEXT: [[ELEM:%.*]] = load i64, ptr [[AADDR]], align 8 247056d6393SPhilip Reames; VLENUNK-NEXT: [[ADD:%.*]] = add i64 [[ELEM]], [[V:%.*]] 248056d6393SPhilip Reames; VLENUNK-NEXT: store i64 [[ADD]], ptr [[AADDR]], align 8 249056d6393SPhilip Reames; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 250056d6393SPhilip Reames; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 251056d6393SPhilip Reames; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] 252056d6393SPhilip Reames; VLENUNK: for.end: 253056d6393SPhilip Reames; VLENUNK-NEXT: ret void 254056d6393SPhilip Reames; 255056d6393SPhilip Reames; VLEN128-LABEL: @indexed_add( 256056d6393SPhilip Reames; VLEN128-NEXT: entry: 257056d6393SPhilip Reames; VLEN128-NEXT: br label [[FOR_BODY:%.*]] 258056d6393SPhilip Reames; VLEN128: for.body: 259056d6393SPhilip Reames; VLEN128-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 260056d6393SPhilip Reames; VLEN128-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[IV]] 261056d6393SPhilip Reames; VLEN128-NEXT: [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8 262056d6393SPhilip Reames; VLEN128-NEXT: [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[AIDX]] 263056d6393SPhilip Reames; VLEN128-NEXT: [[ELEM:%.*]] = load i64, ptr [[AADDR]], align 8 264056d6393SPhilip Reames; VLEN128-NEXT: [[ADD:%.*]] = add i64 [[ELEM]], [[V:%.*]] 265056d6393SPhilip Reames; VLEN128-NEXT: store i64 [[ADD]], ptr [[AADDR]], align 8 266056d6393SPhilip Reames; VLEN128-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 267056d6393SPhilip Reames; VLEN128-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 268056d6393SPhilip Reames; VLEN128-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] 269056d6393SPhilip Reames; VLEN128: for.end: 270056d6393SPhilip Reames; VLEN128-NEXT: ret void 2718ae06642SPhilip Reames; 2728ae06642SPhilip Reamesentry: 2738ae06642SPhilip Reames br label %for.body 2748ae06642SPhilip Reames 2758ae06642SPhilip Reamesfor.body: 2768ae06642SPhilip Reames %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 2778ae06642SPhilip Reames %baddr = getelementptr inbounds i64, ptr %b, i64 %iv 2788ae06642SPhilip Reames %aidx = load i64, ptr %baddr 2798ae06642SPhilip Reames %aaddr = getelementptr inbounds i64, ptr %a, i64 %aidx 2808ae06642SPhilip Reames %elem = load i64, ptr %aaddr 2818ae06642SPhilip Reames %add = add i64 %elem, %v 2828ae06642SPhilip Reames store i64 %add, ptr %aaddr 2838ae06642SPhilip Reames %iv.next = add nuw nsw i64 %iv, 1 2848ae06642SPhilip Reames %exitcond.not = icmp eq i64 %iv.next, 1024 2858ae06642SPhilip Reames br i1 %exitcond.not, label %for.end, label %for.body 2868ae06642SPhilip Reames 2878ae06642SPhilip Reamesfor.end: 2888ae06642SPhilip Reames ret void 2898ae06642SPhilip Reames} 2908ae06642SPhilip Reames 2918ae06642SPhilip Reames; a[b[i]] = v, exercise scatter support 2928ae06642SPhilip Reamesdefine void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) { 293056d6393SPhilip Reames; VLENUNK-LABEL: @indexed_store( 294056d6393SPhilip Reames; VLENUNK-NEXT: entry: 2954d875910SPhilip Reames; VLENUNK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 2968d16c680SLuke Lau; VLENUNK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 2978d16c680SLuke Lau; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] 2984d875910SPhilip Reames; VLENUNK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 2994d875910SPhilip Reames; VLENUNK: vector.ph: 3008d16c680SLuke Lau; VLENUNK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 3018d16c680SLuke Lau; VLENUNK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 3028d16c680SLuke Lau; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] 3034d875910SPhilip Reames; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] 3045ea6a3fcSFlorian Hahn; VLENUNK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() 3055ea6a3fcSFlorian Hahn; VLENUNK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 306a5891fa4SFlorian Hahn; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0 307a5891fa4SFlorian Hahn; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer 3084d875910SPhilip Reames; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]] 3094d875910SPhilip Reames; VLENUNK: vector.body: 3104d875910SPhilip Reames; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 3118d16c680SLuke Lau; VLENUNK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 3128d16c680SLuke Lau; VLENUNK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP4]] 3138d16c680SLuke Lau; VLENUNK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 3148d16c680SLuke Lau; VLENUNK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP6]], align 8 3158d16c680SLuke Lau; VLENUNK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], <vscale x 2 x i64> [[WIDE_LOAD]] 316*56c091eaSPaul Walker; VLENUNK-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], <vscale x 2 x ptr> [[TMP7]], i32 8, <vscale x 2 x i1> splat (i1 true)) 3178d16c680SLuke Lau; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] 3188d16c680SLuke Lau; VLENUNK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 3198d16c680SLuke Lau; VLENUNK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] 3204d875910SPhilip Reames; VLENUNK: middle.block: 3214d875910SPhilip Reames; VLENUNK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] 3224d875910SPhilip Reames; VLENUNK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 3234d875910SPhilip Reames; VLENUNK: scalar.ph: 3244d875910SPhilip Reames; VLENUNK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 325056d6393SPhilip Reames; VLENUNK-NEXT: br label [[FOR_BODY:%.*]] 326056d6393SPhilip Reames; VLENUNK: for.body: 3274d875910SPhilip Reames; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 3284d875910SPhilip Reames; VLENUNK-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] 329056d6393SPhilip Reames; VLENUNK-NEXT: [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8 3304d875910SPhilip Reames; VLENUNK-NEXT: [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[AIDX]] 3314d875910SPhilip Reames; VLENUNK-NEXT: store i64 [[V]], ptr [[AADDR]], align 8 332056d6393SPhilip Reames; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 333056d6393SPhilip Reames; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 3344d875910SPhilip Reames; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] 335056d6393SPhilip Reames; VLENUNK: for.end: 336056d6393SPhilip Reames; VLENUNK-NEXT: ret void 337056d6393SPhilip Reames; 338056d6393SPhilip Reames; VLEN128-LABEL: @indexed_store( 339056d6393SPhilip Reames; VLEN128-NEXT: entry: 3404d875910SPhilip Reames; VLEN128-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 3418d16c680SLuke Lau; VLEN128-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 3428d16c680SLuke Lau; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] 3434d875910SPhilip Reames; VLEN128-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 344056d6393SPhilip Reames; VLEN128: vector.ph: 3458d16c680SLuke Lau; VLEN128-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 3468d16c680SLuke Lau; VLEN128-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 3478d16c680SLuke Lau; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] 3484d875910SPhilip Reames; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] 3495ea6a3fcSFlorian Hahn; VLEN128-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() 3505ea6a3fcSFlorian Hahn; VLEN128-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 351a5891fa4SFlorian Hahn; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0 352a5891fa4SFlorian Hahn; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer 353056d6393SPhilip Reames; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]] 354056d6393SPhilip Reames; VLEN128: vector.body: 355056d6393SPhilip Reames; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 3568d16c680SLuke Lau; VLEN128-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 3578d16c680SLuke Lau; VLEN128-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP4]] 3588d16c680SLuke Lau; VLEN128-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 3598d16c680SLuke Lau; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP6]], align 8 3608d16c680SLuke Lau; VLEN128-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], <vscale x 2 x i64> [[WIDE_LOAD]] 361*56c091eaSPaul Walker; VLEN128-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], <vscale x 2 x ptr> [[TMP7]], i32 8, <vscale x 2 x i1> splat (i1 true)) 3628d16c680SLuke Lau; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] 3638d16c680SLuke Lau; VLEN128-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 3648d16c680SLuke Lau; VLEN128-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] 365056d6393SPhilip Reames; VLEN128: middle.block: 3664d875910SPhilip Reames; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] 367056d6393SPhilip Reames; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 368056d6393SPhilip Reames; VLEN128: scalar.ph: 3694d875910SPhilip Reames; VLEN128-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 370056d6393SPhilip Reames; VLEN128-NEXT: br label [[FOR_BODY:%.*]] 371056d6393SPhilip Reames; VLEN128: for.body: 372056d6393SPhilip Reames; VLEN128-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 373056d6393SPhilip Reames; VLEN128-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] 374056d6393SPhilip Reames; VLEN128-NEXT: [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8 375056d6393SPhilip Reames; VLEN128-NEXT: [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[AIDX]] 376056d6393SPhilip Reames; VLEN128-NEXT: store i64 [[V]], ptr [[AADDR]], align 8 377056d6393SPhilip Reames; VLEN128-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 378056d6393SPhilip Reames; VLEN128-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 379ae8fac6fSPhilip Reames; VLEN128-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] 380056d6393SPhilip Reames; VLEN128: for.end: 381056d6393SPhilip Reames; VLEN128-NEXT: ret void 3828ae06642SPhilip Reames; 3838ae06642SPhilip Reamesentry: 3848ae06642SPhilip Reames br label %for.body 3858ae06642SPhilip Reames 3868ae06642SPhilip Reamesfor.body: 3878ae06642SPhilip Reames %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 3888ae06642SPhilip Reames %baddr = getelementptr inbounds i64, ptr %b, i64 %iv 3898ae06642SPhilip Reames %aidx = load i64, ptr %baddr 3908ae06642SPhilip Reames %aaddr = getelementptr inbounds i64, ptr %a, i64 %aidx 3918ae06642SPhilip Reames store i64 %v, ptr %aaddr 3928ae06642SPhilip Reames %iv.next = add nuw nsw i64 %iv, 1 3938ae06642SPhilip Reames %exitcond.not = icmp eq i64 %iv.next, 1024 3948ae06642SPhilip Reames br i1 %exitcond.not, label %for.end, label %for.body 3958ae06642SPhilip Reames 3968ae06642SPhilip Reamesfor.end: 3978ae06642SPhilip Reames ret void 3988ae06642SPhilip Reames} 3998ae06642SPhilip Reames 4008ae06642SPhilip Reamesdefine i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) { 401056d6393SPhilip Reames; VLENUNK-LABEL: @indexed_load( 402056d6393SPhilip Reames; VLENUNK-NEXT: entry: 4034d875910SPhilip Reames; VLENUNK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 4048d16c680SLuke Lau; VLENUNK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 4058d16c680SLuke Lau; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] 4064d875910SPhilip Reames; VLENUNK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 4074d875910SPhilip Reames; VLENUNK: vector.ph: 4088d16c680SLuke Lau; VLENUNK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 4098d16c680SLuke Lau; VLENUNK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 4108d16c680SLuke Lau; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] 4114d875910SPhilip Reames; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] 4125ea6a3fcSFlorian Hahn; VLENUNK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() 4135ea6a3fcSFlorian Hahn; VLENUNK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2 4144d875910SPhilip Reames; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]] 4154d875910SPhilip Reames; VLENUNK: vector.body: 4164d875910SPhilip Reames; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 4178d16c680SLuke Lau; VLENUNK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] 4188d16c680SLuke Lau; VLENUNK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 4198d16c680SLuke Lau; VLENUNK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP4]] 4208d16c680SLuke Lau; VLENUNK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 4218d16c680SLuke Lau; VLENUNK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP6]], align 8 4228d16c680SLuke Lau; VLENUNK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], <vscale x 2 x i64> [[WIDE_LOAD]] 423*56c091eaSPaul Walker; VLENUNK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[TMP7]], i32 8, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> poison) 4248d16c680SLuke Lau; VLENUNK-NEXT: [[TMP8]] = add <vscale x 2 x i64> [[VEC_PHI]], [[WIDE_MASKED_GATHER]] 4258d16c680SLuke Lau; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] 4268d16c680SLuke Lau; VLENUNK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 4278d16c680SLuke Lau; VLENUNK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] 4284d875910SPhilip Reames; VLENUNK: middle.block: 4293808ba78SFlorian Hahn; VLENUNK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> [[TMP8]]) 43099d6c6d9SFlorian Hahn; VLENUNK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] 4314d875910SPhilip Reames; VLENUNK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 4324d875910SPhilip Reames; VLENUNK: scalar.ph: 4334d875910SPhilip Reames; VLENUNK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 43499d6c6d9SFlorian Hahn; VLENUNK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] 435056d6393SPhilip Reames; VLENUNK-NEXT: br label [[FOR_BODY:%.*]] 436056d6393SPhilip Reames; VLENUNK: for.body: 4374d875910SPhilip Reames; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 4384d875910SPhilip Reames; VLENUNK-NEXT: [[SUM:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SUM_NEXT:%.*]], [[FOR_BODY]] ] 4394d875910SPhilip Reames; VLENUNK-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] 440056d6393SPhilip Reames; VLENUNK-NEXT: [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8 4414d875910SPhilip Reames; VLENUNK-NEXT: [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[AIDX]] 442056d6393SPhilip Reames; VLENUNK-NEXT: [[ELEM:%.*]] = load i64, ptr [[AADDR]], align 8 443056d6393SPhilip Reames; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 444056d6393SPhilip Reames; VLENUNK-NEXT: [[SUM_NEXT]] = add i64 [[SUM]], [[ELEM]] 445056d6393SPhilip Reames; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 4464d875910SPhilip Reames; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] 447056d6393SPhilip Reames; VLENUNK: for.end: 4488d16c680SLuke Lau; VLENUNK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] 449056d6393SPhilip Reames; VLENUNK-NEXT: ret i64 [[SUM_NEXT_LCSSA]] 450056d6393SPhilip Reames; 451056d6393SPhilip Reames; VLEN128-LABEL: @indexed_load( 452056d6393SPhilip Reames; VLEN128-NEXT: entry: 4534d875910SPhilip Reames; VLEN128-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 4548d16c680SLuke Lau; VLEN128-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 4558d16c680SLuke Lau; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] 4564d875910SPhilip Reames; VLEN128-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 457056d6393SPhilip Reames; VLEN128: vector.ph: 4588d16c680SLuke Lau; VLEN128-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 4598d16c680SLuke Lau; VLEN128-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 4608d16c680SLuke Lau; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] 4614d875910SPhilip Reames; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] 46299d6c6d9SFlorian Hahn; VLEN128-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 46399d6c6d9SFlorian Hahn; VLEN128-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 464056d6393SPhilip Reames; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]] 465056d6393SPhilip Reames; VLEN128: vector.body: 466056d6393SPhilip Reames; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 46799d6c6d9SFlorian Hahn; VLEN128-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] 46899d6c6d9SFlorian Hahn; VLEN128-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 46999d6c6d9SFlorian Hahn; VLEN128-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP6]] 47099d6c6d9SFlorian Hahn; VLEN128-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 47199d6c6d9SFlorian Hahn; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP8]], align 8 47299d6c6d9SFlorian Hahn; VLEN128-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], <vscale x 2 x i64> [[WIDE_LOAD]] 473*56c091eaSPaul Walker; VLEN128-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[TMP9]], i32 8, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> poison) 47499d6c6d9SFlorian Hahn; VLEN128-NEXT: [[TMP10]] = add <vscale x 2 x i64> [[VEC_PHI]], [[WIDE_MASKED_GATHER]] 47599d6c6d9SFlorian Hahn; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 4768d16c680SLuke Lau; VLEN128-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 4778d16c680SLuke Lau; VLEN128-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] 478056d6393SPhilip Reames; VLEN128: middle.block: 47999d6c6d9SFlorian Hahn; VLEN128-NEXT: [[TMP12:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> [[TMP10]]) 4804d875910SPhilip Reames; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] 481056d6393SPhilip Reames; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 482056d6393SPhilip Reames; VLEN128: scalar.ph: 4834d875910SPhilip Reames; VLEN128-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 48499d6c6d9SFlorian Hahn; VLEN128-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] 485056d6393SPhilip Reames; VLEN128-NEXT: br label [[FOR_BODY:%.*]] 486056d6393SPhilip Reames; VLEN128: for.body: 487056d6393SPhilip Reames; VLEN128-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 488056d6393SPhilip Reames; VLEN128-NEXT: [[SUM:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SUM_NEXT:%.*]], [[FOR_BODY]] ] 489056d6393SPhilip Reames; VLEN128-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] 490056d6393SPhilip Reames; VLEN128-NEXT: [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8 491056d6393SPhilip Reames; VLEN128-NEXT: [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[AIDX]] 492056d6393SPhilip Reames; VLEN128-NEXT: [[ELEM:%.*]] = load i64, ptr [[AADDR]], align 8 493056d6393SPhilip Reames; VLEN128-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 494056d6393SPhilip Reames; VLEN128-NEXT: [[SUM_NEXT]] = add i64 [[SUM]], [[ELEM]] 495056d6393SPhilip Reames; VLEN128-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 496ae8fac6fSPhilip Reames; VLEN128-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] 497056d6393SPhilip Reames; VLEN128: for.end: 4988d16c680SLuke Lau; VLEN128-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] 499056d6393SPhilip Reames; VLEN128-NEXT: ret i64 [[SUM_NEXT_LCSSA]] 5008ae06642SPhilip Reames; 5018ae06642SPhilip Reamesentry: 5028ae06642SPhilip Reames br label %for.body 5038ae06642SPhilip Reames 5048ae06642SPhilip Reamesfor.body: 5058ae06642SPhilip Reames %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 5068ae06642SPhilip Reames %sum = phi i64 [0, %entry], [%sum.next, %for.body] 5078ae06642SPhilip Reames %baddr = getelementptr inbounds i64, ptr %b, i64 %iv 5088ae06642SPhilip Reames %aidx = load i64, ptr %baddr 5098ae06642SPhilip Reames %aaddr = getelementptr inbounds i64, ptr %a, i64 %aidx 5108ae06642SPhilip Reames %elem = load i64, ptr %aaddr 5118ae06642SPhilip Reames %iv.next = add nuw nsw i64 %iv, 1 5128ae06642SPhilip Reames %sum.next = add i64 %sum, %elem 5138ae06642SPhilip Reames %exitcond.not = icmp eq i64 %iv.next, 1024 5148ae06642SPhilip Reames br i1 %exitcond.not, label %for.end, label %for.body 5158ae06642SPhilip Reames 5168ae06642SPhilip Reamesfor.end: 5178ae06642SPhilip Reames ret i64 %sum.next 5188ae06642SPhilip Reames} 5198ae06642SPhilip Reames 5202cf320d4SPhilip Reamesdefine void @splat_int(ptr noalias nocapture %a, i64 %v, i64 %n) { 521056d6393SPhilip Reames; VLENUNK-LABEL: @splat_int( 522056d6393SPhilip Reames; VLENUNK-NEXT: entry: 52320dd3297SPhilip Reames; VLENUNK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 5248d16c680SLuke Lau; VLENUNK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 5258d16c680SLuke Lau; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] 52620dd3297SPhilip Reames; VLENUNK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 52720dd3297SPhilip Reames; VLENUNK: vector.ph: 5288d16c680SLuke Lau; VLENUNK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 5298d16c680SLuke Lau; VLENUNK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 5308d16c680SLuke Lau; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] 53120dd3297SPhilip Reames; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] 5325ea6a3fcSFlorian Hahn; VLENUNK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() 5335ea6a3fcSFlorian Hahn; VLENUNK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 534a5891fa4SFlorian Hahn; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0 535a5891fa4SFlorian Hahn; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer 53620dd3297SPhilip Reames; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]] 53720dd3297SPhilip Reames; VLENUNK: vector.body: 53820dd3297SPhilip Reames; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 5398d16c680SLuke Lau; VLENUNK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 5408d16c680SLuke Lau; VLENUNK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] 5418d16c680SLuke Lau; VLENUNK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 5428d16c680SLuke Lau; VLENUNK-NEXT: store <vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8 5438d16c680SLuke Lau; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] 5448d16c680SLuke Lau; VLENUNK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 5458d16c680SLuke Lau; VLENUNK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] 54620dd3297SPhilip Reames; VLENUNK: middle.block: 54720dd3297SPhilip Reames; VLENUNK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] 54820dd3297SPhilip Reames; VLENUNK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 54920dd3297SPhilip Reames; VLENUNK: scalar.ph: 55020dd3297SPhilip Reames; VLENUNK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 551056d6393SPhilip Reames; VLENUNK-NEXT: br label [[FOR_BODY:%.*]] 552056d6393SPhilip Reames; VLENUNK: for.body: 55320dd3297SPhilip Reames; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 55420dd3297SPhilip Reames; VLENUNK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] 55520dd3297SPhilip Reames; VLENUNK-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 556056d6393SPhilip Reames; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 557056d6393SPhilip Reames; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 5584d875910SPhilip Reames; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] 559056d6393SPhilip Reames; VLENUNK: for.end: 560056d6393SPhilip Reames; VLENUNK-NEXT: ret void 561056d6393SPhilip Reames; 562056d6393SPhilip Reames; VLEN128-LABEL: @splat_int( 563056d6393SPhilip Reames; VLEN128-NEXT: entry: 56420dd3297SPhilip Reames; VLEN128-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 5658d16c680SLuke Lau; VLEN128-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 5668d16c680SLuke Lau; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] 56720dd3297SPhilip Reames; VLEN128-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 568056d6393SPhilip Reames; VLEN128: vector.ph: 5698d16c680SLuke Lau; VLEN128-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 5708d16c680SLuke Lau; VLEN128-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 5718d16c680SLuke Lau; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] 57220dd3297SPhilip Reames; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] 5735ea6a3fcSFlorian Hahn; VLEN128-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() 5745ea6a3fcSFlorian Hahn; VLEN128-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 575a5891fa4SFlorian Hahn; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0 576a5891fa4SFlorian Hahn; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer 577056d6393SPhilip Reames; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]] 578056d6393SPhilip Reames; VLEN128: vector.body: 579056d6393SPhilip Reames; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 5808d16c680SLuke Lau; VLEN128-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 5818d16c680SLuke Lau; VLEN128-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] 5828d16c680SLuke Lau; VLEN128-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 5838d16c680SLuke Lau; VLEN128-NEXT: store <vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8 5848d16c680SLuke Lau; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] 5858d16c680SLuke Lau; VLEN128-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 5868d16c680SLuke Lau; VLEN128-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] 587056d6393SPhilip Reames; VLEN128: middle.block: 58820dd3297SPhilip Reames; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] 589056d6393SPhilip Reames; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 590056d6393SPhilip Reames; VLEN128: scalar.ph: 59120dd3297SPhilip Reames; VLEN128-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 592056d6393SPhilip Reames; VLEN128-NEXT: br label [[FOR_BODY:%.*]] 593056d6393SPhilip Reames; VLEN128: for.body: 594056d6393SPhilip Reames; VLEN128-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 595056d6393SPhilip Reames; VLEN128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] 596056d6393SPhilip Reames; VLEN128-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 597056d6393SPhilip Reames; VLEN128-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 598056d6393SPhilip Reames; VLEN128-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 599ae8fac6fSPhilip Reames; VLEN128-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] 600056d6393SPhilip Reames; VLEN128: for.end: 601056d6393SPhilip Reames; VLEN128-NEXT: ret void 6022cf320d4SPhilip Reames; 6032cf320d4SPhilip Reamesentry: 6042cf320d4SPhilip Reames br label %for.body 6052cf320d4SPhilip Reames 6062cf320d4SPhilip Reamesfor.body: 6072cf320d4SPhilip Reames %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 6082cf320d4SPhilip Reames %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv 6092cf320d4SPhilip Reames store i64 %v, ptr %arrayidx 6102cf320d4SPhilip Reames %iv.next = add nuw nsw i64 %iv, 1 6112cf320d4SPhilip Reames %exitcond.not = icmp eq i64 %iv.next, 1024 6122cf320d4SPhilip Reames br i1 %exitcond.not, label %for.end, label %for.body 6132cf320d4SPhilip Reames 6142cf320d4SPhilip Reamesfor.end: 6152cf320d4SPhilip Reames ret void 6162cf320d4SPhilip Reames} 6172cf320d4SPhilip Reames 6182cf320d4SPhilip Reamesdefine void @splat_ptr(ptr noalias nocapture %a, ptr %v, i64 %n) { 619056d6393SPhilip Reames; VLENUNK-LABEL: @splat_ptr( 620056d6393SPhilip Reames; VLENUNK-NEXT: entry: 62120dd3297SPhilip Reames; VLENUNK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 6228d16c680SLuke Lau; VLENUNK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 6238d16c680SLuke Lau; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] 62420dd3297SPhilip Reames; VLENUNK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 62520dd3297SPhilip Reames; VLENUNK: vector.ph: 6268d16c680SLuke Lau; VLENUNK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 6278d16c680SLuke Lau; VLENUNK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 6288d16c680SLuke Lau; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] 62920dd3297SPhilip Reames; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] 6305ea6a3fcSFlorian Hahn; VLENUNK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() 6315ea6a3fcSFlorian Hahn; VLENUNK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 632a5891fa4SFlorian Hahn; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[V:%.*]], i64 0 633a5891fa4SFlorian Hahn; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer 63420dd3297SPhilip Reames; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]] 63520dd3297SPhilip Reames; VLENUNK: vector.body: 63620dd3297SPhilip Reames; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 6378d16c680SLuke Lau; VLENUNK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 6388d16c680SLuke Lau; VLENUNK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] 6398d16c680SLuke Lau; VLENUNK-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i32 0 6408d16c680SLuke Lau; VLENUNK-NEXT: store <vscale x 2 x ptr> [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8 6418d16c680SLuke Lau; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] 6428d16c680SLuke Lau; VLENUNK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 6438d16c680SLuke Lau; VLENUNK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] 64420dd3297SPhilip Reames; VLENUNK: middle.block: 64520dd3297SPhilip Reames; VLENUNK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] 64620dd3297SPhilip Reames; VLENUNK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 64720dd3297SPhilip Reames; VLENUNK: scalar.ph: 64820dd3297SPhilip Reames; VLENUNK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 649056d6393SPhilip Reames; VLENUNK-NEXT: br label [[FOR_BODY:%.*]] 650056d6393SPhilip Reames; VLENUNK: for.body: 65120dd3297SPhilip Reames; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 65220dd3297SPhilip Reames; VLENUNK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] 65320dd3297SPhilip Reames; VLENUNK-NEXT: store ptr [[V]], ptr [[ARRAYIDX]], align 8 654056d6393SPhilip Reames; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 655056d6393SPhilip Reames; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 6564d875910SPhilip Reames; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] 657056d6393SPhilip Reames; VLENUNK: for.end: 658056d6393SPhilip Reames; VLENUNK-NEXT: ret void 659056d6393SPhilip Reames; 660056d6393SPhilip Reames; VLEN128-LABEL: @splat_ptr( 661056d6393SPhilip Reames; VLEN128-NEXT: entry: 66220dd3297SPhilip Reames; VLEN128-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 6638d16c680SLuke Lau; VLEN128-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 6648d16c680SLuke Lau; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] 66520dd3297SPhilip Reames; VLEN128-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 666056d6393SPhilip Reames; VLEN128: vector.ph: 6678d16c680SLuke Lau; VLEN128-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 6688d16c680SLuke Lau; VLEN128-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 6698d16c680SLuke Lau; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] 67020dd3297SPhilip Reames; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] 6715ea6a3fcSFlorian Hahn; VLEN128-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() 6725ea6a3fcSFlorian Hahn; VLEN128-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 673a5891fa4SFlorian Hahn; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[V:%.*]], i64 0 674a5891fa4SFlorian Hahn; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer 675056d6393SPhilip Reames; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]] 676056d6393SPhilip Reames; VLEN128: vector.body: 677056d6393SPhilip Reames; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 6788d16c680SLuke Lau; VLEN128-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 6798d16c680SLuke Lau; VLEN128-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] 6808d16c680SLuke Lau; VLEN128-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i32 0 6818d16c680SLuke Lau; VLEN128-NEXT: store <vscale x 2 x ptr> [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8 6828d16c680SLuke Lau; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] 6838d16c680SLuke Lau; VLEN128-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 6848d16c680SLuke Lau; VLEN128-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] 685056d6393SPhilip Reames; VLEN128: middle.block: 68620dd3297SPhilip Reames; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] 687056d6393SPhilip Reames; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 688056d6393SPhilip Reames; VLEN128: scalar.ph: 68920dd3297SPhilip Reames; VLEN128-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 690056d6393SPhilip Reames; VLEN128-NEXT: br label [[FOR_BODY:%.*]] 691056d6393SPhilip Reames; VLEN128: for.body: 692056d6393SPhilip Reames; VLEN128-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 693056d6393SPhilip Reames; VLEN128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] 694056d6393SPhilip Reames; VLEN128-NEXT: store ptr [[V]], ptr [[ARRAYIDX]], align 8 695056d6393SPhilip Reames; VLEN128-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 696056d6393SPhilip Reames; VLEN128-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 697ae8fac6fSPhilip Reames; VLEN128-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] 698056d6393SPhilip Reames; VLEN128: for.end: 699056d6393SPhilip Reames; VLEN128-NEXT: ret void 7002cf320d4SPhilip Reames; 7012cf320d4SPhilip Reamesentry: 7022cf320d4SPhilip Reames br label %for.body 7032cf320d4SPhilip Reames 7042cf320d4SPhilip Reamesfor.body: 7052cf320d4SPhilip Reames %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 7062cf320d4SPhilip Reames %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv 7072cf320d4SPhilip Reames store ptr %v, ptr %arrayidx 7082cf320d4SPhilip Reames %iv.next = add nuw nsw i64 %iv, 1 7092cf320d4SPhilip Reames %exitcond.not = icmp eq i64 %iv.next, 1024 7102cf320d4SPhilip Reames br i1 %exitcond.not, label %for.end, label %for.body 7112cf320d4SPhilip Reames 7122cf320d4SPhilip Reamesfor.end: 7132cf320d4SPhilip Reames ret void 7142cf320d4SPhilip Reames} 7152cf320d4SPhilip Reames 716