xref: /llvm-project/llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll (revision 56c091ea7106507b36015297ee9005c9d5fab0bf)
12cf320d4SPhilip Reames; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2be51fa45SRoman Lebedev; RUN: opt < %s -passes=loop-vectorize -scalable-vectorization=on -mtriple riscv64-linux-gnu -mattr=+v,+f -S 2>%t | FileCheck %s -check-prefix=VLENUNK
3be51fa45SRoman Lebedev; RUN: opt < %s -passes=loop-vectorize -scalable-vectorization=on -riscv-v-vector-bits-min=-1 -mtriple riscv64-linux-gnu -mattr=+v,+f -S 2>%t | FileCheck %s -check-prefix=VLEN128
4056d6393SPhilip Reames
5056d6393SPhilip Reames; Note: +v implies a Zvl128b (i.e. minimal VLEN of 128), but as can be seen, we're currently
6056d6393SPhilip Reames; not using that information unless an explicit vector width is set. (FIXME)
72cf320d4SPhilip Reames
82cf320d4SPhilip Reames; A collection of fairly basic functional tests when both fixed and scalable vectorization are
92cf320d4SPhilip Reames; allowed.  The primary goal of this is to check for crashes during cost modeling, but it also
102cf320d4SPhilip Reames; exercises the default heuristics in a useful way.
112cf320d4SPhilip Reames
122cf320d4SPhilip Reamestarget datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
132cf320d4SPhilip Reamestarget triple = "riscv64"
142cf320d4SPhilip Reames
; a[i] += v on i64 elements for i in [0, 1024).  The trip count is the
; constant 1024; %n is not referenced by the loop.  The expected output for
; both check prefixes is a <vscale x 2 x i64> vector loop plus a scalar loop
; that resumes at the vectorized trip count.
152cf320d4SPhilip Reamesdefine void @vector_add(ptr noalias nocapture %a, i64 %v, i64 %n) {
16056d6393SPhilip Reames; VLENUNK-LABEL: @vector_add(
17056d6393SPhilip Reames; VLENUNK-NEXT:  entry:
1820dd3297SPhilip Reames; VLENUNK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
198d16c680SLuke Lau; VLENUNK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
208d16c680SLuke Lau; VLENUNK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
2120dd3297SPhilip Reames; VLENUNK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2220dd3297SPhilip Reames; VLENUNK:       vector.ph:
238d16c680SLuke Lau; VLENUNK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
248d16c680SLuke Lau; VLENUNK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 2
258d16c680SLuke Lau; VLENUNK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
2620dd3297SPhilip Reames; VLENUNK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
275ea6a3fcSFlorian Hahn; VLENUNK-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
285ea6a3fcSFlorian Hahn; VLENUNK-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP8]], 2
29a5891fa4SFlorian Hahn; VLENUNK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
30a5891fa4SFlorian Hahn; VLENUNK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
3120dd3297SPhilip Reames; VLENUNK-NEXT:    br label [[VECTOR_BODY:%.*]]
3220dd3297SPhilip Reames; VLENUNK:       vector.body:
3320dd3297SPhilip Reames; VLENUNK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
348d16c680SLuke Lau; VLENUNK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 0
358d16c680SLuke Lau; VLENUNK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]]
368d16c680SLuke Lau; VLENUNK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
378d16c680SLuke Lau; VLENUNK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP6]], align 8
388d16c680SLuke Lau; VLENUNK-NEXT:    [[TMP7:%.*]] = add <vscale x 2 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
398d16c680SLuke Lau; VLENUNK-NEXT:    store <vscale x 2 x i64> [[TMP7]], ptr [[TMP6]], align 8
408d16c680SLuke Lau; VLENUNK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
418d16c680SLuke Lau; VLENUNK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
428d16c680SLuke Lau; VLENUNK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
4320dd3297SPhilip Reames; VLENUNK:       middle.block:
4420dd3297SPhilip Reames; VLENUNK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
4520dd3297SPhilip Reames; VLENUNK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
4620dd3297SPhilip Reames; VLENUNK:       scalar.ph:
4720dd3297SPhilip Reames; VLENUNK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
48056d6393SPhilip Reames; VLENUNK-NEXT:    br label [[FOR_BODY:%.*]]
49056d6393SPhilip Reames; VLENUNK:       for.body:
5020dd3297SPhilip Reames; VLENUNK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
5120dd3297SPhilip Reames; VLENUNK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
52056d6393SPhilip Reames; VLENUNK-NEXT:    [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
5320dd3297SPhilip Reames; VLENUNK-NEXT:    [[ADD:%.*]] = add i64 [[ELEM]], [[V]]
54056d6393SPhilip Reames; VLENUNK-NEXT:    store i64 [[ADD]], ptr [[ARRAYIDX]], align 8
55056d6393SPhilip Reames; VLENUNK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
56056d6393SPhilip Reames; VLENUNK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
575a115452SSander de Smalen; VLENUNK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
58056d6393SPhilip Reames; VLENUNK:       for.end:
59056d6393SPhilip Reames; VLENUNK-NEXT:    ret void
60056d6393SPhilip Reames;
61056d6393SPhilip Reames; VLEN128-LABEL: @vector_add(
62056d6393SPhilip Reames; VLEN128-NEXT:  entry:
6320dd3297SPhilip Reames; VLEN128-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
648d16c680SLuke Lau; VLEN128-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
658d16c680SLuke Lau; VLEN128-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
6620dd3297SPhilip Reames; VLEN128-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
67056d6393SPhilip Reames; VLEN128:       vector.ph:
688d16c680SLuke Lau; VLEN128-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
698d16c680SLuke Lau; VLEN128-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 2
708d16c680SLuke Lau; VLEN128-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
7120dd3297SPhilip Reames; VLEN128-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
725ea6a3fcSFlorian Hahn; VLEN128-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
735ea6a3fcSFlorian Hahn; VLEN128-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP8]], 2
74a5891fa4SFlorian Hahn; VLEN128-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
75a5891fa4SFlorian Hahn; VLEN128-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
76056d6393SPhilip Reames; VLEN128-NEXT:    br label [[VECTOR_BODY:%.*]]
77056d6393SPhilip Reames; VLEN128:       vector.body:
78056d6393SPhilip Reames; VLEN128-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
798d16c680SLuke Lau; VLEN128-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 0
808d16c680SLuke Lau; VLEN128-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]]
818d16c680SLuke Lau; VLEN128-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
828d16c680SLuke Lau; VLEN128-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP6]], align 8
838d16c680SLuke Lau; VLEN128-NEXT:    [[TMP7:%.*]] = add <vscale x 2 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
848d16c680SLuke Lau; VLEN128-NEXT:    store <vscale x 2 x i64> [[TMP7]], ptr [[TMP6]], align 8
858d16c680SLuke Lau; VLEN128-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
868d16c680SLuke Lau; VLEN128-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
878d16c680SLuke Lau; VLEN128-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
88056d6393SPhilip Reames; VLEN128:       middle.block:
8920dd3297SPhilip Reames; VLEN128-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
90056d6393SPhilip Reames; VLEN128-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
91056d6393SPhilip Reames; VLEN128:       scalar.ph:
9220dd3297SPhilip Reames; VLEN128-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
93056d6393SPhilip Reames; VLEN128-NEXT:    br label [[FOR_BODY:%.*]]
94056d6393SPhilip Reames; VLEN128:       for.body:
95056d6393SPhilip Reames; VLEN128-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
96056d6393SPhilip Reames; VLEN128-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
97056d6393SPhilip Reames; VLEN128-NEXT:    [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
98056d6393SPhilip Reames; VLEN128-NEXT:    [[ADD:%.*]] = add i64 [[ELEM]], [[V]]
99056d6393SPhilip Reames; VLEN128-NEXT:    store i64 [[ADD]], ptr [[ARRAYIDX]], align 8
100056d6393SPhilip Reames; VLEN128-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
101056d6393SPhilip Reames; VLEN128-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
1025a115452SSander de Smalen; VLEN128-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
103056d6393SPhilip Reames; VLEN128:       for.end:
104056d6393SPhilip Reames; VLEN128-NEXT:    ret void
1052cf320d4SPhilip Reames;
1062cf320d4SPhilip Reamesentry:
1072cf320d4SPhilip Reames  br label %for.body
1082cf320d4SPhilip Reames
; Read-modify-write of a[i]: the load and the store share %arrayidx, and the
; loop exits once %iv.next reaches 1024.
1092cf320d4SPhilip Reamesfor.body:
1102cf320d4SPhilip Reames  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
1112cf320d4SPhilip Reames  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
1122cf320d4SPhilip Reames  %elem = load i64, ptr %arrayidx
1132cf320d4SPhilip Reames  %add = add i64 %elem, %v
1142cf320d4SPhilip Reames  store i64 %add, ptr %arrayidx
1152cf320d4SPhilip Reames  %iv.next = add nuw nsw i64 %iv, 1
1162cf320d4SPhilip Reames  %exitcond.not = icmp eq i64 %iv.next, 1024
1172cf320d4SPhilip Reames  br i1 %exitcond.not, label %for.end, label %for.body
1182cf320d4SPhilip Reames
1192cf320d4SPhilip Reamesfor.end:
1202cf320d4SPhilip Reames  ret void
1212cf320d4SPhilip Reames}
1222cf320d4SPhilip Reames
123ae8fac6fSPhilip Reames; Same as above, but with op type of i32.  We currently have a bug around
124ae8fac6fSPhilip Reames; etype=ELEN profitability in the vectorizer, and having a smaller element
125ae8fac6fSPhilip Reames; width test allows us to highlight different aspects of codegen.
; a[i] += v on i32 elements for i in [0, 1024); %n is not referenced by the
; loop.  The expected output for both check prefixes is a <vscale x 4 x i32>
; vector loop plus a scalar loop that resumes at the vectorized trip count.
126ae8fac6fSPhilip Reamesdefine void @vector_add_i32(ptr noalias nocapture %a, i32 %v, i64 %n) {
127ae8fac6fSPhilip Reames; VLENUNK-LABEL: @vector_add_i32(
128ae8fac6fSPhilip Reames; VLENUNK-NEXT:  entry:
129ae8fac6fSPhilip Reames; VLENUNK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
1308d16c680SLuke Lau; VLENUNK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
131ae8fac6fSPhilip Reames; VLENUNK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
132ae8fac6fSPhilip Reames; VLENUNK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
133ae8fac6fSPhilip Reames; VLENUNK:       vector.ph:
134ae8fac6fSPhilip Reames; VLENUNK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
1358d16c680SLuke Lau; VLENUNK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
136ae8fac6fSPhilip Reames; VLENUNK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
137ae8fac6fSPhilip Reames; VLENUNK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
1385ea6a3fcSFlorian Hahn; VLENUNK-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
1395ea6a3fcSFlorian Hahn; VLENUNK-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP8]], 4
140a5891fa4SFlorian Hahn; VLENUNK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[V:%.*]], i64 0
141a5891fa4SFlorian Hahn; VLENUNK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
142ae8fac6fSPhilip Reames; VLENUNK-NEXT:    br label [[VECTOR_BODY:%.*]]
143ae8fac6fSPhilip Reames; VLENUNK:       vector.body:
144ae8fac6fSPhilip Reames; VLENUNK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
145ae8fac6fSPhilip Reames; VLENUNK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 0
14615f9cf16SLuke Lau; VLENUNK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP4]]
14715f9cf16SLuke Lau; VLENUNK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
1488d16c680SLuke Lau; VLENUNK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP6]], align 4
1498d16c680SLuke Lau; VLENUNK-NEXT:    [[TMP7:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
1508d16c680SLuke Lau; VLENUNK-NEXT:    store <vscale x 4 x i32> [[TMP7]], ptr [[TMP6]], align 4
15115f9cf16SLuke Lau; VLENUNK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
15215f9cf16SLuke Lau; VLENUNK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
15315f9cf16SLuke Lau; VLENUNK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
154ae8fac6fSPhilip Reames; VLENUNK:       middle.block:
155ae8fac6fSPhilip Reames; VLENUNK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
156ae8fac6fSPhilip Reames; VLENUNK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
157ae8fac6fSPhilip Reames; VLENUNK:       scalar.ph:
158ae8fac6fSPhilip Reames; VLENUNK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
159ae8fac6fSPhilip Reames; VLENUNK-NEXT:    br label [[FOR_BODY:%.*]]
160ae8fac6fSPhilip Reames; VLENUNK:       for.body:
161ae8fac6fSPhilip Reames; VLENUNK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
162ae8fac6fSPhilip Reames; VLENUNK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
163ae8fac6fSPhilip Reames; VLENUNK-NEXT:    [[ELEM:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
164ae8fac6fSPhilip Reames; VLENUNK-NEXT:    [[ADD:%.*]] = add i32 [[ELEM]], [[V]]
165ae8fac6fSPhilip Reames; VLENUNK-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX]], align 4
166ae8fac6fSPhilip Reames; VLENUNK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
167ae8fac6fSPhilip Reames; VLENUNK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
16820dd3297SPhilip Reames; VLENUNK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
169ae8fac6fSPhilip Reames; VLENUNK:       for.end:
170ae8fac6fSPhilip Reames; VLENUNK-NEXT:    ret void
171ae8fac6fSPhilip Reames;
172ae8fac6fSPhilip Reames; VLEN128-LABEL: @vector_add_i32(
173ae8fac6fSPhilip Reames; VLEN128-NEXT:  entry:
1749803b0d1SPhilip Reames; VLEN128-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
1758d16c680SLuke Lau; VLEN128-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
1769803b0d1SPhilip Reames; VLEN128-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
1779803b0d1SPhilip Reames; VLEN128-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
178ae8fac6fSPhilip Reames; VLEN128:       vector.ph:
1799803b0d1SPhilip Reames; VLEN128-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
1808d16c680SLuke Lau; VLEN128-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
1819803b0d1SPhilip Reames; VLEN128-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
1829803b0d1SPhilip Reames; VLEN128-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
1835ea6a3fcSFlorian Hahn; VLEN128-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
1845ea6a3fcSFlorian Hahn; VLEN128-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP8]], 4
185a5891fa4SFlorian Hahn; VLEN128-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[V:%.*]], i64 0
186a5891fa4SFlorian Hahn; VLEN128-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
187ae8fac6fSPhilip Reames; VLEN128-NEXT:    br label [[VECTOR_BODY:%.*]]
188ae8fac6fSPhilip Reames; VLEN128:       vector.body:
189ae8fac6fSPhilip Reames; VLEN128-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1909803b0d1SPhilip Reames; VLEN128-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 0
19115f9cf16SLuke Lau; VLEN128-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP4]]
19215f9cf16SLuke Lau; VLEN128-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
1938d16c680SLuke Lau; VLEN128-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP6]], align 4
1948d16c680SLuke Lau; VLEN128-NEXT:    [[TMP7:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
1958d16c680SLuke Lau; VLEN128-NEXT:    store <vscale x 4 x i32> [[TMP7]], ptr [[TMP6]], align 4
19615f9cf16SLuke Lau; VLEN128-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
19715f9cf16SLuke Lau; VLEN128-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
19815f9cf16SLuke Lau; VLEN128-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
199ae8fac6fSPhilip Reames; VLEN128:       middle.block:
2009803b0d1SPhilip Reames; VLEN128-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
201ae8fac6fSPhilip Reames; VLEN128-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
202ae8fac6fSPhilip Reames; VLEN128:       scalar.ph:
2039803b0d1SPhilip Reames; VLEN128-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
204ae8fac6fSPhilip Reames; VLEN128-NEXT:    br label [[FOR_BODY:%.*]]
205ae8fac6fSPhilip Reames; VLEN128:       for.body:
206ae8fac6fSPhilip Reames; VLEN128-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
207ae8fac6fSPhilip Reames; VLEN128-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
208ae8fac6fSPhilip Reames; VLEN128-NEXT:    [[ELEM:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
209ae8fac6fSPhilip Reames; VLEN128-NEXT:    [[ADD:%.*]] = add i32 [[ELEM]], [[V]]
210ae8fac6fSPhilip Reames; VLEN128-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX]], align 4
211ae8fac6fSPhilip Reames; VLEN128-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
212ae8fac6fSPhilip Reames; VLEN128-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
213ae8fac6fSPhilip Reames; VLEN128-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
214ae8fac6fSPhilip Reames; VLEN128:       for.end:
215ae8fac6fSPhilip Reames; VLEN128-NEXT:    ret void
216ae8fac6fSPhilip Reames;
217ae8fac6fSPhilip Reamesentry:
218ae8fac6fSPhilip Reames  br label %for.body
219ae8fac6fSPhilip Reames
; Read-modify-write of a[i] with i32 data; the induction variable and the
; exit compare against 1024 remain i64.
220ae8fac6fSPhilip Reamesfor.body:
221ae8fac6fSPhilip Reames  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
222ae8fac6fSPhilip Reames  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
223ae8fac6fSPhilip Reames  %elem = load i32, ptr %arrayidx
224ae8fac6fSPhilip Reames  %add = add i32 %elem, %v
225ae8fac6fSPhilip Reames  store i32 %add, ptr %arrayidx
226ae8fac6fSPhilip Reames  %iv.next = add nuw nsw i64 %iv, 1
227ae8fac6fSPhilip Reames  %exitcond.not = icmp eq i64 %iv.next, 1024
228ae8fac6fSPhilip Reames  br i1 %exitcond.not, label %for.end, label %for.body
229ae8fac6fSPhilip Reames
230ae8fac6fSPhilip Reamesfor.end:
231ae8fac6fSPhilip Reames  ret void
232ae8fac6fSPhilip Reames}
233ae8fac6fSPhilip Reames
234ae8fac6fSPhilip Reames
2358ae06642SPhilip Reames; a[b[i]] += v, mostly to exercise scatter/gather costing
2368ae06642SPhilip Reames; TODO: Currently fails to vectorize due to a memory conflict
; Trip count is the constant 1024; %n is not referenced by the loop.  The
; expected output for both check prefixes is the original scalar loop with no
; vector.ph / vector.body blocks, i.e. the loop is left unvectorized.
2378ae06642SPhilip Reamesdefine void @indexed_add(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) {
238056d6393SPhilip Reames; VLENUNK-LABEL: @indexed_add(
239056d6393SPhilip Reames; VLENUNK-NEXT:  entry:
240056d6393SPhilip Reames; VLENUNK-NEXT:    br label [[FOR_BODY:%.*]]
241056d6393SPhilip Reames; VLENUNK:       for.body:
242056d6393SPhilip Reames; VLENUNK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
243056d6393SPhilip Reames; VLENUNK-NEXT:    [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[IV]]
244056d6393SPhilip Reames; VLENUNK-NEXT:    [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8
245056d6393SPhilip Reames; VLENUNK-NEXT:    [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[AIDX]]
246056d6393SPhilip Reames; VLENUNK-NEXT:    [[ELEM:%.*]] = load i64, ptr [[AADDR]], align 8
247056d6393SPhilip Reames; VLENUNK-NEXT:    [[ADD:%.*]] = add i64 [[ELEM]], [[V:%.*]]
248056d6393SPhilip Reames; VLENUNK-NEXT:    store i64 [[ADD]], ptr [[AADDR]], align 8
249056d6393SPhilip Reames; VLENUNK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
250056d6393SPhilip Reames; VLENUNK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
251056d6393SPhilip Reames; VLENUNK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
252056d6393SPhilip Reames; VLENUNK:       for.end:
253056d6393SPhilip Reames; VLENUNK-NEXT:    ret void
254056d6393SPhilip Reames;
255056d6393SPhilip Reames; VLEN128-LABEL: @indexed_add(
256056d6393SPhilip Reames; VLEN128-NEXT:  entry:
257056d6393SPhilip Reames; VLEN128-NEXT:    br label [[FOR_BODY:%.*]]
258056d6393SPhilip Reames; VLEN128:       for.body:
259056d6393SPhilip Reames; VLEN128-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
260056d6393SPhilip Reames; VLEN128-NEXT:    [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[IV]]
261056d6393SPhilip Reames; VLEN128-NEXT:    [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8
262056d6393SPhilip Reames; VLEN128-NEXT:    [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[AIDX]]
263056d6393SPhilip Reames; VLEN128-NEXT:    [[ELEM:%.*]] = load i64, ptr [[AADDR]], align 8
264056d6393SPhilip Reames; VLEN128-NEXT:    [[ADD:%.*]] = add i64 [[ELEM]], [[V:%.*]]
265056d6393SPhilip Reames; VLEN128-NEXT:    store i64 [[ADD]], ptr [[AADDR]], align 8
266056d6393SPhilip Reames; VLEN128-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
267056d6393SPhilip Reames; VLEN128-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
268056d6393SPhilip Reames; VLEN128-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
269056d6393SPhilip Reames; VLEN128:       for.end:
270056d6393SPhilip Reames; VLEN128-NEXT:    ret void
2718ae06642SPhilip Reames;
2728ae06642SPhilip Reamesentry:
2738ae06642SPhilip Reames  br label %for.body
2748ae06642SPhilip Reames
; %aidx = b[i] is loaded first and used as the index into %a; the load and the
; store of a[%aidx] then share the same address %aaddr.
2758ae06642SPhilip Reamesfor.body:
2768ae06642SPhilip Reames  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
2778ae06642SPhilip Reames  %baddr = getelementptr inbounds i64, ptr %b, i64 %iv
2788ae06642SPhilip Reames  %aidx = load i64, ptr %baddr
2798ae06642SPhilip Reames  %aaddr = getelementptr inbounds i64, ptr %a, i64 %aidx
2808ae06642SPhilip Reames  %elem = load i64, ptr %aaddr
2818ae06642SPhilip Reames  %add = add i64 %elem, %v
2828ae06642SPhilip Reames  store i64 %add, ptr %aaddr
2838ae06642SPhilip Reames  %iv.next = add nuw nsw i64 %iv, 1
2848ae06642SPhilip Reames  %exitcond.not = icmp eq i64 %iv.next, 1024
2858ae06642SPhilip Reames  br i1 %exitcond.not, label %for.end, label %for.body
2868ae06642SPhilip Reames
2878ae06642SPhilip Reamesfor.end:
2888ae06642SPhilip Reames  ret void
2898ae06642SPhilip Reames}
2908ae06642SPhilip Reames
2918ae06642SPhilip Reames; a[b[i]] = v, exercise scatter support
; Trip count is the constant 1024; %n is not referenced by the loop.  The
; expected output for both check prefixes loads a <vscale x 2 x i64> vector of
; indices from %b and stores a splat of %v through llvm.masked.scatter with an
; all-true mask, plus a scalar loop that resumes at the vectorized trip count.
2928ae06642SPhilip Reamesdefine void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) {
293056d6393SPhilip Reames; VLENUNK-LABEL: @indexed_store(
294056d6393SPhilip Reames; VLENUNK-NEXT:  entry:
2954d875910SPhilip Reames; VLENUNK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
2968d16c680SLuke Lau; VLENUNK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
2978d16c680SLuke Lau; VLENUNK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
2984d875910SPhilip Reames; VLENUNK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2994d875910SPhilip Reames; VLENUNK:       vector.ph:
3008d16c680SLuke Lau; VLENUNK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
3018d16c680SLuke Lau; VLENUNK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 2
3028d16c680SLuke Lau; VLENUNK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
3034d875910SPhilip Reames; VLENUNK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
3045ea6a3fcSFlorian Hahn; VLENUNK-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
3055ea6a3fcSFlorian Hahn; VLENUNK-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP8]], 2
306a5891fa4SFlorian Hahn; VLENUNK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
307a5891fa4SFlorian Hahn; VLENUNK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
3084d875910SPhilip Reames; VLENUNK-NEXT:    br label [[VECTOR_BODY:%.*]]
3094d875910SPhilip Reames; VLENUNK:       vector.body:
3104d875910SPhilip Reames; VLENUNK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3118d16c680SLuke Lau; VLENUNK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 0
3128d16c680SLuke Lau; VLENUNK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP4]]
3138d16c680SLuke Lau; VLENUNK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
3148d16c680SLuke Lau; VLENUNK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP6]], align 8
3158d16c680SLuke Lau; VLENUNK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], <vscale x 2 x i64> [[WIDE_LOAD]]
316*56c091eaSPaul Walker; VLENUNK-NEXT:    call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], <vscale x 2 x ptr> [[TMP7]], i32 8, <vscale x 2 x i1> splat (i1 true))
3178d16c680SLuke Lau; VLENUNK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
3188d16c680SLuke Lau; VLENUNK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
3198d16c680SLuke Lau; VLENUNK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
3204d875910SPhilip Reames; VLENUNK:       middle.block:
3214d875910SPhilip Reames; VLENUNK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
3224d875910SPhilip Reames; VLENUNK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
3234d875910SPhilip Reames; VLENUNK:       scalar.ph:
3244d875910SPhilip Reames; VLENUNK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
325056d6393SPhilip Reames; VLENUNK-NEXT:    br label [[FOR_BODY:%.*]]
326056d6393SPhilip Reames; VLENUNK:       for.body:
3274d875910SPhilip Reames; VLENUNK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
3284d875910SPhilip Reames; VLENUNK-NEXT:    [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
329056d6393SPhilip Reames; VLENUNK-NEXT:    [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8
3304d875910SPhilip Reames; VLENUNK-NEXT:    [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[AIDX]]
3314d875910SPhilip Reames; VLENUNK-NEXT:    store i64 [[V]], ptr [[AADDR]], align 8
332056d6393SPhilip Reames; VLENUNK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
333056d6393SPhilip Reames; VLENUNK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
3344d875910SPhilip Reames; VLENUNK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
335056d6393SPhilip Reames; VLENUNK:       for.end:
336056d6393SPhilip Reames; VLENUNK-NEXT:    ret void
337056d6393SPhilip Reames;
338056d6393SPhilip Reames; VLEN128-LABEL: @indexed_store(
339056d6393SPhilip Reames; VLEN128-NEXT:  entry:
3404d875910SPhilip Reames; VLEN128-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
3418d16c680SLuke Lau; VLEN128-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
3428d16c680SLuke Lau; VLEN128-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
3434d875910SPhilip Reames; VLEN128-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
344056d6393SPhilip Reames; VLEN128:       vector.ph:
3458d16c680SLuke Lau; VLEN128-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
3468d16c680SLuke Lau; VLEN128-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 2
3478d16c680SLuke Lau; VLEN128-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
3484d875910SPhilip Reames; VLEN128-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
3495ea6a3fcSFlorian Hahn; VLEN128-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
3505ea6a3fcSFlorian Hahn; VLEN128-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP8]], 2
351a5891fa4SFlorian Hahn; VLEN128-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
352a5891fa4SFlorian Hahn; VLEN128-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
353056d6393SPhilip Reames; VLEN128-NEXT:    br label [[VECTOR_BODY:%.*]]
354056d6393SPhilip Reames; VLEN128:       vector.body:
355056d6393SPhilip Reames; VLEN128-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3568d16c680SLuke Lau; VLEN128-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 0
3578d16c680SLuke Lau; VLEN128-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP4]]
3588d16c680SLuke Lau; VLEN128-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
3598d16c680SLuke Lau; VLEN128-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP6]], align 8
3608d16c680SLuke Lau; VLEN128-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], <vscale x 2 x i64> [[WIDE_LOAD]]
361*56c091eaSPaul Walker; VLEN128-NEXT:    call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], <vscale x 2 x ptr> [[TMP7]], i32 8, <vscale x 2 x i1> splat (i1 true))
3628d16c680SLuke Lau; VLEN128-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
3638d16c680SLuke Lau; VLEN128-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
3648d16c680SLuke Lau; VLEN128-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
365056d6393SPhilip Reames; VLEN128:       middle.block:
3664d875910SPhilip Reames; VLEN128-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
367056d6393SPhilip Reames; VLEN128-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
368056d6393SPhilip Reames; VLEN128:       scalar.ph:
3694d875910SPhilip Reames; VLEN128-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
370056d6393SPhilip Reames; VLEN128-NEXT:    br label [[FOR_BODY:%.*]]
371056d6393SPhilip Reames; VLEN128:       for.body:
372056d6393SPhilip Reames; VLEN128-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
373056d6393SPhilip Reames; VLEN128-NEXT:    [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
374056d6393SPhilip Reames; VLEN128-NEXT:    [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8
375056d6393SPhilip Reames; VLEN128-NEXT:    [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[AIDX]]
376056d6393SPhilip Reames; VLEN128-NEXT:    store i64 [[V]], ptr [[AADDR]], align 8
377056d6393SPhilip Reames; VLEN128-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
378056d6393SPhilip Reames; VLEN128-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
379ae8fac6fSPhilip Reames; VLEN128-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
380056d6393SPhilip Reames; VLEN128:       for.end:
381056d6393SPhilip Reames; VLEN128-NEXT:    ret void
3828ae06642SPhilip Reames;
3838ae06642SPhilip Reamesentry:
3848ae06642SPhilip Reames  br label %for.body
3858ae06642SPhilip Reames
; %aidx = b[i] is loaded and used as the index into %a; the loop only stores
; %v to a[%aidx] and never loads from %a.
3868ae06642SPhilip Reamesfor.body:
3878ae06642SPhilip Reames  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
3888ae06642SPhilip Reames  %baddr = getelementptr inbounds i64, ptr %b, i64 %iv
3898ae06642SPhilip Reames  %aidx = load i64, ptr %baddr
3908ae06642SPhilip Reames  %aaddr = getelementptr inbounds i64, ptr %a, i64 %aidx
3918ae06642SPhilip Reames  store i64 %v, ptr %aaddr
3928ae06642SPhilip Reames  %iv.next = add nuw nsw i64 %iv, 1
3938ae06642SPhilip Reames  %exitcond.not = icmp eq i64 %iv.next, 1024
3948ae06642SPhilip Reames  br i1 %exitcond.not, label %for.end, label %for.body
3958ae06642SPhilip Reames
3968ae06642SPhilip Reamesfor.end:
3978ae06642SPhilip Reames  ret void
3988ae06642SPhilip Reames}
3998ae06642SPhilip Reames
; Sum reduction over an indirect load: returns the sum of a[b[i]] for i in [0, 1024).
; The index load from %b is consecutive, while the load from %a is addressed by the
; loaded index.  Both RUN configurations expect a wide index load, a masked gather
; with an all-true mask, and a vector add reduction folded in middle.block.
; %v and %n are not used by the loop.
define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) {
; VLENUNK-LABEL: @indexed_load(
; VLENUNK-NEXT:  entry:
; VLENUNK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; VLENUNK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
; VLENUNK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VLENUNK:       vector.ph:
; VLENUNK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; VLENUNK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; VLENUNK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; VLENUNK-NEXT:    [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT:    [[TMP10:%.*]] = mul i64 [[TMP9]], 2
; VLENUNK-NEXT:    br label [[VECTOR_BODY:%.*]]
; VLENUNK:       vector.body:
; VLENUNK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLENUNK-NEXT:    [[VEC_PHI:%.*]] = phi <vscale x 2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
; VLENUNK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 0
; VLENUNK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP4]]
; VLENUNK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
; VLENUNK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP6]], align 8
; VLENUNK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], <vscale x 2 x i64> [[WIDE_LOAD]]
; VLENUNK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[TMP7]], i32 8, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> poison)
; VLENUNK-NEXT:    [[TMP8]] = add <vscale x 2 x i64> [[VEC_PHI]], [[WIDE_MASKED_GATHER]]
; VLENUNK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
; VLENUNK-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VLENUNK-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; VLENUNK:       middle.block:
; VLENUNK-NEXT:    [[TMP12:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> [[TMP8]])
; VLENUNK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; VLENUNK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VLENUNK:       scalar.ph:
; VLENUNK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VLENUNK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; VLENUNK-NEXT:    br label [[FOR_BODY:%.*]]
; VLENUNK:       for.body:
; VLENUNK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLENUNK-NEXT:    [[SUM:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SUM_NEXT:%.*]], [[FOR_BODY]] ]
; VLENUNK-NEXT:    [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
; VLENUNK-NEXT:    [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8
; VLENUNK-NEXT:    [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[AIDX]]
; VLENUNK-NEXT:    [[ELEM:%.*]] = load i64, ptr [[AADDR]], align 8
; VLENUNK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLENUNK-NEXT:    [[SUM_NEXT]] = add i64 [[SUM]], [[ELEM]]
; VLENUNK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLENUNK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; VLENUNK:       for.end:
; VLENUNK-NEXT:    [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ]
; VLENUNK-NEXT:    ret i64 [[SUM_NEXT_LCSSA]]
;
; VLEN128-LABEL: @indexed_load(
; VLEN128-NEXT:  entry:
; VLEN128-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; VLEN128-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
; VLEN128-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VLEN128:       vector.ph:
; VLEN128-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; VLEN128-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; VLEN128-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; VLEN128-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 2
; VLEN128-NEXT:    br label [[VECTOR_BODY:%.*]]
; VLEN128:       vector.body:
; VLEN128-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLEN128-NEXT:    [[VEC_PHI:%.*]] = phi <vscale x 2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
; VLEN128-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
; VLEN128-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP6]]
; VLEN128-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0
; VLEN128-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP8]], align 8
; VLEN128-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], <vscale x 2 x i64> [[WIDE_LOAD]]
; VLEN128-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[TMP9]], i32 8, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> poison)
; VLEN128-NEXT:    [[TMP10]] = add <vscale x 2 x i64> [[VEC_PHI]], [[WIDE_MASKED_GATHER]]
; VLEN128-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; VLEN128-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VLEN128-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; VLEN128:       middle.block:
; VLEN128-NEXT:    [[TMP12:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> [[TMP10]])
; VLEN128-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; VLEN128-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VLEN128:       scalar.ph:
; VLEN128-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VLEN128-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; VLEN128-NEXT:    br label [[FOR_BODY:%.*]]
; VLEN128:       for.body:
; VLEN128-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLEN128-NEXT:    [[SUM:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SUM_NEXT:%.*]], [[FOR_BODY]] ]
; VLEN128-NEXT:    [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
; VLEN128-NEXT:    [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8
; VLEN128-NEXT:    [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[AIDX]]
; VLEN128-NEXT:    [[ELEM:%.*]] = load i64, ptr [[AADDR]], align 8
; VLEN128-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLEN128-NEXT:    [[SUM_NEXT]] = add i64 [[SUM]], [[ELEM]]
; VLEN128-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLEN128-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; VLEN128:       for.end:
; VLEN128-NEXT:    [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ]
; VLEN128-NEXT:    ret i64 [[SUM_NEXT_LCSSA]]
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %sum = phi i64 [0, %entry], [%sum.next, %for.body]
  %baddr = getelementptr inbounds i64, ptr %b, i64 %iv
  ; Index read from b[iv]; it selects which element of %a is loaded next.
  %aidx = load i64, ptr %baddr
  %aaddr = getelementptr inbounds i64, ptr %a, i64 %aidx
  %elem = load i64, ptr %aaddr
  %iv.next = add nuw nsw i64 %iv, 1
  %sum.next = add i64 %sum, %elem
  ; Constant trip count of 1024.
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret i64 %sum.next
}
5198ae06642SPhilip Reames
; Store the loop-invariant i64 %v to a[i] for i in [0, 1024).
; Both RUN configurations expect %v to be broadcast once in vector.ph
; (insertelement + shufflevector) and written with a consecutive
; <vscale x 2 x i64> store in vector.body.  %n is not used by the loop.
define void @splat_int(ptr noalias nocapture %a, i64 %v, i64 %n) {
; VLENUNK-LABEL: @splat_int(
; VLENUNK-NEXT:  entry:
; VLENUNK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; VLENUNK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
; VLENUNK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VLENUNK:       vector.ph:
; VLENUNK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; VLENUNK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; VLENUNK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; VLENUNK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT:    [[TMP8:%.*]] = mul i64 [[TMP7]], 2
; VLENUNK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
; VLENUNK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; VLENUNK-NEXT:    br label [[VECTOR_BODY:%.*]]
; VLENUNK:       vector.body:
; VLENUNK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLENUNK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 0
; VLENUNK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]]
; VLENUNK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
; VLENUNK-NEXT:    store <vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8
; VLENUNK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
; VLENUNK-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VLENUNK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; VLENUNK:       middle.block:
; VLENUNK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; VLENUNK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VLENUNK:       scalar.ph:
; VLENUNK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VLENUNK-NEXT:    br label [[FOR_BODY:%.*]]
; VLENUNK:       for.body:
; VLENUNK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLENUNK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; VLENUNK-NEXT:    store i64 [[V]], ptr [[ARRAYIDX]], align 8
; VLENUNK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLENUNK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLENUNK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; VLENUNK:       for.end:
; VLENUNK-NEXT:    ret void
;
; VLEN128-LABEL: @splat_int(
; VLEN128-NEXT:  entry:
; VLEN128-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; VLEN128-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
; VLEN128-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VLEN128:       vector.ph:
; VLEN128-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; VLEN128-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; VLEN128-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; VLEN128-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT:    [[TMP8:%.*]] = mul i64 [[TMP7]], 2
; VLEN128-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
; VLEN128-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; VLEN128-NEXT:    br label [[VECTOR_BODY:%.*]]
; VLEN128:       vector.body:
; VLEN128-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLEN128-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 0
; VLEN128-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]]
; VLEN128-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
; VLEN128-NEXT:    store <vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8
; VLEN128-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
; VLEN128-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VLEN128-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; VLEN128:       middle.block:
; VLEN128-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; VLEN128-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VLEN128:       scalar.ph:
; VLEN128-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VLEN128-NEXT:    br label [[FOR_BODY:%.*]]
; VLEN128:       for.body:
; VLEN128-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLEN128-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; VLEN128-NEXT:    store i64 [[V]], ptr [[ARRAYIDX]], align 8
; VLEN128-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLEN128-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLEN128-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; VLEN128:       for.end:
; VLEN128-NEXT:    ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
  ; The stored value is loop-invariant, so the vector form only needs a splat.
  store i64 %v, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  ; Constant trip count of 1024.
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}
6172cf320d4SPhilip Reames
; Pointer-typed variant of the splat store: writes the loop-invariant pointer %v
; to a[i] for i in [0, 1024).  Both RUN configurations expect %v to be broadcast
; into a <vscale x 2 x ptr> in vector.ph and written with a consecutive vector
; store in vector.body.  %n is not used by the loop.
define void @splat_ptr(ptr noalias nocapture %a, ptr %v, i64 %n) {
; VLENUNK-LABEL: @splat_ptr(
; VLENUNK-NEXT:  entry:
; VLENUNK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; VLENUNK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
; VLENUNK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VLENUNK:       vector.ph:
; VLENUNK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; VLENUNK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; VLENUNK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; VLENUNK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT:    [[TMP8:%.*]] = mul i64 [[TMP7]], 2
; VLENUNK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[V:%.*]], i64 0
; VLENUNK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
; VLENUNK-NEXT:    br label [[VECTOR_BODY:%.*]]
; VLENUNK:       vector.body:
; VLENUNK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLENUNK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 0
; VLENUNK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]]
; VLENUNK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i32 0
; VLENUNK-NEXT:    store <vscale x 2 x ptr> [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8
; VLENUNK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
; VLENUNK-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VLENUNK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; VLENUNK:       middle.block:
; VLENUNK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; VLENUNK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VLENUNK:       scalar.ph:
; VLENUNK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VLENUNK-NEXT:    br label [[FOR_BODY:%.*]]
; VLENUNK:       for.body:
; VLENUNK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLENUNK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; VLENUNK-NEXT:    store ptr [[V]], ptr [[ARRAYIDX]], align 8
; VLENUNK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLENUNK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLENUNK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; VLENUNK:       for.end:
; VLENUNK-NEXT:    ret void
;
; VLEN128-LABEL: @splat_ptr(
; VLEN128-NEXT:  entry:
; VLEN128-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; VLEN128-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
; VLEN128-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VLEN128:       vector.ph:
; VLEN128-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; VLEN128-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; VLEN128-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; VLEN128-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT:    [[TMP8:%.*]] = mul i64 [[TMP7]], 2
; VLEN128-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[V:%.*]], i64 0
; VLEN128-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
; VLEN128-NEXT:    br label [[VECTOR_BODY:%.*]]
; VLEN128:       vector.body:
; VLEN128-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLEN128-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 0
; VLEN128-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]]
; VLEN128-NEXT:    [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i32 0
; VLEN128-NEXT:    store <vscale x 2 x ptr> [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8
; VLEN128-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
; VLEN128-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VLEN128-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; VLEN128:       middle.block:
; VLEN128-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; VLEN128-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VLEN128:       scalar.ph:
; VLEN128-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VLEN128-NEXT:    br label [[FOR_BODY:%.*]]
; VLEN128:       for.body:
; VLEN128-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLEN128-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; VLEN128-NEXT:    store ptr [[V]], ptr [[ARRAYIDX]], align 8
; VLEN128-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLEN128-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLEN128-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; VLEN128:       for.end:
; VLEN128-NEXT:    ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  ; The address is computed with an i64 element type even though a ptr is stored;
  ; the strides agree because the datalayout declares 64-bit pointers (p:64:64).
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
  store ptr %v, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  ; Constant trip count of 1024.
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}
7152cf320d4SPhilip Reames
716