; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt %s -S -riscv-gather-scatter-lowering -mtriple=riscv64 -mattr=+m,+v | FileCheck %s --check-prefixes=CHECK

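; The -riscv-gather-scatter-lowering pass rewrites masked.gather/masked.scatter
; (and vp.gather/vp.scatter) calls whose addresses advance by a computable
; stride into llvm.experimental.vp.strided.load/store intrinsics.
; %struct.foo below is 16 bytes, so walking a single field across consecutive
; elements gives a constant 16-byte stride.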
%struct.foo = type { i32, i32, i32, i32 }

declare <vscale x 1 x i64> @llvm.stepvector.nxv1i64()

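; Each lane loads field 3 of consecutive %struct.foo elements, so the gather
; lowers to a strided load with a constant 16-byte stride.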
define <vscale x 1 x i64> @gather(ptr %a, i32 %len) {
; CHECK-LABEL: @gather(
; CHECK-NEXT:  vector.ph:
; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN:%.*]] to i64
; CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[ACCUM:%.*]] = phi <vscale x 1 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR]], i32 3
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP1]], i64 16, <vscale x 1 x i1> splat (i1 true), i32 [[TMP2]])
; CHECK-NEXT:    [[GATHER:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i64> undef, i32 [[TMP2]])
; CHECK-NEXT:    [[ACCUM_NEXT]] = add <vscale x 1 x i64> [[ACCUM]], [[GATHER]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP0]]
; CHECK-NEXT:    [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], [[TMP0]]
; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne i64 [[INDEX_NEXT]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT:    br i1 [[TMP4]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret <vscale x 1 x i64> [[ACCUM_NEXT]]
;
vector.ph:
  %wide.trip.count = zext i32 %len to i64
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  %.splatinsert = insertelement <vscale x 1 x i64> poison, i64 %0, i64 0
  %.splat = shufflevector <vscale x 1 x i64> %.splatinsert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %vec.ind = phi <vscale x 1 x i64> [ %1, %vector.ph ], [ %vec.ind.next, %vector.body ]
  %accum = phi <vscale x 1 x i64> [ zeroinitializer, %vector.ph ], [ %accum.next, %vector.body ]
  %2 = getelementptr inbounds %struct.foo, ptr %a, <vscale x 1 x i64> %vec.ind, i32 3
  %gather = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> %2, i32 8, <vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> undef)
  %accum.next = add <vscale x 1 x i64> %accum, %gather
  %index.next = add nuw i64 %index, %0
  %vec.ind.next = add <vscale x 1 x i64> %vec.ind, %.splat
  %3 = icmp ne i64 %index.next, %wide.trip.count
  br i1 %3, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret <vscale x 1 x i64> %accum.next
}

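; The index is built with `or disjoint`, which the pass treats like an add:
; the lowered load starts at element 1 and strides by two i64s (16 bytes).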
define <vscale x 1 x i64> @gather_disjoint_or(ptr %a, i64 %len) {
; CHECK-LABEL: @gather_disjoint_or(
; CHECK-NEXT:  vector.ph:
; CHECK-NEXT:    [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND_SCALAR:%.*]] = phi i64 [ 1, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[ACCUM:%.*]] = phi <vscale x 1 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i64, ptr [[A:%.*]], i64 [[VEC_IND_SCALAR]]
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP0]], i64 16, <vscale x 1 x i1> splat (i1 true), i32 [[TMP1]])
; CHECK-NEXT:    [[GATHER:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> poison, i32 [[TMP1]])
; CHECK-NEXT:    [[ACCUM_NEXT]] = add <vscale x 1 x i64> [[ACCUM]], [[GATHER]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[VSCALE]]
; CHECK-NEXT:    [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], 2
; CHECK-NEXT:    [[EXIT:%.*]] = icmp ne i64 [[INDEX_NEXT]], [[LEN:%.*]]
; CHECK-NEXT:    br i1 [[EXIT]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret <vscale x 1 x i64> [[ACCUM_NEXT]]
;
vector.ph:
  %vscale = call i64 @llvm.vscale.i64()
  %step = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  %step.mul2 = shl <vscale x 1 x i64> %step, splat (i64 1)
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %vec.ind = phi <vscale x 1 x i64> [ %step.mul2, %vector.ph ], [ %vec.ind.next, %vector.body ]

  %accum = phi <vscale x 1 x i64> [ zeroinitializer, %vector.ph ], [ %accum.next, %vector.body ]

  %vec.ind.or = or disjoint <vscale x 1 x i64> %vec.ind, splat (i64 1)

  %gep = getelementptr i64, ptr %a, <vscale x 1 x i64> %vec.ind.or
  %gather = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(
  <vscale x 1 x ptr> %gep,
  i32 8,
  <vscale x 1 x i1> splat (i1 true),
  <vscale x 1 x i64> poison
  )

  %accum.next = add <vscale x 1 x i64> %accum, %gather
  %index.next = add nuw i64 %index, %vscale
  %vec.ind.next = add <vscale x 1 x i64> %vec.ind, splat (i64 2)

  %exit = icmp ne i64 %index.next, %len
  br i1 %exit, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret <vscale x 1 x i64> %accum.next
}

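; The step is loaded from memory and changes every iteration, but the stride
; between lanes within one iteration is still a constant 16 bytes, so the
; strided lowering still applies; only the scalar induction updates use the
; loaded step.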
define <vscale x 1 x i64> @gather_non_invariant_step(ptr %a, ptr %b, i32 %len) {
; CHECK-LABEL: @gather_non_invariant_step(
; CHECK-NEXT:  vector.ph:
; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN:%.*]] to i64
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND_SCALAR1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR1:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[ACCUM:%.*]] = phi <vscale x 1 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR1]], i32 3
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP0]], i64 16, <vscale x 1 x i1> splat (i1 true), i32 [[TMP1]])
; CHECK-NEXT:    [[GATHER:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i64> undef, i32 [[TMP1]])
; CHECK-NEXT:    [[ACCUM_NEXT]] = add <vscale x 1 x i64> [[ACCUM]], [[GATHER]]
; CHECK-NEXT:    [[B:%.*]] = getelementptr i64, ptr [[B1:%.*]], i64 [[VEC_IND_SCALAR]]
; CHECK-NEXT:    [[STEP:%.*]] = load i64, ptr [[B]], align 8
; CHECK-NEXT:    [[VEC_IND_NEXT_SCALAR]] = add nuw i64 [[VEC_IND_SCALAR]], [[STEP]]
; CHECK-NEXT:    [[VEC_IND_NEXT_SCALAR1]] = add i64 [[VEC_IND_SCALAR1]], [[STEP]]
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i64 [[VEC_IND_NEXT_SCALAR]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT:    br i1 [[TMP2]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret <vscale x 1 x i64> [[ACCUM_NEXT]]
;
vector.ph:
  %wide.trip.count = zext i32 %len to i64
  %1 = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %vec.ind = phi <vscale x 1 x i64> [ %1, %vector.ph ], [ %vec.ind.next, %vector.body ]
  %accum = phi <vscale x 1 x i64> [ zeroinitializer, %vector.ph ], [ %accum.next, %vector.body ]
  %2 = getelementptr inbounds %struct.foo, ptr %a, <vscale x 1 x i64> %vec.ind, i32 3
  %gather = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> %2, i32 8, <vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> undef)
  %accum.next = add <vscale x 1 x i64> %accum, %gather

  %b.gep = getelementptr i64, ptr %b, i64 %index
  %step = load i64, ptr %b.gep
  %index.next = add nuw i64 %index, %step
  %.splatinsert = insertelement <vscale x 1 x i64> poison, i64 %step, i64 0
  %.splat = shufflevector <vscale x 1 x i64> %.splatinsert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  %vec.ind.next = add <vscale x 1 x i64> %vec.ind, %.splat
  %3 = icmp ne i64 %index.next, %wide.trip.count
  br i1 %3, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret <vscale x 1 x i64> %accum.next
}

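; The add-then-shl on the index folds into the scalarized induction: it
; starts at (0 + 42) << 2 = 168 and the byte stride becomes 16 << 2 = 64.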
define <vscale x 1 x i64> @gather_non_invariant_step_shl(ptr %a, ptr %b, i32 %len) {
; CHECK-LABEL: @gather_non_invariant_step_shl(
; CHECK-NEXT:  vector.ph:
; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN:%.*]] to i64
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND_SCALAR1:%.*]] = phi i64 [ 168, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR1:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[ACCUM:%.*]] = phi <vscale x 1 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR1]], i32 3
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP0]], i64 64, <vscale x 1 x i1> splat (i1 true), i32 [[TMP1]])
; CHECK-NEXT:    [[GATHER:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i64> undef, i32 [[TMP1]])
; CHECK-NEXT:    [[ACCUM_NEXT]] = add <vscale x 1 x i64> [[ACCUM]], [[GATHER]]
; CHECK-NEXT:    [[B:%.*]] = getelementptr i64, ptr [[B1:%.*]], i64 [[VEC_IND_SCALAR]]
; CHECK-NEXT:    [[STEP:%.*]] = load i64, ptr [[B]], align 8
; CHECK-NEXT:    [[STEP1:%.*]] = shl i64 [[STEP]], 2
; CHECK-NEXT:    [[VEC_IND_NEXT_SCALAR]] = add nuw i64 [[VEC_IND_SCALAR]], [[STEP]]
; CHECK-NEXT:    [[VEC_IND_NEXT_SCALAR1]] = add i64 [[VEC_IND_SCALAR1]], [[STEP1]]
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i64 [[VEC_IND_NEXT_SCALAR]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT:    br i1 [[TMP2]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret <vscale x 1 x i64> [[ACCUM_NEXT]]
;
vector.ph:
  %wide.trip.count = zext i32 %len to i64
  %1 = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %vec.ind = phi <vscale x 1 x i64> [ %1, %vector.ph ], [ %vec.ind.next, %vector.body ]
  %accum = phi <vscale x 1 x i64> [ zeroinitializer, %vector.ph ], [ %accum.next, %vector.body ]

  %vec.ind.add = add <vscale x 1 x i64> %vec.ind, splat (i64 42)
  %vec.ind.shl = shl <vscale x 1 x i64> %vec.ind.add, splat (i64 2)

  %2 = getelementptr inbounds %struct.foo, ptr %a, <vscale x 1 x i64> %vec.ind.shl, i32 3
  %gather = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> %2, i32 8, <vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> undef)
  %accum.next = add <vscale x 1 x i64> %accum, %gather

  %b.gep = getelementptr i64, ptr %b, i64 %index
  %step = load i64, ptr %b.gep
  %index.next = add nuw i64 %index, %step
  %.splatinsert = insertelement <vscale x 1 x i64> poison, i64 %step, i64 0
  %.splat = shufflevector <vscale x 1 x i64> %.splatinsert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  %vec.ind.next = add <vscale x 1 x i64> %vec.ind, %.splat
  %3 = icmp ne i64 %index.next, %wide.trip.count
  br i1 %3, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret <vscale x 1 x i64> %accum.next
}

; Check that the operand of the binary op (%scale.splat in shl) always dominates
; the existing step value when we're adjusting it.
define <vscale x 1 x i64> @gather_splat_op_after_step(ptr %a, ptr %b, i32 %len) {
; CHECK-LABEL: @gather_splat_op_after_step(
; CHECK-NEXT:  vector.ph:
; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN:%.*]] to i64
; CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[SCALE:%.*]] = load i64, ptr [[B:%.*]], align 8
; CHECK-NEXT:    [[STRIDE:%.*]] = shl i64 1, [[SCALE]]
; CHECK-NEXT:    [[STEP:%.*]] = shl i64 [[TMP0]], [[SCALE]]
; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[STRIDE]], 16
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[ACCUM:%.*]] = phi <vscale x 1 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR]], i32 3
; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP2]], i64 [[TMP1]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP3]])
; CHECK-NEXT:    [[GATHER:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> [[TMP4]], <vscale x 1 x i64> undef, i32 [[TMP3]])
; CHECK-NEXT:    [[ACCUM_NEXT]] = add <vscale x 1 x i64> [[ACCUM]], [[GATHER]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP0]]
; CHECK-NEXT:    [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], [[STEP]]
; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[INDEX_NEXT]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT:    br i1 [[TMP5]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret <vscale x 1 x i64> [[ACCUM_NEXT]]
;
vector.ph:
  %wide.trip.count = zext i32 %len to i64
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  %.splatinsert = insertelement <vscale x 1 x i64> poison, i64 %0, i64 0
  %.splat = shufflevector <vscale x 1 x i64> %.splatinsert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer

  %scale = load i64, ptr %b
  %scale.head = insertelement <vscale x 1 x i64> poison, i64 %scale, i64 0
  %scale.splat = shufflevector <vscale x 1 x i64> %scale.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %vec.ind = phi <vscale x 1 x i64> [ %1, %vector.ph ], [ %vec.ind.next, %vector.body ]
  %accum = phi <vscale x 1 x i64> [ zeroinitializer, %vector.ph ], [ %accum.next, %vector.body ]
  %vec.ind.shl = shl <vscale x 1 x i64> %vec.ind, %scale.splat
  %2 = getelementptr inbounds %struct.foo, ptr %a, <vscale x 1 x i64> %vec.ind.shl, i32 3
  %gather = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> %2, i32 8, <vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> undef)
  %accum.next = add <vscale x 1 x i64> %accum, %gather
  %index.next = add nuw i64 %index, %0
  %vec.ind.next = add <vscale x 1 x i64> %vec.ind, %.splat
  %3 = icmp ne i64 %index.next, %wide.trip.count
  br i1 %3, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret <vscale x 1 x i64> %accum.next
}

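; Store counterpart of @gather: zeroinitializer is written to field 3 of
; consecutive %struct.foo elements as a strided store with stride 16.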
define void @scatter(ptr %a, i32 %len) {
; CHECK-LABEL: @scatter(
; CHECK-NEXT:  vector.ph:
; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN:%.*]] to i64
; CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR]], i32 3
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    call void @llvm.experimental.vp.strided.store.nxv1i64.p0.i64(<vscale x 1 x i64> zeroinitializer, ptr [[TMP1]], i64 16, <vscale x 1 x i1> splat (i1 true), i32 [[TMP2]])
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP0]]
; CHECK-NEXT:    [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], [[TMP0]]
; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[INDEX_NEXT]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT:    br i1 [[TMP3]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret void
;
vector.ph:
  %wide.trip.count = zext i32 %len to i64
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  %.splatinsert = insertelement <vscale x 1 x i64> poison, i64 %0, i64 0
  %.splat = shufflevector <vscale x 1 x i64> %.splatinsert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %vec.ind = phi <vscale x 1 x i64> [ %1, %vector.ph ], [ %vec.ind.next, %vector.body ]
  %2 = getelementptr inbounds %struct.foo, ptr %a, <vscale x 1 x i64> %vec.ind, i32 3
  tail call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> zeroinitializer, <vscale x 1 x ptr> %2, i32 8, <vscale x 1 x i1> splat (i1 true))
  %index.next = add nuw i64 %index, %0
  %vec.ind.next = add <vscale x 1 x i64> %vec.ind, %.splat
  %3 = icmp ne i64 %index.next, %wide.trip.count
  br i1 %3, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}

define void @scatter_non_invariant_step(ptr %a, ptr %b, i32 %len) {
; CHECK-LABEL: @scatter_non_invariant_step(
; CHECK-NEXT:  vector.ph:
; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN:%.*]] to i64
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND_SCALAR1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR1:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR1]], i32 3
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    call void @llvm.experimental.vp.strided.store.nxv1i64.p0.i64(<vscale x 1 x i64> zeroinitializer, ptr [[TMP0]], i64 16, <vscale x 1 x i1> splat (i1 true), i32 [[TMP1]])
; CHECK-NEXT:    [[B:%.*]] = getelementptr i64, ptr [[B1:%.*]], i64 [[VEC_IND_SCALAR]]
; CHECK-NEXT:    [[STEP:%.*]] = load i64, ptr [[B]], align 8
; CHECK-NEXT:    [[VEC_IND_NEXT_SCALAR]] = add nuw i64 [[VEC_IND_SCALAR]], [[STEP]]
; CHECK-NEXT:    [[VEC_IND_NEXT_SCALAR1]] = add i64 [[VEC_IND_SCALAR1]], [[STEP]]
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i64 [[VEC_IND_NEXT_SCALAR]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT:    br i1 [[TMP2]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret void
;
vector.ph:
  %wide.trip.count = zext i32 %len to i64
  %1 = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %vec.ind = phi <vscale x 1 x i64> [ %1, %vector.ph ], [ %vec.ind.next, %vector.body ]
  %2 = getelementptr inbounds %struct.foo, ptr %a, <vscale x 1 x i64> %vec.ind, i32 3
  tail call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> zeroinitializer, <vscale x 1 x ptr> %2, i32 8, <vscale x 1 x i1> splat (i1 true))

  %b.gep = getelementptr i64, ptr %b, i64 %index
  %step = load i64, ptr %b.gep
  %index.next = add nuw i64 %index, %step
  %.splatinsert = insertelement <vscale x 1 x i64> poison, i64 %step, i64 0
  %.splat = shufflevector <vscale x 1 x i64> %.splatinsert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  %vec.ind.next = add <vscale x 1 x i64> %vec.ind, %.splat
  %3 = icmp ne i64 %index.next, %wide.trip.count
  br i1 %3, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}

define void @scatter_non_invariant_step_add_shl(ptr %a, ptr %b, i32 %len) {
; CHECK-LABEL: @scatter_non_invariant_step_add_shl(
; CHECK-NEXT:  vector.ph:
; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN:%.*]] to i64
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND_SCALAR1:%.*]] = phi i64 [ 168, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR1:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR1]], i32 3
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    call void @llvm.experimental.vp.strided.store.nxv1i64.p0.i64(<vscale x 1 x i64> zeroinitializer, ptr [[TMP0]], i64 64, <vscale x 1 x i1> splat (i1 true), i32 [[TMP1]])
; CHECK-NEXT:    [[B:%.*]] = getelementptr i64, ptr [[B1:%.*]], i64 [[VEC_IND_SCALAR]]
; CHECK-NEXT:    [[STEP:%.*]] = load i64, ptr [[B]], align 8
; CHECK-NEXT:    [[STEP1:%.*]] = shl i64 [[STEP]], 2
; CHECK-NEXT:    [[VEC_IND_NEXT_SCALAR]] = add nuw i64 [[VEC_IND_SCALAR]], [[STEP]]
; CHECK-NEXT:    [[VEC_IND_NEXT_SCALAR1]] = add i64 [[VEC_IND_SCALAR1]], [[STEP1]]
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i64 [[VEC_IND_NEXT_SCALAR]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT:    br i1 [[TMP2]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret void
;
vector.ph:
  %wide.trip.count = zext i32 %len to i64
  %1 = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %vec.ind = phi <vscale x 1 x i64> [ %1, %vector.ph ], [ %vec.ind.next, %vector.body ]

  %vec.ind.add = add <vscale x 1 x i64> %vec.ind, splat (i64 42)
  %vec.ind.shl = shl <vscale x 1 x i64> %vec.ind.add, splat (i64 2)

  %2 = getelementptr inbounds %struct.foo, ptr %a, <vscale x 1 x i64> %vec.ind.shl, i32 3
  tail call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> zeroinitializer, <vscale x 1 x ptr> %2, i32 8, <vscale x 1 x i1> splat (i1 true))

  %b.gep = getelementptr i64, ptr %b, i64 %index
  %step = load i64, ptr %b.gep
  %index.next = add nuw i64 %index, %step
  %.splatinsert = insertelement <vscale x 1 x i64> poison, i64 %step, i64 0
  %.splat = shufflevector <vscale x 1 x i64> %.splatinsert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  %vec.ind.next = add <vscale x 1 x i64> %vec.ind, %.splat
  %3 = icmp ne i64 %index.next, %wide.trip.count
  br i1 %3, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}

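; The matching also works on straight-line code: stepvector scaled by a splat
; gives a load from %p with a runtime byte stride of 4 * %stride.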
define <vscale x 1 x i64> @gather_loopless(ptr %p, i64 %stride) {
; CHECK-LABEL: @gather_loopless(
; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[STRIDE:%.*]], 4
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[P:%.*]], i64 [[TMP1]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP2]])
; CHECK-NEXT:    [[X:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i64> poison, i32 [[TMP2]])
; CHECK-NEXT:    ret <vscale x 1 x i64> [[X]]
;
  %step = call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  %splat.insert = insertelement <vscale x 1 x i64> poison, i64 %stride, i64 0
  %splat = shufflevector <vscale x 1 x i64> %splat.insert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  %offsets = mul <vscale x 1 x i64> %step, %splat
  %ptrs = getelementptr i32, ptr %p, <vscale x 1 x i64> %offsets
  %x = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(
  <vscale x 1 x ptr> %ptrs,
  i32 8,
  <vscale x 1 x i1> splat (i1 1),
  <vscale x 1 x i64> poison
  )
  ret <vscale x 1 x i64> %x
}

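; Adding a splat scalar to the step only offsets the base pointer; the
; element stride stays 1 (4 bytes for i32).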
define <vscale x 1 x i64> @straightline_offset_add(ptr %p, i64 %offset) {
; CHECK-LABEL: @straightline_offset_add(
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[OFFSET:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP1]], i64 4, <vscale x 1 x i1> splat (i1 true), i32 [[TMP2]])
; CHECK-NEXT:    [[X:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i64> poison, i32 [[TMP2]])
; CHECK-NEXT:    ret <vscale x 1 x i64> [[X]]
;
  %step = call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  %splat.insert = insertelement <vscale x 1 x i64> poison, i64 %offset, i64 0
  %splat = shufflevector <vscale x 1 x i64> %splat.insert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  %offsetv = add <vscale x 1 x i64> %step, %splat
  %ptrs = getelementptr i32, ptr %p, <vscale x 1 x i64> %offsetv
  %x = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(
  <vscale x 1 x ptr> %ptrs,
  i32 8,
  <vscale x 1 x i1> splat (i1 1),
  <vscale x 1 x i64> poison
  )
  ret <vscale x 1 x i64> %x
}

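; (step << 1) combined with a disjoint or of 1: base &p[1], byte stride 8
; (two i32s).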
define <vscale x 1 x i64> @straightline_offset_disjoint_or(ptr %p, i64 %offset) {
; CHECK-LABEL: @straightline_offset_disjoint_or(
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 1
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP1]], i64 8, <vscale x 1 x i1> splat (i1 true), i32 [[TMP2]])
; CHECK-NEXT:    [[X:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i64> poison, i32 [[TMP2]])
; CHECK-NEXT:    ret <vscale x 1 x i64> [[X]]
;
  %step = call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  %step.shl = shl <vscale x 1 x i64> %step, splat (i64 1)
  %offsetv = or disjoint <vscale x 1 x i64> %step.shl, splat (i64 1)
  %ptrs = getelementptr i32, ptr %p, <vscale x 1 x i64> %offsetv
  %x = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(
  <vscale x 1 x ptr> %ptrs,
  i32 8,
  <vscale x 1 x i1> splat (i1 true),
  <vscale x 1 x i64> poison
  )
  ret <vscale x 1 x i64> %x
}

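; step << 3 selects every 8th i32, i.e. a constant byte stride of 32.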
define <vscale x 1 x i64> @straightline_offset_shl(ptr %p) {
; CHECK-LABEL: @straightline_offset_shl(
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[P:%.*]], i64 32, <vscale x 1 x i1> splat (i1 true), i32 [[TMP1]])
; CHECK-NEXT:    [[X:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> poison, i32 [[TMP1]])
; CHECK-NEXT:    ret <vscale x 1 x i64> [[X]]
;
  %step = call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  %offset = shl <vscale x 1 x i64> %step, splat (i64 3)
  %ptrs = getelementptr i32, ptr %p, <vscale x 1 x i64> %offset
  %x = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(
  <vscale x 1 x ptr> %ptrs,
  i32 8,
  <vscale x 1 x i1> splat (i1 1),
  <vscale x 1 x i64> poison
  )
  ret <vscale x 1 x i64> %x
}

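; Negative test: shl is not commutative, so (splat << step) is not a strided
; pattern and the gather must be left alone.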
define <vscale x 1 x i64> @neg_shl_is_not_commutative(ptr %p) {
; CHECK-LABEL: @neg_shl_is_not_commutative(
; CHECK-NEXT:    [[STEP:%.*]] = call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
; CHECK-NEXT:    [[SPLAT_INSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 3, i64 0
; CHECK-NEXT:    [[SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[SPLAT_INSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
; CHECK-NEXT:    [[OFFSET:%.*]] = shl <vscale x 1 x i64> [[SPLAT]], [[STEP]]
; CHECK-NEXT:    [[PTRS:%.*]] = getelementptr i32, ptr [[P:%.*]], <vscale x 1 x i64> [[OFFSET]]
; CHECK-NEXT:    [[X:%.*]] = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> [[PTRS]], i32 8, <vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> poison)
; CHECK-NEXT:    ret <vscale x 1 x i64> [[X]]
;
  %step = call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  %splat.insert = insertelement <vscale x 1 x i64> poison, i64 3, i64 0
  %splat = shufflevector <vscale x 1 x i64> %splat.insert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  %offset = shl <vscale x 1 x i64> %splat, %step
  %ptrs = getelementptr i32, ptr %p, <vscale x 1 x i64> %offset
  %x = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(
  <vscale x 1 x ptr> %ptrs,
  i32 8,
  <vscale x 1 x i1> splat (i1 1),
  <vscale x 1 x i64> poison
  )
  ret <vscale x 1 x i64> %x
}

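; A non-constant shift amount still works: the stride is materialized at
; runtime as (1 << %shift) * 4 bytes.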
define <vscale x 1 x i64> @straightline_offset_shl_nonc(ptr %p, i64 %shift) {
; CHECK-LABEL: @straightline_offset_shl_nonc(
; CHECK-NEXT:    [[TMP1:%.*]] = shl i64 1, [[SHIFT:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[P:%.*]], i64 [[TMP2]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP3]])
; CHECK-NEXT:    [[X:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> [[TMP4]], <vscale x 1 x i64> poison, i32 [[TMP3]])
; CHECK-NEXT:    ret <vscale x 1 x i64> [[X]]
;
  %step = call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  %splat.insert = insertelement <vscale x 1 x i64> poison, i64 %shift, i64 0
  %splat = shufflevector <vscale x 1 x i64> %splat.insert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  %offset = shl <vscale x 1 x i64> %step, %splat
  %ptrs = getelementptr i32, ptr %p, <vscale x 1 x i64> %offset
  %x = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(
  <vscale x 1 x ptr> %ptrs,
  i32 8,
  <vscale x 1 x i1> splat (i1 1),
  <vscale x 1 x i64> poison
  )
  ret <vscale x 1 x i64> %x
}

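; Store counterpart of @gather_loopless.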
define void @scatter_loopless(<vscale x 1 x i64> %x, ptr %p, i64 %stride) {
; CHECK-LABEL: @scatter_loopless(
; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[STRIDE:%.*]], 4
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    call void @llvm.experimental.vp.strided.store.nxv1i64.p0.i64(<vscale x 1 x i64> [[X:%.*]], ptr [[P:%.*]], i64 [[TMP1]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP2]])
; CHECK-NEXT:    ret void
;
  %step = call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  %splat.insert = insertelement <vscale x 1 x i64> poison, i64 %stride, i64 0
  %splat = shufflevector <vscale x 1 x i64> %splat.insert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  %offsets = mul <vscale x 1 x i64> %step, %splat
  %ptrs = getelementptr i32, ptr %p, <vscale x 1 x i64> %offsets
  call void @llvm.masked.scatter.nxv1i64.nxv1p0(
  <vscale x 1 x i64> %x,
  <vscale x 1 x ptr> %ptrs,
  i32 8,
  <vscale x 1 x i1> splat (i1 1)
  )
  ret void
}

; We previously crashed on this case, expecting the constant offset vector to
; be fixed-length.
define void @constant_stride(<vscale x 1 x i64> %x, ptr %p, i64 %stride) {
; CHECK-LABEL: @constant_stride(
; CHECK-NEXT:    [[PTRS:%.*]] = getelementptr i32, ptr [[P:%.*]], <vscale x 1 x i64> zeroinitializer
; CHECK-NEXT:    call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x ptr> [[PTRS]], i32 8, <vscale x 1 x i1> splat (i1 true))
; CHECK-NEXT:    ret void
;
  %ptrs = getelementptr i32, ptr %p, <vscale x 1 x i64> zeroinitializer
  call void @llvm.masked.scatter.nxv1i64.nxv1p0(
  <vscale x 1 x i64> %x,
  <vscale x 1 x ptr> %ptrs,
  i32 8,
  <vscale x 1 x i1> splat (i1 1)
  )
  ret void
}

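; A strided vector of pointers plus a scalar offset is still strided:
; base &p[%offset], byte stride 8.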
define <vscale x 1 x i64> @vector_base_scalar_offset(ptr %p, i64 %offset) {
; CHECK-LABEL: @vector_base_scalar_offset(
; CHECK-NEXT:    [[PTRS2OFFSET:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET:%.*]]
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[PTRS2OFFSET]], i64 8, <vscale x 1 x i1> splat (i1 true), i32 [[TMP1]])
; CHECK-NEXT:    [[X:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> poison, i32 [[TMP1]])
; CHECK-NEXT:    ret <vscale x 1 x i64> [[X]]
;
  %step = call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  %ptrs1 = getelementptr i64, ptr %p, <vscale x 1 x i64> %step
  %ptrs2 = getelementptr i64, <vscale x 1 x ptr> %ptrs1, i64 %offset
  %x = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(
  <vscale x 1 x ptr> %ptrs2,
  i32 8,
  <vscale x 1 x i1> splat (i1 1),
  <vscale x 1 x i64> poison
  )
  ret <vscale x 1 x i64> %x
}

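; Every lane reads the same address (a splat base plus a scalar offset),
; which lowers to a strided load with stride 0.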
define <vscale x 1 x i64> @splat_base_scalar_offset(ptr %p, i64 %offset) {
; CHECK-LABEL: @splat_base_scalar_offset(
; CHECK-NEXT:    [[PTRSOFFSET:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET:%.*]]
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[PTRSOFFSET]], i64 0, <vscale x 1 x i1> splat (i1 true), i32 [[TMP1]])
; CHECK-NEXT:    [[X:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> poison, i32 [[TMP1]])
; CHECK-NEXT:    ret <vscale x 1 x i64> [[X]]
;
  %head = insertelement <vscale x 1 x ptr> poison, ptr %p, i32 0
  %splat = shufflevector <vscale x 1 x ptr> %head, <vscale x 1 x ptr> poison, <vscale x 1 x i32> zeroinitializer
  %ptrs = getelementptr i64, <vscale x 1 x ptr> %splat, i64 %offset
  %x = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(
  <vscale x 1 x ptr> %ptrs,
  i32 8,
  <vscale x 1 x i1> splat (i1 1),
  <vscale x 1 x i64> poison
  )
  ret <vscale x 1 x i64> %x
}

; We shouldn't be able to determine a stride here.
define <vscale x 1 x i64> @nonstrided_base_scalar_offset(ptr %p, <vscale x 1 x i64> %v, i64 %offset) {
; CHECK-LABEL: @nonstrided_base_scalar_offset(
; CHECK-NEXT:    [[PTRS1:%.*]] = getelementptr i64, ptr [[P:%.*]], <vscale x 1 x i64> [[V:%.*]]
; CHECK-NEXT:    [[PTRS2:%.*]] = getelementptr i64, <vscale x 1 x ptr> [[PTRS1]], i64 [[OFFSET:%.*]]
; CHECK-NEXT:    [[X:%.*]] = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> [[PTRS2]], i32 8, <vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> poison)
; CHECK-NEXT:    ret <vscale x 1 x i64> [[X]]
;
  %ptrs1 = getelementptr i64, ptr %p, <vscale x 1 x i64> %v
  %ptrs2 = getelementptr i64, <vscale x 1 x ptr> %ptrs1, i64 %offset
  %x = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(
  <vscale x 1 x ptr> %ptrs2,
  i32 8,
  <vscale x 1 x i1> splat (i1 1),
  <vscale x 1 x i64> poison
  )
  ret <vscale x 1 x i64> %x
}

; We shouldn't be able to determine a scalar base here.
define <vscale x 1 x i64> @vector_base_vector_offset(ptr %p, <vscale x 1 x i64> %offset) {
; CHECK-LABEL: @vector_base_vector_offset(
; CHECK-NEXT:    [[STEP:%.*]] = call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
; CHECK-NEXT:    [[PTRS1:%.*]] = getelementptr i64, ptr [[P:%.*]], <vscale x 1 x i64> [[STEP]]
; CHECK-NEXT:    [[PTRS2:%.*]] = getelementptr i64, <vscale x 1 x ptr> [[PTRS1]], <vscale x 1 x i64> [[OFFSET:%.*]]
; CHECK-NEXT:    [[X:%.*]] = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> [[PTRS2]], i32 8, <vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> poison)
; CHECK-NEXT:    ret <vscale x 1 x i64> [[X]]
;
  %step = call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  %ptrs1 = getelementptr i64, ptr %p, <vscale x 1 x i64> %step
  %ptrs2 = getelementptr i64, <vscale x 1 x ptr> %ptrs1, <vscale x 1 x i64> %offset
  %x = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(
  <vscale x 1 x ptr> %ptrs2,
  i32 8,
  <vscale x 1 x i1> splat (i1 1),
  <vscale x 1 x i64> poison
  )
  ret <vscale x 1 x i64> %x
}

declare i64 @llvm.vscale.i64()
declare void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)
declare <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr>, i32, <vscale x 1 x i1>, <vscale x 1 x i64>)

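; vp.gather/vp.scatter already carry an explicit EVL (42 here), so the
; lowering reuses it directly instead of querying vscale.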
define <vscale x 1 x i64> @vp_gather(ptr %a, i32 %len) {
; CHECK-LABEL: @vp_gather(
; CHECK-NEXT:  vector.ph:
; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN:%.*]] to i64
; CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND_SCALAR1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR1:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[ACCUM:%.*]] = phi <vscale x 1 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR1]], i32 3
; CHECK-NEXT:    [[GATHER:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP2]], i64 16, <vscale x 1 x i1> splat (i1 true), i32 42)
; CHECK-NEXT:    [[ACCUM_NEXT]] = add <vscale x 1 x i64> [[ACCUM]], [[GATHER]]
; CHECK-NEXT:    [[VEC_IND_NEXT_SCALAR]] = add nuw i64 [[VEC_IND_SCALAR]], [[TMP0]]
; CHECK-NEXT:    [[VEC_IND_NEXT_SCALAR1]] = add i64 [[VEC_IND_SCALAR1]], [[TMP0]]
; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VEC_IND_NEXT_SCALAR]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT:    br i1 [[TMP3]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret <vscale x 1 x i64> [[ACCUM_NEXT]]
;
vector.ph:
  %wide.trip.count = zext i32 %len to i64
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  %.splatinsert = insertelement <vscale x 1 x i64> poison, i64 %0, i64 0
  %.splat = shufflevector <vscale x 1 x i64> %.splatinsert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %vec.ind = phi <vscale x 1 x i64> [ %1, %vector.ph ], [ %vec.ind.next, %vector.body ]
  %accum = phi <vscale x 1 x i64> [ zeroinitializer, %vector.ph ], [ %accum.next, %vector.body ]
  %2 = getelementptr inbounds %struct.foo, ptr %a, <vscale x 1 x i64> %vec.ind, i32 3
  %gather = call <vscale x 1 x i64> @llvm.vp.gather(<vscale x 1 x ptr> %2, <vscale x 1 x i1> splat (i1 true), i32 42)
  %accum.next = add <vscale x 1 x i64> %accum, %gather
  %index.next = add nuw i64 %index, %0
  %vec.ind.next = add <vscale x 1 x i64> %vec.ind, %.splat
  %3 = icmp ne i64 %index.next, %wide.trip.count
  br i1 %3, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret <vscale x 1 x i64> %accum.next
}

define void @vp_scatter(ptr %a, i32 %len) {
; CHECK-LABEL: @vp_scatter(
; CHECK-NEXT:  vector.ph:
; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN:%.*]] to i64
; CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND_SCALAR1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR1:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR1]], i32 3
; CHECK-NEXT:    call void @llvm.experimental.vp.strided.store.nxv1i64.p0.i64(<vscale x 1 x i64> zeroinitializer, ptr [[TMP2]], i64 16, <vscale x 1 x i1> splat (i1 true), i32 42)
; CHECK-NEXT:    [[VEC_IND_NEXT_SCALAR]] = add nuw i64 [[VEC_IND_SCALAR]], [[TMP0]]
; CHECK-NEXT:    [[VEC_IND_NEXT_SCALAR1]] = add i64 [[VEC_IND_SCALAR1]], [[TMP0]]
; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VEC_IND_NEXT_SCALAR]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT:    br i1 [[TMP3]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret void
;
vector.ph:
  %wide.trip.count = zext i32 %len to i64
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  %.splatinsert = insertelement <vscale x 1 x i64> poison, i64 %0, i64 0
  %.splat = shufflevector <vscale x 1 x i64> %.splatinsert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %vec.ind = phi <vscale x 1 x i64> [ %1, %vector.ph ], [ %vec.ind.next, %vector.body ]
  %2 = getelementptr inbounds %struct.foo, ptr %a, <vscale x 1 x i64> %vec.ind, i32 3
  tail call void @llvm.vp.scatter(<vscale x 1 x i64> zeroinitializer, <vscale x 1 x ptr> %2, <vscale x 1 x i1> splat (i1 true), i32 42)
  %index.next = add nuw i64 %index, %0
  %vec.ind.next = add <vscale x 1 x i64> %vec.ind, %.splat
  %3 = icmp ne i64 %index.next, %wide.trip.count
  br i1 %3, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}

; Test that reflects what the loop vectorizer will generate for an EVL
; tail-folded loop.

define <vscale x 1 x i64> @evl_gather(ptr %a, i32 %len) {
; CHECK-LABEL: @evl_gather(
; CHECK-NEXT:  vector.ph:
; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN:%.*]] to i64
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND_SCALAR1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR1:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[ACCUM:%.*]] = phi <vscale x 1 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[ELEMS:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[VEC_IND_SCALAR]]
; CHECK-NEXT:    [[EVL:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[ELEMS]], i32 1, i1 true)
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR1]], i32 3
; CHECK-NEXT:    [[GATHER:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP0]], i64 16, <vscale x 1 x i1> splat (i1 true), i32 [[EVL]])
; CHECK-NEXT:    [[ACCUM_NEXT]] = add <vscale x 1 x i64> [[ACCUM]], [[GATHER]]
; CHECK-NEXT:    [[EVL_ZEXT:%.*]] = zext i32 [[EVL]] to i64
; CHECK-NEXT:    [[VEC_IND_NEXT_SCALAR]] = add nuw i64 [[VEC_IND_SCALAR]], [[EVL_ZEXT]]
; CHECK-NEXT:    [[VEC_IND_NEXT_SCALAR1]] = add i64 [[VEC_IND_SCALAR1]], [[EVL_ZEXT]]
; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VEC_IND_NEXT_SCALAR]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT:    br i1 [[TMP3]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret <vscale x 1 x i64> [[ACCUM_NEXT]]
;
vector.ph:
  %wide.trip.count = zext i32 %len to i64
  %1 = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %vec.ind = phi <vscale x 1 x i64> [ %1, %vector.ph ], [ %vec.ind.next, %vector.body ]
  %accum = phi <vscale x 1 x i64> [ zeroinitializer, %vector.ph ], [ %accum.next, %vector.body ]

  %elems = sub i64 %wide.trip.count, %index
  %evl = call i32 @llvm.experimental.get.vector.length.i64(i64 %elems, i32 1, i1 true)

  %2 = getelementptr inbounds %struct.foo, ptr %a, <vscale x 1 x i64> %vec.ind, i32 3
  %gather = call <vscale x 1 x i64> @llvm.vp.gather(<vscale x 1 x ptr> %2, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  %accum.next = add <vscale x 1 x i64> %accum, %gather

  %evl.zext = zext i32 %evl to i64
  %index.next = add nuw i64 %index, %evl.zext
  %evl.splatinsert = insertelement <vscale x 1 x i64> poison, i64 %evl.zext, i64 0
  %evl.splat = shufflevector <vscale x 1 x i64> %evl.splatinsert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  %vec.ind.next = add <vscale x 1 x i64> %vec.ind, %evl.splat
  %3 = icmp ne i64 %index.next, %wide.trip.count
  br i1 %3, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret <vscale x 1 x i64> %accum.next
}

; Test that reflects what the loop vectorizer will generate for an EVL
; tail-folded loop.

define void @evl_scatter(ptr %a, i32 %len) {
; CHECK-LABEL: @evl_scatter(
; CHECK-NEXT:  vector.ph:
; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN:%.*]] to i64
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[VEC_IND_SCALAR1:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[VEC_IND_NEXT_SCALAR1:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[ELEMS:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[VEC_IND_SCALAR1]]
; CHECK-NEXT:    [[EVL:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[ELEMS]], i32 1, i1 true)
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR]], i32 3
; CHECK-NEXT:    call void @llvm.experimental.vp.strided.store.nxv1i64.p0.i64(<vscale x 1 x i64> zeroinitializer, ptr [[TMP0]], i64 16, <vscale x 1 x i1> splat (i1 true), i32 [[EVL]])
; CHECK-NEXT:    [[EVL_ZEXT:%.*]] = zext i32 [[EVL]] to i64
; CHECK-NEXT:    [[VEC_IND_NEXT_SCALAR1]] = add nuw i64 [[VEC_IND_SCALAR1]], [[EVL_ZEXT]]
; CHECK-NEXT:    [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], [[EVL_ZEXT]]
; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VEC_IND_NEXT_SCALAR1]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT:    br i1 [[TMP3]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret void
;
vector.ph:
  %wide.trip.count = zext i32 %len to i64
  %1 = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %vec.ind = phi <vscale x 1 x i64> [ %1, %vector.ph ], [ %vec.ind.next, %vector.body ]

  %elems = sub i64 %wide.trip.count, %index
  %evl = call i32 @llvm.experimental.get.vector.length.i64(i64 %elems, i32 1, i1 true)

  %2 = getelementptr inbounds %struct.foo, ptr %a, <vscale x 1 x i64> %vec.ind, i32 3
  tail call void @llvm.vp.scatter(<vscale x 1 x i64> zeroinitializer, <vscale x 1 x ptr> %2, <vscale x 1 x i1> splat (i1 true), i32 %evl)

  %evl.zext = zext i32 %evl to i64
  %index.next = add nuw i64 %index, %evl.zext
  %evl.splatinsert = insertelement <vscale x 1 x i64> poison, i64 %evl.zext, i64 0
  %evl.splat = shufflevector <vscale x 1 x i64> %evl.splatinsert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  %vec.ind.next = add <vscale x 1 x i64> %vec.ind, %evl.splat
  %3 = icmp ne i64 %index.next, %wide.trip.count
  br i1 %3, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}