; xref: /llvm-project/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll (revision 6c787ff6cfb5fdf489019a1389f8315391ad435f)
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=loop-vectorize -mtriple=riscv64 -mattr=+v -S | FileCheck %s
; RUN: opt < %s -passes=loop-vectorize -scalable-vectorization=off -mtriple=riscv64 -mattr=+v -S | FileCheck %s --check-prefix=FIXED
; RUN: opt < %s -passes=loop-vectorize -scalable-vectorization=on -mtriple=riscv64 -mattr=+v -S | FileCheck %s --check-prefix=SCALABLE

; Factor-2 interleaved group on i32: each iteration loads p[2*i] and p[2*i+1],
; adds 1 to the even element and 2 to the odd element, and stores both back.
; CHECK/SCALABLE expect scalable-vector deinterleave2/interleave2 intrinsics;
; FIXED expects fixed-width shufflevector de/re-interleaving.
; NOTE(review): extraction had fused per-line numbers into this test
; (e.g. "6define"); they are stripped here so the IR parses and the
; autogenerated FileCheck assertions keep their exact expected text.
define void @load_store_factor2_i32(ptr %p) {
; CHECK-LABEL: @load_store_factor2_i32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP7:%.*]] = shl i64 [[TMP6]], 1
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP7]]
; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP8]], align 4
; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
; CHECK-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
; CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1
; CHECK-NEXT:    [[TMP12:%.*]] = add <vscale x 4 x i32> [[TMP10]], splat (i32 1)
; CHECK-NEXT:    [[TMP15:%.*]] = add <vscale x 4 x i32> [[TMP11]], splat (i32 2)
; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> [[TMP12]], <vscale x 4 x i32> [[TMP15]])
; CHECK-NEXT:    store <vscale x 8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP8]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[OFFSET0:%.*]] = shl i64 [[I]], 1
; CHECK-NEXT:    [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET0]]
; CHECK-NEXT:    [[X0:%.*]] = load i32, ptr [[Q0]], align 4
; CHECK-NEXT:    [[Y0:%.*]] = add i32 [[X0]], 1
; CHECK-NEXT:    store i32 [[Y0]], ptr [[Q0]], align 4
; CHECK-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
; CHECK-NEXT:    [[Q1:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET1]]
; CHECK-NEXT:    [[X1:%.*]] = load i32, ptr [[Q1]], align 4
; CHECK-NEXT:    [[Y1:%.*]] = add i32 [[X1]], 2
; CHECK-NEXT:    store i32 [[Y1]], ptr [[Q1]], align 4
; CHECK-NEXT:    [[NEXTI]] = add i64 [[I]], 1
; CHECK-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
; CHECK-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
; FIXED-LABEL: @load_store_factor2_i32(
; FIXED-NEXT:  entry:
; FIXED-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; FIXED:       vector.ph:
; FIXED-NEXT:    br label [[VECTOR_BODY:%.*]]
; FIXED:       vector.body:
; FIXED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; FIXED-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; FIXED-NEXT:    [[TMP1:%.*]] = shl i64 [[TMP0]], 1
; FIXED-NEXT:    [[TMP2:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP1]]
; FIXED-NEXT:    [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP2]], align 4
; FIXED-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; FIXED-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
; FIXED-NEXT:    [[TMP4:%.*]] = add <8 x i32> [[STRIDED_VEC]], splat (i32 1)
; FIXED-NEXT:    [[TMP7:%.*]] = add <8 x i32> [[STRIDED_VEC1]], splat (i32 2)
; FIXED-NEXT:    [[TMP9:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> [[TMP7]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; FIXED-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i32> [[TMP9]], <16 x i32> poison, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
; FIXED-NEXT:    store <16 x i32> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 4
; FIXED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; FIXED-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; FIXED-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; FIXED:       middle.block:
; FIXED-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; FIXED:       scalar.ph:
; FIXED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; FIXED-NEXT:    br label [[LOOP:%.*]]
; FIXED:       loop:
; FIXED-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
; FIXED-NEXT:    [[OFFSET0:%.*]] = shl i64 [[I]], 1
; FIXED-NEXT:    [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET0]]
; FIXED-NEXT:    [[X0:%.*]] = load i32, ptr [[Q0]], align 4
; FIXED-NEXT:    [[Y0:%.*]] = add i32 [[X0]], 1
; FIXED-NEXT:    store i32 [[Y0]], ptr [[Q0]], align 4
; FIXED-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
; FIXED-NEXT:    [[Q1:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET1]]
; FIXED-NEXT:    [[X1:%.*]] = load i32, ptr [[Q1]], align 4
; FIXED-NEXT:    [[Y1:%.*]] = add i32 [[X1]], 2
; FIXED-NEXT:    store i32 [[Y1]], ptr [[Q1]], align 4
; FIXED-NEXT:    [[NEXTI]] = add i64 [[I]], 1
; FIXED-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
; FIXED-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; FIXED:       exit:
; FIXED-NEXT:    ret void
;
; SCALABLE-LABEL: @load_store_factor2_i32(
; SCALABLE-NEXT:  entry:
; SCALABLE-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; SCALABLE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
; SCALABLE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SCALABLE:       vector.ph:
; SCALABLE-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; SCALABLE-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; SCALABLE-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; SCALABLE-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; SCALABLE-NEXT:    br label [[VECTOR_BODY:%.*]]
; SCALABLE:       vector.body:
; SCALABLE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SCALABLE-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
; SCALABLE-NEXT:    [[TMP7:%.*]] = shl i64 [[TMP6]], 1
; SCALABLE-NEXT:    [[TMP8:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP7]]
; SCALABLE-NEXT:    [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP8]], align 4
; SCALABLE-NEXT:    [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
; SCALABLE-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
; SCALABLE-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1
; SCALABLE-NEXT:    [[TMP12:%.*]] = add <vscale x 4 x i32> [[TMP10]], splat (i32 1)
; SCALABLE-NEXT:    [[TMP15:%.*]] = add <vscale x 4 x i32> [[TMP11]], splat (i32 2)
; SCALABLE-NEXT:    [[INTERLEAVED_VEC:%.*]] = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> [[TMP12]], <vscale x 4 x i32> [[TMP15]])
; SCALABLE-NEXT:    store <vscale x 8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP8]], align 4
; SCALABLE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; SCALABLE-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; SCALABLE-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; SCALABLE:       middle.block:
; SCALABLE-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; SCALABLE-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; SCALABLE:       scalar.ph:
; SCALABLE-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; SCALABLE-NEXT:    br label [[LOOP:%.*]]
; SCALABLE:       loop:
; SCALABLE-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
; SCALABLE-NEXT:    [[OFFSET0:%.*]] = shl i64 [[I]], 1
; SCALABLE-NEXT:    [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET0]]
; SCALABLE-NEXT:    [[X0:%.*]] = load i32, ptr [[Q0]], align 4
; SCALABLE-NEXT:    [[Y0:%.*]] = add i32 [[X0]], 1
; SCALABLE-NEXT:    store i32 [[Y0]], ptr [[Q0]], align 4
; SCALABLE-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
; SCALABLE-NEXT:    [[Q1:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET1]]
; SCALABLE-NEXT:    [[X1:%.*]] = load i32, ptr [[Q1]], align 4
; SCALABLE-NEXT:    [[Y1:%.*]] = add i32 [[X1]], 2
; SCALABLE-NEXT:    store i32 [[Y1]], ptr [[Q1]], align 4
; SCALABLE-NEXT:    [[NEXTI]] = add i64 [[I]], 1
; SCALABLE-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
; SCALABLE-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; SCALABLE:       exit:
; SCALABLE-NEXT:    ret void
;
entry:
  br label %loop
loop:
  %i = phi i64 [0, %entry], [%nexti, %loop]

  ; even lane: p[2*i] += 1
  %offset0 = shl i64 %i, 1
  %q0 = getelementptr i32, ptr %p, i64 %offset0
  %x0 = load i32, ptr %q0
  %y0 = add i32 %x0, 1
  store i32 %y0, ptr %q0

  ; odd lane: p[2*i+1] += 2
  %offset1 = add i64 %offset0, 1
  %q1 = getelementptr i32, ptr %p, i64 %offset1
  %x1 = load i32, ptr %q1
  %y1 = add i32 %x1, 2
  store i32 %y1, ptr %q1

  %nexti = add i64 %i, 1
  %done = icmp eq i64 %nexti, 1024
  br i1 %done, label %exit, label %loop
exit:
  ret void
}
182
; Factor-2 interleaved group on i64: same pattern as the i32 variant but with
; 8-byte elements, so the scalable VF drops to vscale x 2 and the fixed VF to 4.
; NOTE(review): extraction had fused per-line numbers into this test
; (e.g. "183define"); they are stripped here so the IR parses and the
; autogenerated FileCheck assertions keep their exact expected text.
define void @load_store_factor2_i64(ptr %p) {
; CHECK-LABEL: @load_store_factor2_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 2
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP7:%.*]] = shl i64 [[TMP6]], 1
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP7]]
; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <vscale x 4 x i64>, ptr [[TMP8]], align 8
; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> [[WIDE_VEC]])
; CHECK-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 0
; CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 1
; CHECK-NEXT:    [[TMP12:%.*]] = add <vscale x 2 x i64> [[TMP10]], splat (i64 1)
; CHECK-NEXT:    [[TMP15:%.*]] = add <vscale x 2 x i64> [[TMP11]], splat (i64 2)
; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = call <vscale x 4 x i64> @llvm.vector.interleave2.nxv4i64(<vscale x 2 x i64> [[TMP12]], <vscale x 2 x i64> [[TMP15]])
; CHECK-NEXT:    store <vscale x 4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP8]], align 8
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[OFFSET0:%.*]] = shl i64 [[I]], 1
; CHECK-NEXT:    [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
; CHECK-NEXT:    [[X0:%.*]] = load i64, ptr [[Q0]], align 8
; CHECK-NEXT:    [[Y0:%.*]] = add i64 [[X0]], 1
; CHECK-NEXT:    store i64 [[Y0]], ptr [[Q0]], align 8
; CHECK-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
; CHECK-NEXT:    [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
; CHECK-NEXT:    [[X1:%.*]] = load i64, ptr [[Q1]], align 8
; CHECK-NEXT:    [[Y1:%.*]] = add i64 [[X1]], 2
; CHECK-NEXT:    store i64 [[Y1]], ptr [[Q1]], align 8
; CHECK-NEXT:    [[NEXTI]] = add i64 [[I]], 1
; CHECK-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
; CHECK-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
; FIXED-LABEL: @load_store_factor2_i64(
; FIXED-NEXT:  entry:
; FIXED-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; FIXED:       vector.ph:
; FIXED-NEXT:    br label [[VECTOR_BODY:%.*]]
; FIXED:       vector.body:
; FIXED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; FIXED-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; FIXED-NEXT:    [[TMP1:%.*]] = shl i64 [[TMP0]], 1
; FIXED-NEXT:    [[TMP2:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP1]]
; FIXED-NEXT:    [[WIDE_VEC:%.*]] = load <8 x i64>, ptr [[TMP2]], align 8
; FIXED-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; FIXED-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; FIXED-NEXT:    [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC]], splat (i64 1)
; FIXED-NEXT:    [[TMP7:%.*]] = add <4 x i64> [[STRIDED_VEC1]], splat (i64 2)
; FIXED-NEXT:    [[TMP9:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP7]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; FIXED-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i64> [[TMP9]], <8 x i64> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
; FIXED-NEXT:    store <8 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8
; FIXED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; FIXED-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; FIXED-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; FIXED:       middle.block:
; FIXED-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; FIXED:       scalar.ph:
; FIXED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; FIXED-NEXT:    br label [[LOOP:%.*]]
; FIXED:       loop:
; FIXED-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
; FIXED-NEXT:    [[OFFSET0:%.*]] = shl i64 [[I]], 1
; FIXED-NEXT:    [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
; FIXED-NEXT:    [[X0:%.*]] = load i64, ptr [[Q0]], align 8
; FIXED-NEXT:    [[Y0:%.*]] = add i64 [[X0]], 1
; FIXED-NEXT:    store i64 [[Y0]], ptr [[Q0]], align 8
; FIXED-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
; FIXED-NEXT:    [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
; FIXED-NEXT:    [[X1:%.*]] = load i64, ptr [[Q1]], align 8
; FIXED-NEXT:    [[Y1:%.*]] = add i64 [[X1]], 2
; FIXED-NEXT:    store i64 [[Y1]], ptr [[Q1]], align 8
; FIXED-NEXT:    [[NEXTI]] = add i64 [[I]], 1
; FIXED-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
; FIXED-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
; FIXED:       exit:
; FIXED-NEXT:    ret void
;
; SCALABLE-LABEL: @load_store_factor2_i64(
; SCALABLE-NEXT:  entry:
; SCALABLE-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; SCALABLE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
; SCALABLE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SCALABLE:       vector.ph:
; SCALABLE-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; SCALABLE-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; SCALABLE-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; SCALABLE-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 2
; SCALABLE-NEXT:    br label [[VECTOR_BODY:%.*]]
; SCALABLE:       vector.body:
; SCALABLE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SCALABLE-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
; SCALABLE-NEXT:    [[TMP7:%.*]] = shl i64 [[TMP6]], 1
; SCALABLE-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP7]]
; SCALABLE-NEXT:    [[WIDE_VEC:%.*]] = load <vscale x 4 x i64>, ptr [[TMP8]], align 8
; SCALABLE-NEXT:    [[STRIDED_VEC:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> [[WIDE_VEC]])
; SCALABLE-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 0
; SCALABLE-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 1
; SCALABLE-NEXT:    [[TMP12:%.*]] = add <vscale x 2 x i64> [[TMP10]], splat (i64 1)
; SCALABLE-NEXT:    [[TMP15:%.*]] = add <vscale x 2 x i64> [[TMP11]], splat (i64 2)
; SCALABLE-NEXT:    [[INTERLEAVED_VEC:%.*]] = call <vscale x 4 x i64> @llvm.vector.interleave2.nxv4i64(<vscale x 2 x i64> [[TMP12]], <vscale x 2 x i64> [[TMP15]])
; SCALABLE-NEXT:    store <vscale x 4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP8]], align 8
; SCALABLE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; SCALABLE-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; SCALABLE-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; SCALABLE:       middle.block:
; SCALABLE-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; SCALABLE-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; SCALABLE:       scalar.ph:
; SCALABLE-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; SCALABLE-NEXT:    br label [[LOOP:%.*]]
; SCALABLE:       loop:
; SCALABLE-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
; SCALABLE-NEXT:    [[OFFSET0:%.*]] = shl i64 [[I]], 1
; SCALABLE-NEXT:    [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
; SCALABLE-NEXT:    [[X0:%.*]] = load i64, ptr [[Q0]], align 8
; SCALABLE-NEXT:    [[Y0:%.*]] = add i64 [[X0]], 1
; SCALABLE-NEXT:    store i64 [[Y0]], ptr [[Q0]], align 8
; SCALABLE-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
; SCALABLE-NEXT:    [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
; SCALABLE-NEXT:    [[X1:%.*]] = load i64, ptr [[Q1]], align 8
; SCALABLE-NEXT:    [[Y1:%.*]] = add i64 [[X1]], 2
; SCALABLE-NEXT:    store i64 [[Y1]], ptr [[Q1]], align 8
; SCALABLE-NEXT:    [[NEXTI]] = add i64 [[I]], 1
; SCALABLE-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
; SCALABLE-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
; SCALABLE:       exit:
; SCALABLE-NEXT:    ret void
;
entry:
  br label %loop
loop:
  %i = phi i64 [0, %entry], [%nexti, %loop]

  ; even lane: p[2*i] += 1
  %offset0 = shl i64 %i, 1
  %q0 = getelementptr i64, ptr %p, i64 %offset0
  %x0 = load i64, ptr %q0
  %y0 = add i64 %x0, 1
  store i64 %y0, ptr %q0

  ; odd lane: p[2*i+1] += 2
  %offset1 = add i64 %offset0, 1
  %q1 = getelementptr i64, ptr %p, i64 %offset1
  %x1 = load i64, ptr %q1
  %y1 = add i64 %x1, 2
  store i64 %y1, ptr %q1

  %nexti = add i64 %i, 1
  %done = icmp eq i64 %nexti, 1024
  br i1 %done, label %exit, label %loop
exit:
  ret void
}
359
360define void @load_store_factor3_i32(ptr %p) {
361; CHECK-LABEL: @load_store_factor3_i32(
362; CHECK-NEXT:  entry:
363; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
364; CHECK:       vector.ph:
365; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
366; CHECK:       vector.body:
367; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
368; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
369; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 3
370; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP1]]
371; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <24 x i32>, ptr [[TMP2]], align 4
372; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
373; CHECK-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22>
374; CHECK-NEXT:    [[STRIDED_VEC2:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
375; CHECK-NEXT:    [[TMP4:%.*]] = add <8 x i32> [[STRIDED_VEC]], splat (i32 1)
376; CHECK-NEXT:    [[TMP6:%.*]] = add <8 x i32> [[STRIDED_VEC1]], splat (i32 2)
377; CHECK-NEXT:    [[TMP9:%.*]] = add <8 x i32> [[STRIDED_VEC2]], splat (i32 3)
378; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
379; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <8 x i32> [[TMP9]], <8 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
380; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <16 x i32> [[TMP11]], <16 x i32> [[TMP12]], <24 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
381; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <24 x i32> [[TMP13]], <24 x i32> poison, <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23>
382; CHECK-NEXT:    store <24 x i32> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 4
383; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
384; CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
385; CHECK-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
386; CHECK:       middle.block:
387; CHECK-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
388; CHECK:       scalar.ph:
389; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
390; CHECK-NEXT:    br label [[LOOP:%.*]]
391; CHECK:       loop:
392; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
393; CHECK-NEXT:    [[OFFSET0:%.*]] = mul i64 [[I]], 3
394; CHECK-NEXT:    [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET0]]
395; CHECK-NEXT:    [[X0:%.*]] = load i32, ptr [[Q0]], align 4
396; CHECK-NEXT:    [[Y0:%.*]] = add i32 [[X0]], 1
397; CHECK-NEXT:    store i32 [[Y0]], ptr [[Q0]], align 4
398; CHECK-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
399; CHECK-NEXT:    [[Q1:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET1]]
400; CHECK-NEXT:    [[X1:%.*]] = load i32, ptr [[Q1]], align 4
401; CHECK-NEXT:    [[Y1:%.*]] = add i32 [[X1]], 2
402; CHECK-NEXT:    store i32 [[Y1]], ptr [[Q1]], align 4
403; CHECK-NEXT:    [[OFFSET2:%.*]] = add i64 [[OFFSET1]], 1
404; CHECK-NEXT:    [[Q2:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET2]]
405; CHECK-NEXT:    [[X2:%.*]] = load i32, ptr [[Q2]], align 4
406; CHECK-NEXT:    [[Y2:%.*]] = add i32 [[X2]], 3
407; CHECK-NEXT:    store i32 [[Y2]], ptr [[Q2]], align 4
408; CHECK-NEXT:    [[NEXTI]] = add i64 [[I]], 1
409; CHECK-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
410; CHECK-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
411; CHECK:       exit:
412; CHECK-NEXT:    ret void
413;
414; FIXED-LABEL: @load_store_factor3_i32(
415; FIXED-NEXT:  entry:
416; FIXED-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
417; FIXED:       vector.ph:
418; FIXED-NEXT:    br label [[VECTOR_BODY:%.*]]
419; FIXED:       vector.body:
420; FIXED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
421; FIXED-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
422; FIXED-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 3
423; FIXED-NEXT:    [[TMP2:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP1]]
424; FIXED-NEXT:    [[WIDE_VEC:%.*]] = load <24 x i32>, ptr [[TMP2]], align 4
425; FIXED-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
426; FIXED-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22>
427; FIXED-NEXT:    [[STRIDED_VEC2:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
428; FIXED-NEXT:    [[TMP4:%.*]] = add <8 x i32> [[STRIDED_VEC]], splat (i32 1)
429; FIXED-NEXT:    [[TMP6:%.*]] = add <8 x i32> [[STRIDED_VEC1]], splat (i32 2)
430; FIXED-NEXT:    [[TMP9:%.*]] = add <8 x i32> [[STRIDED_VEC2]], splat (i32 3)
431; FIXED-NEXT:    [[TMP11:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
432; FIXED-NEXT:    [[TMP12:%.*]] = shufflevector <8 x i32> [[TMP9]], <8 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
433; FIXED-NEXT:    [[TMP13:%.*]] = shufflevector <16 x i32> [[TMP11]], <16 x i32> [[TMP12]], <24 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
434; FIXED-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <24 x i32> [[TMP13]], <24 x i32> poison, <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23>
435; FIXED-NEXT:    store <24 x i32> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 4
436; FIXED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
437; FIXED-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
438; FIXED-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
439; FIXED:       middle.block:
440; FIXED-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
441; FIXED:       scalar.ph:
442; FIXED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
443; FIXED-NEXT:    br label [[LOOP:%.*]]
444; FIXED:       loop:
445; FIXED-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
446; FIXED-NEXT:    [[OFFSET0:%.*]] = mul i64 [[I]], 3
447; FIXED-NEXT:    [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET0]]
448; FIXED-NEXT:    [[X0:%.*]] = load i32, ptr [[Q0]], align 4
449; FIXED-NEXT:    [[Y0:%.*]] = add i32 [[X0]], 1
450; FIXED-NEXT:    store i32 [[Y0]], ptr [[Q0]], align 4
451; FIXED-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
452; FIXED-NEXT:    [[Q1:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET1]]
453; FIXED-NEXT:    [[X1:%.*]] = load i32, ptr [[Q1]], align 4
454; FIXED-NEXT:    [[Y1:%.*]] = add i32 [[X1]], 2
455; FIXED-NEXT:    store i32 [[Y1]], ptr [[Q1]], align 4
456; FIXED-NEXT:    [[OFFSET2:%.*]] = add i64 [[OFFSET1]], 1
457; FIXED-NEXT:    [[Q2:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET2]]
458; FIXED-NEXT:    [[X2:%.*]] = load i32, ptr [[Q2]], align 4
459; FIXED-NEXT:    [[Y2:%.*]] = add i32 [[X2]], 3
460; FIXED-NEXT:    store i32 [[Y2]], ptr [[Q2]], align 4
461; FIXED-NEXT:    [[NEXTI]] = add i64 [[I]], 1
462; FIXED-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
463; FIXED-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
464; FIXED:       exit:
465; FIXED-NEXT:    ret void
466;
467; SCALABLE-LABEL: @load_store_factor3_i32(
468; SCALABLE-NEXT:  entry:
469; SCALABLE-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
470; SCALABLE:       vector.ph:
471; SCALABLE-NEXT:    br label [[VECTOR_BODY:%.*]]
472; SCALABLE:       vector.body:
473; SCALABLE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
474; SCALABLE-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
475; SCALABLE-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 3
476; SCALABLE-NEXT:    [[TMP2:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP1]]
477; SCALABLE-NEXT:    [[WIDE_VEC:%.*]] = load <24 x i32>, ptr [[TMP2]], align 4
478; SCALABLE-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
479; SCALABLE-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22>
480; SCALABLE-NEXT:    [[STRIDED_VEC2:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
481; SCALABLE-NEXT:    [[TMP4:%.*]] = add <8 x i32> [[STRIDED_VEC]], splat (i32 1)
482; SCALABLE-NEXT:    [[TMP6:%.*]] = add <8 x i32> [[STRIDED_VEC1]], splat (i32 2)
483; SCALABLE-NEXT:    [[TMP9:%.*]] = add <8 x i32> [[STRIDED_VEC2]], splat (i32 3)
484; SCALABLE-NEXT:    [[TMP11:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
485; SCALABLE-NEXT:    [[TMP12:%.*]] = shufflevector <8 x i32> [[TMP9]], <8 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
486; SCALABLE-NEXT:    [[TMP13:%.*]] = shufflevector <16 x i32> [[TMP11]], <16 x i32> [[TMP12]], <24 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
487; SCALABLE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <24 x i32> [[TMP13]], <24 x i32> poison, <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23>
488; SCALABLE-NEXT:    store <24 x i32> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 4
489; SCALABLE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
490; SCALABLE-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
491; SCALABLE-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
492; SCALABLE:       middle.block:
493; SCALABLE-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
494; SCALABLE:       scalar.ph:
495; SCALABLE-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
496; SCALABLE-NEXT:    br label [[LOOP:%.*]]
497; SCALABLE:       loop:
498; SCALABLE-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
499; SCALABLE-NEXT:    [[OFFSET0:%.*]] = mul i64 [[I]], 3
500; SCALABLE-NEXT:    [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET0]]
501; SCALABLE-NEXT:    [[X0:%.*]] = load i32, ptr [[Q0]], align 4
502; SCALABLE-NEXT:    [[Y0:%.*]] = add i32 [[X0]], 1
503; SCALABLE-NEXT:    store i32 [[Y0]], ptr [[Q0]], align 4
504; SCALABLE-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
505; SCALABLE-NEXT:    [[Q1:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET1]]
506; SCALABLE-NEXT:    [[X1:%.*]] = load i32, ptr [[Q1]], align 4
507; SCALABLE-NEXT:    [[Y1:%.*]] = add i32 [[X1]], 2
508; SCALABLE-NEXT:    store i32 [[Y1]], ptr [[Q1]], align 4
509; SCALABLE-NEXT:    [[OFFSET2:%.*]] = add i64 [[OFFSET1]], 1
510; SCALABLE-NEXT:    [[Q2:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET2]]
511; SCALABLE-NEXT:    [[X2:%.*]] = load i32, ptr [[Q2]], align 4
512; SCALABLE-NEXT:    [[Y2:%.*]] = add i32 [[X2]], 3
513; SCALABLE-NEXT:    store i32 [[Y2]], ptr [[Q2]], align 4
514; SCALABLE-NEXT:    [[NEXTI]] = add i64 [[I]], 1
515; SCALABLE-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
516; SCALABLE-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
517; SCALABLE:       exit:
518; SCALABLE-NEXT:    ret void
519;
520entry:
521  br label %loop
522loop:
523  %i = phi i64 [0, %entry], [%nexti, %loop]
524
525  %offset0 = mul i64 %i, 3
526  %q0 = getelementptr i32, ptr %p, i64 %offset0
527  %x0 = load i32, ptr %q0
528  %y0 = add i32 %x0, 1
529  store i32 %y0, ptr %q0
530
531  %offset1 = add i64 %offset0, 1
532  %q1 = getelementptr i32, ptr %p, i64 %offset1
533  %x1 = load i32, ptr %q1
534  %y1 = add i32 %x1, 2
535  store i32 %y1, ptr %q1
536
537  %offset2 = add i64 %offset1, 1
538  %q2 = getelementptr i32, ptr %p, i64 %offset2
539  %x2 = load i32, ptr %q2
540  %y2 = add i32 %x2, 3
541  store i32 %y2, ptr %q2
542
543  %nexti = add i64 %i, 1
544  %done = icmp eq i64 %nexti, 1024
545  br i1 %done, label %exit, label %loop
546exit:
547  ret void
548}
549
; Factor-3 interleave group with i64 elements: each iteration loads,
; increments (by 1, 2 and 3 respectively) and stores back the three
; consecutive i64 members at p[3*i], p[3*i+1] and p[3*i+2], for i = 0..1023.
; All three RUN configurations (default, -scalable-vectorization=off, =on)
; produce the same fixed-width plan here: a <12 x i64> wide load,
; shufflevector de-interleave into three <4 x i64> lanes, per-member adds,
; re-interleave, and one <12 x i64> wide store, with the induction advancing
; by 4 group indices per vector iteration.
define void @load_store_factor3_i64(ptr %p) {
; CHECK-LABEL: @load_store_factor3_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 3
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP1]]
; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <12 x i64>, ptr [[TMP2]], align 8
; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
; CHECK-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
; CHECK-NEXT:    [[STRIDED_VEC2:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
; CHECK-NEXT:    [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC]], splat (i64 1)
; CHECK-NEXT:    [[TMP6:%.*]] = add <4 x i64> [[STRIDED_VEC1]], splat (i64 2)
; CHECK-NEXT:    [[TMP9:%.*]] = add <4 x i64> [[STRIDED_VEC2]], splat (i64 3)
; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <8 x i64> [[TMP11]], <8 x i64> [[TMP12]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <12 x i64> [[TMP13]], <12 x i64> poison, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
; CHECK-NEXT:    store <12 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[OFFSET0:%.*]] = mul i64 [[I]], 3
; CHECK-NEXT:    [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
; CHECK-NEXT:    [[X0:%.*]] = load i64, ptr [[Q0]], align 8
; CHECK-NEXT:    [[Y0:%.*]] = add i64 [[X0]], 1
; CHECK-NEXT:    store i64 [[Y0]], ptr [[Q0]], align 8
; CHECK-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
; CHECK-NEXT:    [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
; CHECK-NEXT:    [[X1:%.*]] = load i64, ptr [[Q1]], align 8
; CHECK-NEXT:    [[Y1:%.*]] = add i64 [[X1]], 2
; CHECK-NEXT:    store i64 [[Y1]], ptr [[Q1]], align 8
; CHECK-NEXT:    [[OFFSET2:%.*]] = add i64 [[OFFSET1]], 1
; CHECK-NEXT:    [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]]
; CHECK-NEXT:    [[X2:%.*]] = load i64, ptr [[Q2]], align 8
; CHECK-NEXT:    [[Y2:%.*]] = add i64 [[X2]], 3
; CHECK-NEXT:    store i64 [[Y2]], ptr [[Q2]], align 8
; CHECK-NEXT:    [[NEXTI]] = add i64 [[I]], 1
; CHECK-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
; CHECK-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
; FIXED-LABEL: @load_store_factor3_i64(
; FIXED-NEXT:  entry:
; FIXED-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; FIXED:       vector.ph:
; FIXED-NEXT:    br label [[VECTOR_BODY:%.*]]
; FIXED:       vector.body:
; FIXED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; FIXED-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; FIXED-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 3
; FIXED-NEXT:    [[TMP2:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP1]]
; FIXED-NEXT:    [[WIDE_VEC:%.*]] = load <12 x i64>, ptr [[TMP2]], align 8
; FIXED-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
; FIXED-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
; FIXED-NEXT:    [[STRIDED_VEC2:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
; FIXED-NEXT:    [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC]], splat (i64 1)
; FIXED-NEXT:    [[TMP6:%.*]] = add <4 x i64> [[STRIDED_VEC1]], splat (i64 2)
; FIXED-NEXT:    [[TMP9:%.*]] = add <4 x i64> [[STRIDED_VEC2]], splat (i64 3)
; FIXED-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; FIXED-NEXT:    [[TMP12:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; FIXED-NEXT:    [[TMP13:%.*]] = shufflevector <8 x i64> [[TMP11]], <8 x i64> [[TMP12]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
; FIXED-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <12 x i64> [[TMP13]], <12 x i64> poison, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
; FIXED-NEXT:    store <12 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8
; FIXED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; FIXED-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; FIXED-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; FIXED:       middle.block:
; FIXED-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; FIXED:       scalar.ph:
; FIXED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; FIXED-NEXT:    br label [[LOOP:%.*]]
; FIXED:       loop:
; FIXED-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
; FIXED-NEXT:    [[OFFSET0:%.*]] = mul i64 [[I]], 3
; FIXED-NEXT:    [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
; FIXED-NEXT:    [[X0:%.*]] = load i64, ptr [[Q0]], align 8
; FIXED-NEXT:    [[Y0:%.*]] = add i64 [[X0]], 1
; FIXED-NEXT:    store i64 [[Y0]], ptr [[Q0]], align 8
; FIXED-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
; FIXED-NEXT:    [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
; FIXED-NEXT:    [[X1:%.*]] = load i64, ptr [[Q1]], align 8
; FIXED-NEXT:    [[Y1:%.*]] = add i64 [[X1]], 2
; FIXED-NEXT:    store i64 [[Y1]], ptr [[Q1]], align 8
; FIXED-NEXT:    [[OFFSET2:%.*]] = add i64 [[OFFSET1]], 1
; FIXED-NEXT:    [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]]
; FIXED-NEXT:    [[X2:%.*]] = load i64, ptr [[Q2]], align 8
; FIXED-NEXT:    [[Y2:%.*]] = add i64 [[X2]], 3
; FIXED-NEXT:    store i64 [[Y2]], ptr [[Q2]], align 8
; FIXED-NEXT:    [[NEXTI]] = add i64 [[I]], 1
; FIXED-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
; FIXED-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
; FIXED:       exit:
; FIXED-NEXT:    ret void
;
; SCALABLE-LABEL: @load_store_factor3_i64(
; SCALABLE-NEXT:  entry:
; SCALABLE-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SCALABLE:       vector.ph:
; SCALABLE-NEXT:    br label [[VECTOR_BODY:%.*]]
; SCALABLE:       vector.body:
; SCALABLE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SCALABLE-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; SCALABLE-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 3
; SCALABLE-NEXT:    [[TMP2:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP1]]
; SCALABLE-NEXT:    [[WIDE_VEC:%.*]] = load <12 x i64>, ptr [[TMP2]], align 8
; SCALABLE-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
; SCALABLE-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
; SCALABLE-NEXT:    [[STRIDED_VEC2:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
; SCALABLE-NEXT:    [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC]], splat (i64 1)
; SCALABLE-NEXT:    [[TMP6:%.*]] = add <4 x i64> [[STRIDED_VEC1]], splat (i64 2)
; SCALABLE-NEXT:    [[TMP9:%.*]] = add <4 x i64> [[STRIDED_VEC2]], splat (i64 3)
; SCALABLE-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SCALABLE-NEXT:    [[TMP12:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; SCALABLE-NEXT:    [[TMP13:%.*]] = shufflevector <8 x i64> [[TMP11]], <8 x i64> [[TMP12]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
; SCALABLE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <12 x i64> [[TMP13]], <12 x i64> poison, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
; SCALABLE-NEXT:    store <12 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8
; SCALABLE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; SCALABLE-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; SCALABLE-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; SCALABLE:       middle.block:
; SCALABLE-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; SCALABLE:       scalar.ph:
; SCALABLE-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; SCALABLE-NEXT:    br label [[LOOP:%.*]]
; SCALABLE:       loop:
; SCALABLE-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
; SCALABLE-NEXT:    [[OFFSET0:%.*]] = mul i64 [[I]], 3
; SCALABLE-NEXT:    [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
; SCALABLE-NEXT:    [[X0:%.*]] = load i64, ptr [[Q0]], align 8
; SCALABLE-NEXT:    [[Y0:%.*]] = add i64 [[X0]], 1
; SCALABLE-NEXT:    store i64 [[Y0]], ptr [[Q0]], align 8
; SCALABLE-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
; SCALABLE-NEXT:    [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
; SCALABLE-NEXT:    [[X1:%.*]] = load i64, ptr [[Q1]], align 8
; SCALABLE-NEXT:    [[Y1:%.*]] = add i64 [[X1]], 2
; SCALABLE-NEXT:    store i64 [[Y1]], ptr [[Q1]], align 8
; SCALABLE-NEXT:    [[OFFSET2:%.*]] = add i64 [[OFFSET1]], 1
; SCALABLE-NEXT:    [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]]
; SCALABLE-NEXT:    [[X2:%.*]] = load i64, ptr [[Q2]], align 8
; SCALABLE-NEXT:    [[Y2:%.*]] = add i64 [[X2]], 3
; SCALABLE-NEXT:    store i64 [[Y2]], ptr [[Q2]], align 8
; SCALABLE-NEXT:    [[NEXTI]] = add i64 [[I]], 1
; SCALABLE-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
; SCALABLE-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
; SCALABLE:       exit:
; SCALABLE-NEXT:    ret void
;
; Input IR: scalar loop over i = 0..1023 touching a factor-3 group per
; iteration.  Note the loads use the default (unspecified) alignment, which
; for i64 is 8 — matching the `align 8` in the checks above.
entry:
  br label %loop
loop:
  %i = phi i64 [0, %entry], [%nexti, %loop]

  ; member 0 of group i: p[3*i] += 1
  %offset0 = mul i64 %i, 3
  %q0 = getelementptr i64, ptr %p, i64 %offset0
  %x0 = load i64, ptr %q0
  %y0 = add i64 %x0, 1
  store i64 %y0, ptr %q0

  ; member 1 of group i: p[3*i+1] += 2
  %offset1 = add i64 %offset0, 1
  %q1 = getelementptr i64, ptr %p, i64 %offset1
  %x1 = load i64, ptr %q1
  %y1 = add i64 %x1, 2
  store i64 %y1, ptr %q1

  ; member 2 of group i: p[3*i+2] += 3
  %offset2 = add i64 %offset1, 1
  %q2 = getelementptr i64, ptr %p, i64 %offset2
  %x2 = load i64, ptr %q2
  %y2 = add i64 %x2, 3
  store i64 %y2, ptr %q2

  %nexti = add i64 %i, 1
  %done = icmp eq i64 %nexti, 1024
  br i1 %done, label %exit, label %loop
exit:
  ret void
}
739
740define void @load_store_factor8(ptr %p) {
741; CHECK-LABEL: @load_store_factor8(
742; CHECK-NEXT:  entry:
743; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
744; CHECK:       vector.ph:
745; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
746; CHECK:       vector.body:
747; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
748; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
749; CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[TMP0]], 3
750; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP1]]
751; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <16 x i64>, ptr [[TMP2]], align 8
752; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 0, i32 8>
753; CHECK-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 1, i32 9>
754; CHECK-NEXT:    [[STRIDED_VEC2:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 2, i32 10>
755; CHECK-NEXT:    [[STRIDED_VEC3:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 3, i32 11>
756; CHECK-NEXT:    [[STRIDED_VEC4:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 4, i32 12>
757; CHECK-NEXT:    [[STRIDED_VEC5:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 5, i32 13>
758; CHECK-NEXT:    [[STRIDED_VEC6:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 6, i32 14>
759; CHECK-NEXT:    [[STRIDED_VEC7:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 7, i32 15>
760; CHECK-NEXT:    [[TMP4:%.*]] = add <2 x i64> [[STRIDED_VEC]], splat (i64 1)
761; CHECK-NEXT:    [[TMP6:%.*]] = add <2 x i64> [[STRIDED_VEC1]], splat (i64 2)
762; CHECK-NEXT:    [[TMP8:%.*]] = add <2 x i64> [[STRIDED_VEC2]], splat (i64 3)
763; CHECK-NEXT:    [[TMP10:%.*]] = add <2 x i64> [[STRIDED_VEC3]], splat (i64 4)
764; CHECK-NEXT:    [[TMP12:%.*]] = add <2 x i64> [[STRIDED_VEC4]], splat (i64 5)
765; CHECK-NEXT:    [[TMP14:%.*]] = add <2 x i64> [[STRIDED_VEC5]], splat (i64 6)
766; CHECK-NEXT:    [[TMP16:%.*]] = add <2 x i64> [[STRIDED_VEC6]], splat (i64 7)
767; CHECK-NEXT:    [[TMP19:%.*]] = add <2 x i64> [[STRIDED_VEC7]], splat (i64 8)
768; CHECK-NEXT:    [[TMP21:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
769; CHECK-NEXT:    [[TMP22:%.*]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> [[TMP10]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
770; CHECK-NEXT:    [[TMP23:%.*]] = shufflevector <2 x i64> [[TMP12]], <2 x i64> [[TMP14]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
771; CHECK-NEXT:    [[TMP24:%.*]] = shufflevector <2 x i64> [[TMP16]], <2 x i64> [[TMP19]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
772; CHECK-NEXT:    [[TMP25:%.*]] = shufflevector <4 x i64> [[TMP21]], <4 x i64> [[TMP22]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
773; CHECK-NEXT:    [[TMP26:%.*]] = shufflevector <4 x i64> [[TMP23]], <4 x i64> [[TMP24]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
774; CHECK-NEXT:    [[TMP27:%.*]] = shufflevector <8 x i64> [[TMP25]], <8 x i64> [[TMP26]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
775; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i64> [[TMP27]], <16 x i64> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
776; CHECK-NEXT:    store <16 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8
777; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
778; CHECK-NEXT:    [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
779; CHECK-NEXT:    br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
780; CHECK:       middle.block:
781; CHECK-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
782; CHECK:       scalar.ph:
783; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
784; CHECK-NEXT:    br label [[LOOP:%.*]]
785; CHECK:       loop:
786; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
787; CHECK-NEXT:    [[OFFSET0:%.*]] = shl i64 [[I]], 3
788; CHECK-NEXT:    [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
789; CHECK-NEXT:    [[X0:%.*]] = load i64, ptr [[Q0]], align 8
790; CHECK-NEXT:    [[Y0:%.*]] = add i64 [[X0]], 1
791; CHECK-NEXT:    store i64 [[Y0]], ptr [[Q0]], align 8
792; CHECK-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
793; CHECK-NEXT:    [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
794; CHECK-NEXT:    [[X1:%.*]] = load i64, ptr [[Q1]], align 8
795; CHECK-NEXT:    [[Y1:%.*]] = add i64 [[X1]], 2
796; CHECK-NEXT:    store i64 [[Y1]], ptr [[Q1]], align 8
797; CHECK-NEXT:    [[OFFSET2:%.*]] = add i64 [[OFFSET1]], 1
798; CHECK-NEXT:    [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]]
799; CHECK-NEXT:    [[X2:%.*]] = load i64, ptr [[Q2]], align 8
800; CHECK-NEXT:    [[Y2:%.*]] = add i64 [[X2]], 3
801; CHECK-NEXT:    store i64 [[Y2]], ptr [[Q2]], align 8
802; CHECK-NEXT:    [[OFFSET3:%.*]] = add i64 [[OFFSET2]], 1
803; CHECK-NEXT:    [[Q3:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET3]]
804; CHECK-NEXT:    [[X3:%.*]] = load i64, ptr [[Q3]], align 8
805; CHECK-NEXT:    [[Y3:%.*]] = add i64 [[X3]], 4
806; CHECK-NEXT:    store i64 [[Y3]], ptr [[Q3]], align 8
807; CHECK-NEXT:    [[OFFSET4:%.*]] = add i64 [[OFFSET3]], 1
808; CHECK-NEXT:    [[Q4:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET4]]
809; CHECK-NEXT:    [[X4:%.*]] = load i64, ptr [[Q4]], align 8
810; CHECK-NEXT:    [[Y4:%.*]] = add i64 [[X4]], 5
811; CHECK-NEXT:    store i64 [[Y4]], ptr [[Q4]], align 8
812; CHECK-NEXT:    [[OFFSET5:%.*]] = add i64 [[OFFSET4]], 1
813; CHECK-NEXT:    [[Q5:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET5]]
814; CHECK-NEXT:    [[X5:%.*]] = load i64, ptr [[Q5]], align 8
815; CHECK-NEXT:    [[Y5:%.*]] = add i64 [[X5]], 6
816; CHECK-NEXT:    store i64 [[Y5]], ptr [[Q5]], align 8
817; CHECK-NEXT:    [[OFFSET6:%.*]] = add i64 [[OFFSET5]], 1
818; CHECK-NEXT:    [[Q6:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET6]]
819; CHECK-NEXT:    [[X6:%.*]] = load i64, ptr [[Q6]], align 8
820; CHECK-NEXT:    [[Y6:%.*]] = add i64 [[X6]], 7
821; CHECK-NEXT:    store i64 [[Y6]], ptr [[Q6]], align 8
822; CHECK-NEXT:    [[OFFSET7:%.*]] = add i64 [[OFFSET6]], 1
823; CHECK-NEXT:    [[Q7:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET7]]
824; CHECK-NEXT:    [[X7:%.*]] = load i64, ptr [[Q7]], align 8
825; CHECK-NEXT:    [[Y7:%.*]] = add i64 [[X7]], 8
826; CHECK-NEXT:    store i64 [[Y7]], ptr [[Q7]], align 8
827; CHECK-NEXT:    [[NEXTI]] = add i64 [[I]], 1
828; CHECK-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
829; CHECK-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP11:![0-9]+]]
830; CHECK:       exit:
831; CHECK-NEXT:    ret void
832;
833; FIXED-LABEL: @load_store_factor8(
834; FIXED-NEXT:  entry:
835; FIXED-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
836; FIXED:       vector.ph:
837; FIXED-NEXT:    br label [[VECTOR_BODY:%.*]]
838; FIXED:       vector.body:
839; FIXED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
840; FIXED-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
841; FIXED-NEXT:    [[TMP1:%.*]] = shl i64 [[TMP0]], 3
842; FIXED-NEXT:    [[TMP2:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP1]]
843; FIXED-NEXT:    [[WIDE_VEC:%.*]] = load <16 x i64>, ptr [[TMP2]], align 8
844; FIXED-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 0, i32 8>
845; FIXED-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 1, i32 9>
846; FIXED-NEXT:    [[STRIDED_VEC2:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 2, i32 10>
847; FIXED-NEXT:    [[STRIDED_VEC3:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 3, i32 11>
848; FIXED-NEXT:    [[STRIDED_VEC4:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 4, i32 12>
849; FIXED-NEXT:    [[STRIDED_VEC5:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 5, i32 13>
850; FIXED-NEXT:    [[STRIDED_VEC6:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 6, i32 14>
851; FIXED-NEXT:    [[STRIDED_VEC7:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 7, i32 15>
852; FIXED-NEXT:    [[TMP4:%.*]] = add <2 x i64> [[STRIDED_VEC]], splat (i64 1)
853; FIXED-NEXT:    [[TMP6:%.*]] = add <2 x i64> [[STRIDED_VEC1]], splat (i64 2)
854; FIXED-NEXT:    [[TMP8:%.*]] = add <2 x i64> [[STRIDED_VEC2]], splat (i64 3)
855; FIXED-NEXT:    [[TMP10:%.*]] = add <2 x i64> [[STRIDED_VEC3]], splat (i64 4)
856; FIXED-NEXT:    [[TMP12:%.*]] = add <2 x i64> [[STRIDED_VEC4]], splat (i64 5)
857; FIXED-NEXT:    [[TMP14:%.*]] = add <2 x i64> [[STRIDED_VEC5]], splat (i64 6)
858; FIXED-NEXT:    [[TMP16:%.*]] = add <2 x i64> [[STRIDED_VEC6]], splat (i64 7)
859; FIXED-NEXT:    [[TMP19:%.*]] = add <2 x i64> [[STRIDED_VEC7]], splat (i64 8)
860; FIXED-NEXT:    [[TMP21:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
861; FIXED-NEXT:    [[TMP22:%.*]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> [[TMP10]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
862; FIXED-NEXT:    [[TMP23:%.*]] = shufflevector <2 x i64> [[TMP12]], <2 x i64> [[TMP14]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
863; FIXED-NEXT:    [[TMP24:%.*]] = shufflevector <2 x i64> [[TMP16]], <2 x i64> [[TMP19]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
864; FIXED-NEXT:    [[TMP25:%.*]] = shufflevector <4 x i64> [[TMP21]], <4 x i64> [[TMP22]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
865; FIXED-NEXT:    [[TMP26:%.*]] = shufflevector <4 x i64> [[TMP23]], <4 x i64> [[TMP24]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
866; FIXED-NEXT:    [[TMP27:%.*]] = shufflevector <8 x i64> [[TMP25]], <8 x i64> [[TMP26]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
867; FIXED-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i64> [[TMP27]], <16 x i64> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
868; FIXED-NEXT:    store <16 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8
869; FIXED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
870; FIXED-NEXT:    [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
871; FIXED-NEXT:    br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
872; FIXED:       middle.block:
873; FIXED-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
874; FIXED:       scalar.ph:
875; FIXED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
876; FIXED-NEXT:    br label [[LOOP:%.*]]
877; FIXED:       loop:
878; FIXED-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
879; FIXED-NEXT:    [[OFFSET0:%.*]] = shl i64 [[I]], 3
880; FIXED-NEXT:    [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
881; FIXED-NEXT:    [[X0:%.*]] = load i64, ptr [[Q0]], align 8
882; FIXED-NEXT:    [[Y0:%.*]] = add i64 [[X0]], 1
883; FIXED-NEXT:    store i64 [[Y0]], ptr [[Q0]], align 8
884; FIXED-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
885; FIXED-NEXT:    [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
886; FIXED-NEXT:    [[X1:%.*]] = load i64, ptr [[Q1]], align 8
887; FIXED-NEXT:    [[Y1:%.*]] = add i64 [[X1]], 2
888; FIXED-NEXT:    store i64 [[Y1]], ptr [[Q1]], align 8
889; FIXED-NEXT:    [[OFFSET2:%.*]] = add i64 [[OFFSET1]], 1
890; FIXED-NEXT:    [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]]
891; FIXED-NEXT:    [[X2:%.*]] = load i64, ptr [[Q2]], align 8
892; FIXED-NEXT:    [[Y2:%.*]] = add i64 [[X2]], 3
893; FIXED-NEXT:    store i64 [[Y2]], ptr [[Q2]], align 8
894; FIXED-NEXT:    [[OFFSET3:%.*]] = add i64 [[OFFSET2]], 1
895; FIXED-NEXT:    [[Q3:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET3]]
896; FIXED-NEXT:    [[X3:%.*]] = load i64, ptr [[Q3]], align 8
897; FIXED-NEXT:    [[Y3:%.*]] = add i64 [[X3]], 4
898; FIXED-NEXT:    store i64 [[Y3]], ptr [[Q3]], align 8
899; FIXED-NEXT:    [[OFFSET4:%.*]] = add i64 [[OFFSET3]], 1
900; FIXED-NEXT:    [[Q4:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET4]]
901; FIXED-NEXT:    [[X4:%.*]] = load i64, ptr [[Q4]], align 8
902; FIXED-NEXT:    [[Y4:%.*]] = add i64 [[X4]], 5
903; FIXED-NEXT:    store i64 [[Y4]], ptr [[Q4]], align 8
904; FIXED-NEXT:    [[OFFSET5:%.*]] = add i64 [[OFFSET4]], 1
905; FIXED-NEXT:    [[Q5:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET5]]
906; FIXED-NEXT:    [[X5:%.*]] = load i64, ptr [[Q5]], align 8
907; FIXED-NEXT:    [[Y5:%.*]] = add i64 [[X5]], 6
908; FIXED-NEXT:    store i64 [[Y5]], ptr [[Q5]], align 8
909; FIXED-NEXT:    [[OFFSET6:%.*]] = add i64 [[OFFSET5]], 1
910; FIXED-NEXT:    [[Q6:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET6]]
911; FIXED-NEXT:    [[X6:%.*]] = load i64, ptr [[Q6]], align 8
912; FIXED-NEXT:    [[Y6:%.*]] = add i64 [[X6]], 7
913; FIXED-NEXT:    store i64 [[Y6]], ptr [[Q6]], align 8
914; FIXED-NEXT:    [[OFFSET7:%.*]] = add i64 [[OFFSET6]], 1
915; FIXED-NEXT:    [[Q7:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET7]]
916; FIXED-NEXT:    [[X7:%.*]] = load i64, ptr [[Q7]], align 8
917; FIXED-NEXT:    [[Y7:%.*]] = add i64 [[X7]], 8
918; FIXED-NEXT:    store i64 [[Y7]], ptr [[Q7]], align 8
919; FIXED-NEXT:    [[NEXTI]] = add i64 [[I]], 1
920; FIXED-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
921; FIXED-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP11:![0-9]+]]
922; FIXED:       exit:
923; FIXED-NEXT:    ret void
924;
925; SCALABLE-LABEL: @load_store_factor8(
926; SCALABLE-NEXT:  entry:
927; SCALABLE-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
928; SCALABLE:       vector.ph:
929; SCALABLE-NEXT:    br label [[VECTOR_BODY:%.*]]
930; SCALABLE:       vector.body:
931; SCALABLE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
932; SCALABLE-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
933; SCALABLE-NEXT:    [[TMP1:%.*]] = shl i64 [[TMP0]], 3
934; SCALABLE-NEXT:    [[TMP2:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP1]]
935; SCALABLE-NEXT:    [[WIDE_VEC:%.*]] = load <16 x i64>, ptr [[TMP2]], align 8
936; SCALABLE-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 0, i32 8>
937; SCALABLE-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 1, i32 9>
938; SCALABLE-NEXT:    [[STRIDED_VEC2:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 2, i32 10>
939; SCALABLE-NEXT:    [[STRIDED_VEC3:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 3, i32 11>
940; SCALABLE-NEXT:    [[STRIDED_VEC4:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 4, i32 12>
941; SCALABLE-NEXT:    [[STRIDED_VEC5:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 5, i32 13>
942; SCALABLE-NEXT:    [[STRIDED_VEC6:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 6, i32 14>
943; SCALABLE-NEXT:    [[STRIDED_VEC7:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 7, i32 15>
944; SCALABLE-NEXT:    [[TMP4:%.*]] = add <2 x i64> [[STRIDED_VEC]], splat (i64 1)
945; SCALABLE-NEXT:    [[TMP6:%.*]] = add <2 x i64> [[STRIDED_VEC1]], splat (i64 2)
946; SCALABLE-NEXT:    [[TMP8:%.*]] = add <2 x i64> [[STRIDED_VEC2]], splat (i64 3)
947; SCALABLE-NEXT:    [[TMP10:%.*]] = add <2 x i64> [[STRIDED_VEC3]], splat (i64 4)
948; SCALABLE-NEXT:    [[TMP12:%.*]] = add <2 x i64> [[STRIDED_VEC4]], splat (i64 5)
949; SCALABLE-NEXT:    [[TMP14:%.*]] = add <2 x i64> [[STRIDED_VEC5]], splat (i64 6)
950; SCALABLE-NEXT:    [[TMP16:%.*]] = add <2 x i64> [[STRIDED_VEC6]], splat (i64 7)
951; SCALABLE-NEXT:    [[TMP19:%.*]] = add <2 x i64> [[STRIDED_VEC7]], splat (i64 8)
952; SCALABLE-NEXT:    [[TMP21:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
953; SCALABLE-NEXT:    [[TMP22:%.*]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> [[TMP10]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
954; SCALABLE-NEXT:    [[TMP23:%.*]] = shufflevector <2 x i64> [[TMP12]], <2 x i64> [[TMP14]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
955; SCALABLE-NEXT:    [[TMP24:%.*]] = shufflevector <2 x i64> [[TMP16]], <2 x i64> [[TMP19]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
956; SCALABLE-NEXT:    [[TMP25:%.*]] = shufflevector <4 x i64> [[TMP21]], <4 x i64> [[TMP22]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
957; SCALABLE-NEXT:    [[TMP26:%.*]] = shufflevector <4 x i64> [[TMP23]], <4 x i64> [[TMP24]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
958; SCALABLE-NEXT:    [[TMP27:%.*]] = shufflevector <8 x i64> [[TMP25]], <8 x i64> [[TMP26]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
959; SCALABLE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i64> [[TMP27]], <16 x i64> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
960; SCALABLE-NEXT:    store <16 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8
961; SCALABLE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
962; SCALABLE-NEXT:    [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
963; SCALABLE-NEXT:    br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
964; SCALABLE:       middle.block:
965; SCALABLE-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
966; SCALABLE:       scalar.ph:
967; SCALABLE-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
968; SCALABLE-NEXT:    br label [[LOOP:%.*]]
969; SCALABLE:       loop:
970; SCALABLE-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
971; SCALABLE-NEXT:    [[OFFSET0:%.*]] = shl i64 [[I]], 3
972; SCALABLE-NEXT:    [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
973; SCALABLE-NEXT:    [[X0:%.*]] = load i64, ptr [[Q0]], align 8
974; SCALABLE-NEXT:    [[Y0:%.*]] = add i64 [[X0]], 1
975; SCALABLE-NEXT:    store i64 [[Y0]], ptr [[Q0]], align 8
976; SCALABLE-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
977; SCALABLE-NEXT:    [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
978; SCALABLE-NEXT:    [[X1:%.*]] = load i64, ptr [[Q1]], align 8
979; SCALABLE-NEXT:    [[Y1:%.*]] = add i64 [[X1]], 2
980; SCALABLE-NEXT:    store i64 [[Y1]], ptr [[Q1]], align 8
981; SCALABLE-NEXT:    [[OFFSET2:%.*]] = add i64 [[OFFSET1]], 1
982; SCALABLE-NEXT:    [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]]
983; SCALABLE-NEXT:    [[X2:%.*]] = load i64, ptr [[Q2]], align 8
984; SCALABLE-NEXT:    [[Y2:%.*]] = add i64 [[X2]], 3
985; SCALABLE-NEXT:    store i64 [[Y2]], ptr [[Q2]], align 8
986; SCALABLE-NEXT:    [[OFFSET3:%.*]] = add i64 [[OFFSET2]], 1
987; SCALABLE-NEXT:    [[Q3:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET3]]
988; SCALABLE-NEXT:    [[X3:%.*]] = load i64, ptr [[Q3]], align 8
989; SCALABLE-NEXT:    [[Y3:%.*]] = add i64 [[X3]], 4
990; SCALABLE-NEXT:    store i64 [[Y3]], ptr [[Q3]], align 8
991; SCALABLE-NEXT:    [[OFFSET4:%.*]] = add i64 [[OFFSET3]], 1
992; SCALABLE-NEXT:    [[Q4:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET4]]
993; SCALABLE-NEXT:    [[X4:%.*]] = load i64, ptr [[Q4]], align 8
994; SCALABLE-NEXT:    [[Y4:%.*]] = add i64 [[X4]], 5
995; SCALABLE-NEXT:    store i64 [[Y4]], ptr [[Q4]], align 8
996; SCALABLE-NEXT:    [[OFFSET5:%.*]] = add i64 [[OFFSET4]], 1
997; SCALABLE-NEXT:    [[Q5:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET5]]
998; SCALABLE-NEXT:    [[X5:%.*]] = load i64, ptr [[Q5]], align 8
999; SCALABLE-NEXT:    [[Y5:%.*]] = add i64 [[X5]], 6
1000; SCALABLE-NEXT:    store i64 [[Y5]], ptr [[Q5]], align 8
1001; SCALABLE-NEXT:    [[OFFSET6:%.*]] = add i64 [[OFFSET5]], 1
1002; SCALABLE-NEXT:    [[Q6:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET6]]
1003; SCALABLE-NEXT:    [[X6:%.*]] = load i64, ptr [[Q6]], align 8
1004; SCALABLE-NEXT:    [[Y6:%.*]] = add i64 [[X6]], 7
1005; SCALABLE-NEXT:    store i64 [[Y6]], ptr [[Q6]], align 8
1006; SCALABLE-NEXT:    [[OFFSET7:%.*]] = add i64 [[OFFSET6]], 1
1007; SCALABLE-NEXT:    [[Q7:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET7]]
1008; SCALABLE-NEXT:    [[X7:%.*]] = load i64, ptr [[Q7]], align 8
1009; SCALABLE-NEXT:    [[Y7:%.*]] = add i64 [[X7]], 8
1010; SCALABLE-NEXT:    store i64 [[Y7]], ptr [[Q7]], align 8
1011; SCALABLE-NEXT:    [[NEXTI]] = add i64 [[I]], 1
1012; SCALABLE-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
1013; SCALABLE-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP11:![0-9]+]]
1014; SCALABLE:       exit:
1015; SCALABLE-NEXT:    ret void
1016;
1017entry:
1018  br label %loop
1019loop:
1020  %i = phi i64 [0, %entry], [%nexti, %loop]
1021
1022  %offset0 = shl i64 %i, 3
1023  %q0 = getelementptr i64, ptr %p, i64 %offset0
1024  %x0 = load i64, ptr %q0
1025  %y0 = add i64 %x0, 1
1026  store i64 %y0, ptr %q0
1027
1028  %offset1 = add i64 %offset0, 1
1029  %q1 = getelementptr i64, ptr %p, i64 %offset1
1030  %x1 = load i64, ptr %q1
1031  %y1 = add i64 %x1, 2
1032  store i64 %y1, ptr %q1
1033
1034  %offset2 = add i64 %offset1, 1
1035  %q2 = getelementptr i64, ptr %p, i64 %offset2
1036  %x2 = load i64, ptr %q2
1037  %y2 = add i64 %x2, 3
1038  store i64 %y2, ptr %q2
1039
1040  %offset3 = add i64 %offset2, 1
1041  %q3 = getelementptr i64, ptr %p, i64 %offset3
1042  %x3 = load i64, ptr %q3
1043  %y3 = add i64 %x3, 4
1044  store i64 %y3, ptr %q3
1045
1046  %offset4 = add i64 %offset3, 1
1047  %q4 = getelementptr i64, ptr %p, i64 %offset4
1048  %x4 = load i64, ptr %q4
1049  %y4 = add i64 %x4, 5
1050  store i64 %y4, ptr %q4
1051
1052  %offset5 = add i64 %offset4, 1
1053  %q5 = getelementptr i64, ptr %p, i64 %offset5
1054  %x5 = load i64, ptr %q5
1055  %y5 = add i64 %x5, 6
1056  store i64 %y5, ptr %q5
1057
1058  %offset6 = add i64 %offset5, 1
1059  %q6 = getelementptr i64, ptr %p, i64 %offset6
1060  %x6 = load i64, ptr %q6
1061  %y6 = add i64 %x6, 7
1062  store i64 %y6, ptr %q6
1063
1064  %offset7 = add i64 %offset6, 1
1065  %q7 = getelementptr i64, ptr %p, i64 %offset7
1066  %x7 = load i64, ptr %q7
1067  %y7 = add i64 %x7, 8
1068  store i64 %y7, ptr %q7
1069
1070  %nexti = add i64 %i, 1
1071  %done = icmp eq i64 %nexti, 1024
1072  br i1 %done, label %exit, label %loop
1073exit:
1074  ret void
1075}
1076
; Factor-2 interleaved LOAD-only group, i32 elements: both lanes of each pair
; (p[2i], p[2i+1]) are loaded, combined with an add, and the sum is stored to
; the separate (noalias) array %q with unit stride. Unlike the load_store_*
; tests above, only the load side forms an interleave group here, so the
; vectorizer emits a deinterleave (scalable) / strided shuffles (fixed) for the
; loads and a plain contiguous vector store for the result.
; NOTE: CHECK/FIXED/SCALABLE lines below are autogenerated by
; utils/update_test_checks.py — regenerate rather than hand-edit.
define void @combine_load_factor2_i32(ptr noalias %p, ptr noalias %q) {
; CHECK-LABEL: @combine_load_factor2_i32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP7:%.*]] = shl i64 [[TMP6]], 1
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP7]]
; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP8]], align 4
; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
; CHECK-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
; CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1
; CHECK-NEXT:    [[TMP12:%.*]] = add <vscale x 4 x i32> [[TMP10]], [[TMP11]]
; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr i32, ptr [[Q:%.*]], i64 [[TMP6]]
; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr i32, ptr [[TMP13]], i32 0
; CHECK-NEXT:    store <vscale x 4 x i32> [[TMP12]], ptr [[TMP14]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-NEXT:    [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[OFFSET0:%.*]] = shl i64 [[I]], 1
; CHECK-NEXT:    [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET0]]
; CHECK-NEXT:    [[X0:%.*]] = load i32, ptr [[Q0]], align 4
; CHECK-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
; CHECK-NEXT:    [[Q1:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET1]]
; CHECK-NEXT:    [[X1:%.*]] = load i32, ptr [[Q1]], align 4
; CHECK-NEXT:    [[RES:%.*]] = add i32 [[X0]], [[X1]]
; CHECK-NEXT:    [[DST:%.*]] = getelementptr i32, ptr [[Q]], i64 [[I]]
; CHECK-NEXT:    store i32 [[RES]], ptr [[DST]], align 4
; CHECK-NEXT:    [[NEXTI]] = add i64 [[I]], 1
; CHECK-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
; CHECK-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
; FIXED-LABEL: @combine_load_factor2_i32(
; FIXED-NEXT:  entry:
; FIXED-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; FIXED:       vector.ph:
; FIXED-NEXT:    br label [[VECTOR_BODY:%.*]]
; FIXED:       vector.body:
; FIXED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; FIXED-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; FIXED-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 8
; FIXED-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP0]], 1
; FIXED-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP1]], 1
; FIXED-NEXT:    [[TMP4:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP2]]
; FIXED-NEXT:    [[TMP5:%.*]] = getelementptr i32, ptr [[P]], i64 [[TMP3]]
; FIXED-NEXT:    [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP4]], align 4
; FIXED-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; FIXED-NEXT:    [[STRIDED_VEC3:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
; FIXED-NEXT:    [[WIDE_VEC1:%.*]] = load <16 x i32>, ptr [[TMP5]], align 4
; FIXED-NEXT:    [[STRIDED_VEC2:%.*]] = shufflevector <16 x i32> [[WIDE_VEC1]], <16 x i32> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; FIXED-NEXT:    [[STRIDED_VEC4:%.*]] = shufflevector <16 x i32> [[WIDE_VEC1]], <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
; FIXED-NEXT:    [[TMP8:%.*]] = add <8 x i32> [[STRIDED_VEC]], [[STRIDED_VEC3]]
; FIXED-NEXT:    [[TMP9:%.*]] = add <8 x i32> [[STRIDED_VEC2]], [[STRIDED_VEC4]]
; FIXED-NEXT:    [[TMP10:%.*]] = getelementptr i32, ptr [[Q:%.*]], i64 [[TMP0]]
; FIXED-NEXT:    [[TMP12:%.*]] = getelementptr i32, ptr [[TMP10]], i32 0
; FIXED-NEXT:    [[TMP13:%.*]] = getelementptr i32, ptr [[TMP10]], i32 8
; FIXED-NEXT:    store <8 x i32> [[TMP8]], ptr [[TMP12]], align 4
; FIXED-NEXT:    store <8 x i32> [[TMP9]], ptr [[TMP13]], align 4
; FIXED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; FIXED-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; FIXED-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; FIXED:       middle.block:
; FIXED-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; FIXED:       scalar.ph:
; FIXED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; FIXED-NEXT:    br label [[LOOP:%.*]]
; FIXED:       loop:
; FIXED-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
; FIXED-NEXT:    [[OFFSET0:%.*]] = shl i64 [[I]], 1
; FIXED-NEXT:    [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET0]]
; FIXED-NEXT:    [[X0:%.*]] = load i32, ptr [[Q0]], align 4
; FIXED-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
; FIXED-NEXT:    [[Q1:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET1]]
; FIXED-NEXT:    [[X1:%.*]] = load i32, ptr [[Q1]], align 4
; FIXED-NEXT:    [[RES:%.*]] = add i32 [[X0]], [[X1]]
; FIXED-NEXT:    [[DST:%.*]] = getelementptr i32, ptr [[Q]], i64 [[I]]
; FIXED-NEXT:    store i32 [[RES]], ptr [[DST]], align 4
; FIXED-NEXT:    [[NEXTI]] = add i64 [[I]], 1
; FIXED-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
; FIXED-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP13:![0-9]+]]
; FIXED:       exit:
; FIXED-NEXT:    ret void
;
; SCALABLE-LABEL: @combine_load_factor2_i32(
; SCALABLE-NEXT:  entry:
; SCALABLE-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; SCALABLE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
; SCALABLE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SCALABLE:       vector.ph:
; SCALABLE-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; SCALABLE-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; SCALABLE-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; SCALABLE-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; SCALABLE-NEXT:    br label [[VECTOR_BODY:%.*]]
; SCALABLE:       vector.body:
; SCALABLE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SCALABLE-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
; SCALABLE-NEXT:    [[TMP7:%.*]] = shl i64 [[TMP6]], 1
; SCALABLE-NEXT:    [[TMP8:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP7]]
; SCALABLE-NEXT:    [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP8]], align 4
; SCALABLE-NEXT:    [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
; SCALABLE-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
; SCALABLE-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1
; SCALABLE-NEXT:    [[TMP12:%.*]] = add <vscale x 4 x i32> [[TMP10]], [[TMP11]]
; SCALABLE-NEXT:    [[TMP13:%.*]] = getelementptr i32, ptr [[Q:%.*]], i64 [[TMP6]]
; SCALABLE-NEXT:    [[TMP14:%.*]] = getelementptr i32, ptr [[TMP13]], i32 0
; SCALABLE-NEXT:    store <vscale x 4 x i32> [[TMP12]], ptr [[TMP14]], align 4
; SCALABLE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; SCALABLE-NEXT:    [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; SCALABLE-NEXT:    br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; SCALABLE:       middle.block:
; SCALABLE-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; SCALABLE-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; SCALABLE:       scalar.ph:
; SCALABLE-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; SCALABLE-NEXT:    br label [[LOOP:%.*]]
; SCALABLE:       loop:
; SCALABLE-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
; SCALABLE-NEXT:    [[OFFSET0:%.*]] = shl i64 [[I]], 1
; SCALABLE-NEXT:    [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET0]]
; SCALABLE-NEXT:    [[X0:%.*]] = load i32, ptr [[Q0]], align 4
; SCALABLE-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
; SCALABLE-NEXT:    [[Q1:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET1]]
; SCALABLE-NEXT:    [[X1:%.*]] = load i32, ptr [[Q1]], align 4
; SCALABLE-NEXT:    [[RES:%.*]] = add i32 [[X0]], [[X1]]
; SCALABLE-NEXT:    [[DST:%.*]] = getelementptr i32, ptr [[Q]], i64 [[I]]
; SCALABLE-NEXT:    store i32 [[RES]], ptr [[DST]], align 4
; SCALABLE-NEXT:    [[NEXTI]] = add i64 [[I]], 1
; SCALABLE-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
; SCALABLE-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP13:![0-9]+]]
; SCALABLE:       exit:
; SCALABLE-NEXT:    ret void
;
entry:
  br label %loop
loop:
  ; Scalar pattern being vectorized: q[i] = p[2*i] + p[2*i+1], i = 0..1023.
  %i = phi i64 [0, %entry], [%nexti, %loop]

  ; Lane 0 of the factor-2 group: p[2*i].
  %offset0 = shl i64 %i, 1
  %q0 = getelementptr i32, ptr %p, i64 %offset0
  %x0 = load i32, ptr %q0

  ; Lane 1 of the factor-2 group: p[2*i + 1].
  %offset1 = add i64 %offset0, 1
  %q1 = getelementptr i32, ptr %p, i64 %offset1
  %x1 = load i32, ptr %q1

  %res = add i32 %x0, %x1

  ; Unit-stride store to the noalias destination (not part of the group).
  %dst = getelementptr i32, ptr %q, i64 %i
  store i32 %res, ptr %dst

  %nexti = add i64 %i, 1
  %done = icmp eq i64 %nexti, 1024
  br i1 %done, label %exit, label %loop
exit:
  ret void
}
1259
; Same factor-2 load-only combine pattern as @combine_load_factor2_i32, but
; with i64 elements, so the vector widths are halved: <vscale x 2 x i64>
; lanes for the scalable runs and 4 x i64 lanes (interleaved by 2) for the
; fixed-width run.
; NOTE: CHECK/FIXED/SCALABLE lines below are autogenerated by
; utils/update_test_checks.py — regenerate rather than hand-edit.
define void @combine_load_factor2_i64(ptr noalias %p, ptr noalias %q) {
; CHECK-LABEL: @combine_load_factor2_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 2
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP7:%.*]] = shl i64 [[TMP6]], 1
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP7]]
; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <vscale x 4 x i64>, ptr [[TMP8]], align 8
; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> [[WIDE_VEC]])
; CHECK-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 0
; CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 1
; CHECK-NEXT:    [[TMP12:%.*]] = add <vscale x 2 x i64> [[TMP10]], [[TMP11]]
; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr i64, ptr [[Q:%.*]], i64 [[TMP6]]
; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr i64, ptr [[TMP13]], i32 0
; CHECK-NEXT:    store <vscale x 2 x i64> [[TMP12]], ptr [[TMP14]], align 8
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-NEXT:    [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[OFFSET0:%.*]] = shl i64 [[I]], 1
; CHECK-NEXT:    [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
; CHECK-NEXT:    [[X0:%.*]] = load i64, ptr [[Q0]], align 8
; CHECK-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
; CHECK-NEXT:    [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
; CHECK-NEXT:    [[X1:%.*]] = load i64, ptr [[Q1]], align 8
; CHECK-NEXT:    [[RES:%.*]] = add i64 [[X0]], [[X1]]
; CHECK-NEXT:    [[DST:%.*]] = getelementptr i64, ptr [[Q]], i64 [[I]]
; CHECK-NEXT:    store i64 [[RES]], ptr [[DST]], align 8
; CHECK-NEXT:    [[NEXTI]] = add i64 [[I]], 1
; CHECK-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
; CHECK-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP15:![0-9]+]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
; FIXED-LABEL: @combine_load_factor2_i64(
; FIXED-NEXT:  entry:
; FIXED-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; FIXED:       vector.ph:
; FIXED-NEXT:    br label [[VECTOR_BODY:%.*]]
; FIXED:       vector.body:
; FIXED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; FIXED-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; FIXED-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 4
; FIXED-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP0]], 1
; FIXED-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP1]], 1
; FIXED-NEXT:    [[TMP4:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP2]]
; FIXED-NEXT:    [[TMP5:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP3]]
; FIXED-NEXT:    [[WIDE_VEC:%.*]] = load <8 x i64>, ptr [[TMP4]], align 8
; FIXED-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; FIXED-NEXT:    [[STRIDED_VEC3:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; FIXED-NEXT:    [[WIDE_VEC1:%.*]] = load <8 x i64>, ptr [[TMP5]], align 8
; FIXED-NEXT:    [[STRIDED_VEC2:%.*]] = shufflevector <8 x i64> [[WIDE_VEC1]], <8 x i64> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; FIXED-NEXT:    [[STRIDED_VEC4:%.*]] = shufflevector <8 x i64> [[WIDE_VEC1]], <8 x i64> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; FIXED-NEXT:    [[TMP8:%.*]] = add <4 x i64> [[STRIDED_VEC]], [[STRIDED_VEC3]]
; FIXED-NEXT:    [[TMP9:%.*]] = add <4 x i64> [[STRIDED_VEC2]], [[STRIDED_VEC4]]
; FIXED-NEXT:    [[TMP10:%.*]] = getelementptr i64, ptr [[Q:%.*]], i64 [[TMP0]]
; FIXED-NEXT:    [[TMP12:%.*]] = getelementptr i64, ptr [[TMP10]], i32 0
; FIXED-NEXT:    [[TMP13:%.*]] = getelementptr i64, ptr [[TMP10]], i32 4
; FIXED-NEXT:    store <4 x i64> [[TMP8]], ptr [[TMP12]], align 8
; FIXED-NEXT:    store <4 x i64> [[TMP9]], ptr [[TMP13]], align 8
; FIXED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; FIXED-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; FIXED-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; FIXED:       middle.block:
; FIXED-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; FIXED:       scalar.ph:
; FIXED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; FIXED-NEXT:    br label [[LOOP:%.*]]
; FIXED:       loop:
; FIXED-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
; FIXED-NEXT:    [[OFFSET0:%.*]] = shl i64 [[I]], 1
; FIXED-NEXT:    [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
; FIXED-NEXT:    [[X0:%.*]] = load i64, ptr [[Q0]], align 8
; FIXED-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
; FIXED-NEXT:    [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
; FIXED-NEXT:    [[X1:%.*]] = load i64, ptr [[Q1]], align 8
; FIXED-NEXT:    [[RES:%.*]] = add i64 [[X0]], [[X1]]
; FIXED-NEXT:    [[DST:%.*]] = getelementptr i64, ptr [[Q]], i64 [[I]]
; FIXED-NEXT:    store i64 [[RES]], ptr [[DST]], align 8
; FIXED-NEXT:    [[NEXTI]] = add i64 [[I]], 1
; FIXED-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
; FIXED-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP15:![0-9]+]]
; FIXED:       exit:
; FIXED-NEXT:    ret void
;
; SCALABLE-LABEL: @combine_load_factor2_i64(
; SCALABLE-NEXT:  entry:
; SCALABLE-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; SCALABLE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
; SCALABLE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SCALABLE:       vector.ph:
; SCALABLE-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; SCALABLE-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; SCALABLE-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; SCALABLE-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 2
; SCALABLE-NEXT:    br label [[VECTOR_BODY:%.*]]
; SCALABLE:       vector.body:
; SCALABLE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SCALABLE-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
; SCALABLE-NEXT:    [[TMP7:%.*]] = shl i64 [[TMP6]], 1
; SCALABLE-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP7]]
; SCALABLE-NEXT:    [[WIDE_VEC:%.*]] = load <vscale x 4 x i64>, ptr [[TMP8]], align 8
; SCALABLE-NEXT:    [[STRIDED_VEC:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> [[WIDE_VEC]])
; SCALABLE-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 0
; SCALABLE-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 1
; SCALABLE-NEXT:    [[TMP12:%.*]] = add <vscale x 2 x i64> [[TMP10]], [[TMP11]]
; SCALABLE-NEXT:    [[TMP13:%.*]] = getelementptr i64, ptr [[Q:%.*]], i64 [[TMP6]]
; SCALABLE-NEXT:    [[TMP14:%.*]] = getelementptr i64, ptr [[TMP13]], i32 0
; SCALABLE-NEXT:    store <vscale x 2 x i64> [[TMP12]], ptr [[TMP14]], align 8
; SCALABLE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; SCALABLE-NEXT:    [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; SCALABLE-NEXT:    br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; SCALABLE:       middle.block:
; SCALABLE-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; SCALABLE-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; SCALABLE:       scalar.ph:
; SCALABLE-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; SCALABLE-NEXT:    br label [[LOOP:%.*]]
; SCALABLE:       loop:
; SCALABLE-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
; SCALABLE-NEXT:    [[OFFSET0:%.*]] = shl i64 [[I]], 1
; SCALABLE-NEXT:    [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
; SCALABLE-NEXT:    [[X0:%.*]] = load i64, ptr [[Q0]], align 8
; SCALABLE-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
; SCALABLE-NEXT:    [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
; SCALABLE-NEXT:    [[X1:%.*]] = load i64, ptr [[Q1]], align 8
; SCALABLE-NEXT:    [[RES:%.*]] = add i64 [[X0]], [[X1]]
; SCALABLE-NEXT:    [[DST:%.*]] = getelementptr i64, ptr [[Q]], i64 [[I]]
; SCALABLE-NEXT:    store i64 [[RES]], ptr [[DST]], align 8
; SCALABLE-NEXT:    [[NEXTI]] = add i64 [[I]], 1
; SCALABLE-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
; SCALABLE-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP15:![0-9]+]]
; SCALABLE:       exit:
; SCALABLE-NEXT:    ret void
;
entry:
  br label %loop
loop:
  ; Scalar pattern being vectorized: q[i] = p[2*i] + p[2*i+1], i = 0..1023.
  %i = phi i64 [0, %entry], [%nexti, %loop]

  ; Lane 0 of the factor-2 group: p[2*i].
  %offset0 = shl i64 %i, 1
  %q0 = getelementptr i64, ptr %p, i64 %offset0
  %x0 = load i64, ptr %q0

  ; Lane 1 of the factor-2 group: p[2*i + 1].
  %offset1 = add i64 %offset0, 1
  %q1 = getelementptr i64, ptr %p, i64 %offset1
  %x1 = load i64, ptr %q1

  %res = add i64 %x0, %x1

  ; Unit-stride store to the noalias destination (not part of the group).
  %dst = getelementptr i64, ptr %q, i64 %i
  store i64 %res, ptr %dst

  %nexti = add i64 %i, 1
  %done = icmp eq i64 %nexti, 1024
  br i1 %done, label %exit, label %loop
exit:
  ret void
}
1442