xref: /llvm-project/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll (revision 1de3dc7d23dd6b856efad3a3a04f2396328726d7)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2; RUN: opt -p loop-vectorize -S %s | FileCheck %s
3
4target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
5target triple = "x86_64-apple-macosx10.15.0"
6
; Scalar loop with a fixed trip count of 1024 (it exits once %iv.next equals
; 1024). Each iteration writes four consecutive floats, starting at element
; index 4 * %iv, into both %dst.1 and %dst.2. The stored values are either
; loads from the loop-invariant pointer %p.invar or the constant 0.0. The
; offsets %add.1, %add.2 and %add.3 are computed once per iteration and feed
; the address computations for both destinations.
;
; The autogenerated assertions below expect a vector loop that advances its
; index by 2 per iteration and emits one interleaved <8 x float> store per
; destination in place of each run of four scalar stores.
7define void @test_free_instructions_feeding_geps_for_interleave_groups(ptr noalias %p.invar, ptr noalias %dst.1, ptr noalias %dst.2) {
8; CHECK-LABEL: define void @test_free_instructions_feeding_geps_for_interleave_groups(
9; CHECK-SAME: ptr noalias [[P_INVAR:%.*]], ptr noalias [[DST_1:%.*]], ptr noalias [[DST_2:%.*]]) {
10; CHECK-NEXT:  [[ENTRY:.*]]:
11; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
12; CHECK:       [[VECTOR_PH]]:
13; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
14; CHECK:       [[VECTOR_BODY]]:
15; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
16; CHECK-NEXT:    [[TMP39:%.*]] = add i64 [[INDEX]], 0
17; CHECK-NEXT:    [[TMP40:%.*]] = load float, ptr [[P_INVAR]], align 4
18; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP40]], i64 0
19; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer
20; CHECK-NEXT:    [[TMP41:%.*]] = shl i64 [[TMP39]], 2
21; CHECK-NEXT:    [[TMP44:%.*]] = getelementptr float, ptr [[DST_1]], i64 [[TMP41]]
22; CHECK-NEXT:    [[TMP42:%.*]] = load float, ptr [[P_INVAR]], align 4
23; CHECK-NEXT:    [[BROADCAST_SPLATINSERT27:%.*]] = insertelement <2 x float> poison, float [[TMP42]], i64 0
24; CHECK-NEXT:    [[BROADCAST_SPLAT28:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT27]], <2 x float> poison, <2 x i32> zeroinitializer
25; CHECK-NEXT:    [[TMP46:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLAT]], <2 x float> [[BROADCAST_SPLAT28]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
26; CHECK-NEXT:    [[TMP47:%.*]] = shufflevector <4 x float> [[TMP46]], <4 x float> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
27; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x float> [[TMP47]], <8 x float> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
28; CHECK-NEXT:    store <8 x float> [[INTERLEAVED_VEC]], ptr [[TMP44]], align 4
29; CHECK-NEXT:    [[TMP48:%.*]] = load float, ptr [[P_INVAR]], align 4
30; CHECK-NEXT:    [[BROADCAST_SPLATINSERT29:%.*]] = insertelement <2 x float> poison, float [[TMP48]], i64 0
31; CHECK-NEXT:    [[BROADCAST_SPLAT30:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT29]], <2 x float> poison, <2 x i32> zeroinitializer
32; CHECK-NEXT:    [[TMP49:%.*]] = getelementptr float, ptr [[DST_2]], i64 [[TMP41]]
33; CHECK-NEXT:    [[BROADCAST_SPLAT36:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLAT30]], <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
34; CHECK-NEXT:    [[TMP51:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLAT36]], <4 x float> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
35; CHECK-NEXT:    [[INTERLEAVED_VEC31:%.*]] = shufflevector <8 x float> [[TMP51]], <8 x float> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
36; CHECK-NEXT:    store <8 x float> [[INTERLEAVED_VEC31]], ptr [[TMP49]], align 4
37; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
38; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
39; CHECK-NEXT:    br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
40; CHECK:       [[MIDDLE_BLOCK]]:
41; CHECK-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
42; CHECK:       [[SCALAR_PH]]:
43; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
44; CHECK-NEXT:    br label %[[LOOP:.*]]
45; CHECK:       [[LOOP]]:
46; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
47; CHECK-NEXT:    [[L_0:%.*]] = load float, ptr [[P_INVAR]], align 4
48; CHECK-NEXT:    [[IV_MUL:%.*]] = shl i64 [[IV]], 2
49; CHECK-NEXT:    [[GEP_DST_19:%.*]] = getelementptr float, ptr [[DST_1]], i64 [[IV_MUL]]
50; CHECK-NEXT:    store float [[L_0]], ptr [[GEP_DST_19]], align 4
51; CHECK-NEXT:    [[L_1:%.*]] = load float, ptr [[P_INVAR]], align 4
52; CHECK-NEXT:    [[ADD_1:%.*]] = or disjoint i64 [[IV_MUL]], 1
53; CHECK-NEXT:    [[GEP_DST_119:%.*]] = getelementptr float, ptr [[DST_1]], i64 [[ADD_1]]
54; CHECK-NEXT:    store float [[L_1]], ptr [[GEP_DST_119]], align 4
55; CHECK-NEXT:    [[ADD_2:%.*]] = or disjoint i64 [[IV_MUL]], 2
56; CHECK-NEXT:    [[GEP_DST_129:%.*]] = getelementptr float, ptr [[DST_1]], i64 [[ADD_2]]
57; CHECK-NEXT:    store float 0.000000e+00, ptr [[GEP_DST_129]], align 4
58; CHECK-NEXT:    [[ADD_3:%.*]] = or disjoint i64 [[IV_MUL]], 3
59; CHECK-NEXT:    [[GEP_DST_140:%.*]] = getelementptr float, ptr [[DST_1]], i64 [[ADD_3]]
60; CHECK-NEXT:    store float 0.000000e+00, ptr [[GEP_DST_140]], align 4
61; CHECK-NEXT:    [[L_2:%.*]] = load float, ptr [[P_INVAR]], align 4
62; CHECK-NEXT:    [[GEP_DST_247:%.*]] = getelementptr float, ptr [[DST_2]], i64 [[IV_MUL]]
63; CHECK-NEXT:    store float [[L_2]], ptr [[GEP_DST_247]], align 4
64; CHECK-NEXT:    [[GEP_DST_255:%.*]] = getelementptr float, ptr [[DST_2]], i64 [[ADD_1]]
65; CHECK-NEXT:    store float 0.000000e+00, ptr [[GEP_DST_255]], align 4
66; CHECK-NEXT:    [[GEP_DST_265:%.*]] = getelementptr float, ptr [[DST_2]], i64 [[ADD_2]]
67; CHECK-NEXT:    store float 0.000000e+00, ptr [[GEP_DST_265]], align 4
68; CHECK-NEXT:    [[GEP_DST_276:%.*]] = getelementptr float, ptr [[DST_2]], i64 [[ADD_3]]
69; CHECK-NEXT:    store float 0.000000e+00, ptr [[GEP_DST_276]], align 4
70; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
71; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
72; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
73; CHECK:       [[EXIT]]:
74; CHECK-NEXT:    ret void
75;
76entry:
77  br label %loop
78
79loop:
80  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  ; First group: four consecutive floats in %dst.1 (two loaded values, then two zeros).
81  %l.0 = load float, ptr %p.invar, align 4
  ; %iv.mul = 4 * %iv, the element index of the first float written this iteration.
82  %iv.mul = shl i64 %iv, 2
83  %gep.dst.19 = getelementptr float, ptr %dst.1, i64 %iv.mul
84  store float %l.0, ptr %gep.dst.19, align 4
85  %l.1 = load float, ptr %p.invar, align 4
  ; The low two bits of %iv.mul are zero, so the 'or disjoint' with 1, 2 or 3
  ; yields the same value as adding 1, 2 or 3.
86  %add.1 = or disjoint i64 %iv.mul, 1
87  %gep.dst.119 = getelementptr float, ptr %dst.1, i64 %add.1
88  store float %l.1, ptr %gep.dst.119, align 4
89  %add.2 = or disjoint i64 %iv.mul, 2
90  %gep.dst.129 = getelementptr float, ptr %dst.1, i64 %add.2
91  store float 0.000000e+00, ptr %gep.dst.129, align 4
92  %add.3 = or disjoint i64 %iv.mul, 3
93  %gep.dst.140 = getelementptr float, ptr %dst.1, i64 %add.3
94  store float 0.000000e+00, ptr %gep.dst.140, align 4
  ; Second group: the same four indices (%iv.mul, %add.1, %add.2, %add.3) are
  ; reused to address %dst.2 (one loaded value, then three zeros).
95  %l.2 = load float, ptr %p.invar, align 4
96  %gep.dst.247 = getelementptr float, ptr %dst.2, i64 %iv.mul
97  store float %l.2, ptr %gep.dst.247, align 4
98  %gep.dst.255 = getelementptr float, ptr %dst.2, i64 %add.1
99  store float 0.000000e+00, ptr %gep.dst.255, align 4
100  %gep.dst.265 = getelementptr float, ptr %dst.2, i64 %add.2
101  store float 0.000000e+00, ptr %gep.dst.265, align 4
102  %gep.dst.276 = getelementptr float, ptr %dst.2, i64 %add.3
103  store float 0.000000e+00, ptr %gep.dst.276, align 4
104  %iv.next = add i64 %iv, 1
  ; Leave the loop once the incremented induction variable reaches 1024.
105  %ec = icmp eq i64 %iv.next, 1024
106  br i1 %ec, label %exit, label %loop
107
108exit:
109  ret void
110}
111
; Loop whose exit compare tests %iv (not %iv.next) against %arg1, so the body
; runs for %iv = 0 through %arg1 inclusive; the assertions accordingly expect
; an 'add i64 %arg1, 1' in the entry block. Each iteration reads two 16-byte
; runs of four floats from %arg, at byte offsets 32 * %iv and 32 * %iv + 16,
; adds them element-wise, multiplies each sum by 0.0, and stores the four
; results to %arg2 at byte offset 16 * %iv. %gep.1, %gep.2 and %gep.3 are each
; reused as the base of three further GEPs with constant byte offsets 4, 8, 12.
;
; The autogenerated assertions below expect a vector.scevcheck block built from
; umul.with.overflow calls, a vector.memcheck block comparing the ranges
; reached through %arg and %arg2, and a vector loop that advances its index by
; 2, performs one <16 x float> load split into eight strided <2 x float>
; vectors, and writes one interleaved <8 x float> store.
; Attribute group #0 is defined outside the part of the file visible here.
112define void @geps_feeding_interleave_groups_with_reuse(ptr %arg, i64 %arg1, ptr %arg2) #0 {
113; CHECK-LABEL: define void @geps_feeding_interleave_groups_with_reuse(
114; CHECK-SAME: ptr [[ARG:%.*]], i64 [[ARG1:%.*]], ptr [[ARG2:%.*]]) #[[ATTR0:[0-9]+]] {
115; CHECK-NEXT:  [[ENTRY:.*]]:
116; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[ARG1]], 1
117; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 54
118; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
119; CHECK:       [[VECTOR_SCEVCHECK]]:
120; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[ARG2]], i64 8
121; CHECK-NEXT:    [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[ARG1]])
122; CHECK-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0
123; CHECK-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
124; CHECK-NEXT:    [[TMP1:%.*]] = sub i64 0, [[MUL_RESULT]]
125; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT]]
126; CHECK-NEXT:    [[TMP3:%.*]] = icmp ult ptr [[TMP2]], [[SCEVGEP]]
127; CHECK-NEXT:    [[TMP4:%.*]] = or i1 [[TMP3]], [[MUL_OVERFLOW]]
128; CHECK-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[ARG2]], i64 12
129; CHECK-NEXT:    [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[ARG1]])
130; CHECK-NEXT:    [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
131; CHECK-NEXT:    [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
132; CHECK-NEXT:    [[TMP5:%.*]] = sub i64 0, [[MUL_RESULT3]]
133; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[SCEVGEP1]], i64 [[MUL_RESULT3]]
134; CHECK-NEXT:    [[TMP7:%.*]] = icmp ult ptr [[TMP6]], [[SCEVGEP1]]
135; CHECK-NEXT:    [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW4]]
136; CHECK-NEXT:    [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[ARG2]], i64 4
137; CHECK-NEXT:    [[MUL6:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[ARG1]])
138; CHECK-NEXT:    [[MUL_RESULT7:%.*]] = extractvalue { i64, i1 } [[MUL6]], 0
139; CHECK-NEXT:    [[MUL_OVERFLOW8:%.*]] = extractvalue { i64, i1 } [[MUL6]], 1
140; CHECK-NEXT:    [[TMP9:%.*]] = sub i64 0, [[MUL_RESULT7]]
141; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[SCEVGEP5]], i64 [[MUL_RESULT7]]
142; CHECK-NEXT:    [[TMP11:%.*]] = icmp ult ptr [[TMP10]], [[SCEVGEP5]]
143; CHECK-NEXT:    [[TMP12:%.*]] = or i1 [[TMP11]], [[MUL_OVERFLOW8]]
144; CHECK-NEXT:    [[MUL9:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[ARG1]])
145; CHECK-NEXT:    [[MUL_RESULT10:%.*]] = extractvalue { i64, i1 } [[MUL9]], 0
146; CHECK-NEXT:    [[MUL_OVERFLOW11:%.*]] = extractvalue { i64, i1 } [[MUL9]], 1
147; CHECK-NEXT:    [[TMP13:%.*]] = sub i64 0, [[MUL_RESULT10]]
148; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr [[ARG2]], i64 [[MUL_RESULT10]]
149; CHECK-NEXT:    [[TMP15:%.*]] = icmp ult ptr [[TMP14]], [[ARG2]]
150; CHECK-NEXT:    [[TMP16:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW11]]
151; CHECK-NEXT:    [[TMP17:%.*]] = or i1 [[TMP4]], [[TMP8]]
152; CHECK-NEXT:    [[TMP18:%.*]] = or i1 [[TMP17]], [[TMP12]]
153; CHECK-NEXT:    [[TMP19:%.*]] = or i1 [[TMP18]], [[TMP16]]
154; CHECK-NEXT:    br i1 [[TMP19]], label %[[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]]
155; CHECK:       [[VECTOR_MEMCHECK]]:
156; CHECK-NEXT:    [[TMP20:%.*]] = shl i64 [[ARG1]], 4
157; CHECK-NEXT:    [[TMP21:%.*]] = add i64 [[TMP20]], 16
158; CHECK-NEXT:    [[SCEVGEP12:%.*]] = getelementptr i8, ptr [[ARG2]], i64 [[TMP21]]
159; CHECK-NEXT:    [[TMP22:%.*]] = shl i64 [[ARG1]], 5
160; CHECK-NEXT:    [[TMP23:%.*]] = add i64 [[TMP22]], 32
161; CHECK-NEXT:    [[SCEVGEP13:%.*]] = getelementptr i8, ptr [[ARG]], i64 [[TMP23]]
162; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[ARG2]], [[SCEVGEP13]]
163; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[ARG]], [[SCEVGEP12]]
164; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
165; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
166; CHECK:       [[VECTOR_PH]]:
167; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 2
168; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
169; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
170; CHECK:       [[VECTOR_BODY]]:
171; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
172; CHECK-NEXT:    [[TMP24:%.*]] = add i64 [[INDEX]], 0
173; CHECK-NEXT:    [[TMP25:%.*]] = shl i64 [[TMP24]], 5
174; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr i8, ptr [[ARG]], i64 [[TMP25]]
175; CHECK-NEXT:    [[TMP27:%.*]] = shl i64 [[TMP24]], 4
176; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr i8, ptr [[ARG2]], i64 [[TMP27]]
177; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <16 x float>, ptr [[TMP26]], align 4
178; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 0, i32 8>
179; CHECK-NEXT:    [[STRIDED_VEC14:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 1, i32 9>
180; CHECK-NEXT:    [[STRIDED_VEC15:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 2, i32 10>
181; CHECK-NEXT:    [[STRIDED_VEC16:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 3, i32 11>
182; CHECK-NEXT:    [[STRIDED_VEC17:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 4, i32 12>
183; CHECK-NEXT:    [[STRIDED_VEC18:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 5, i32 13>
184; CHECK-NEXT:    [[STRIDED_VEC19:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 6, i32 14>
185; CHECK-NEXT:    [[STRIDED_VEC20:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 7, i32 15>
186; CHECK-NEXT:    [[TMP30:%.*]] = fadd <2 x float> [[STRIDED_VEC]], [[STRIDED_VEC17]]
187; CHECK-NEXT:    [[TMP31:%.*]] = fmul <2 x float> [[TMP30]], zeroinitializer
188; CHECK-NEXT:    [[TMP32:%.*]] = fadd <2 x float> [[STRIDED_VEC14]], [[STRIDED_VEC18]]
189; CHECK-NEXT:    [[TMP33:%.*]] = fmul <2 x float> [[TMP32]], zeroinitializer
190; CHECK-NEXT:    [[TMP34:%.*]] = fadd <2 x float> [[STRIDED_VEC15]], [[STRIDED_VEC19]]
191; CHECK-NEXT:    [[TMP35:%.*]] = fmul <2 x float> [[TMP34]], zeroinitializer
192; CHECK-NEXT:    [[TMP36:%.*]] = fadd <2 x float> [[STRIDED_VEC16]], [[STRIDED_VEC20]]
193; CHECK-NEXT:    [[TMP37:%.*]] = fmul <2 x float> [[TMP36]], zeroinitializer
194; CHECK-NEXT:    [[TMP40:%.*]] = shufflevector <2 x float> [[TMP31]], <2 x float> [[TMP33]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
195; CHECK-NEXT:    [[TMP41:%.*]] = shufflevector <2 x float> [[TMP35]], <2 x float> [[TMP37]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
196; CHECK-NEXT:    [[TMP42:%.*]] = shufflevector <4 x float> [[TMP40]], <4 x float> [[TMP41]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
197; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x float> [[TMP42]], <8 x float> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
198; CHECK-NEXT:    store <8 x float> [[INTERLEAVED_VEC]], ptr [[TMP28]], align 4
199; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
200; CHECK-NEXT:    [[TMP43:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
201; CHECK-NEXT:    br i1 [[TMP43]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
202; CHECK:       [[MIDDLE_BLOCK]]:
203; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
204; CHECK-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
205; CHECK:       [[SCALAR_PH]]:
206; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[ENTRY]] ]
207; CHECK-NEXT:    br label %[[LOOP:.*]]
208; CHECK:       [[LOOP]]:
209; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
210; CHECK-NEXT:    [[SHL_IV_5:%.*]] = shl i64 [[IV]], 5
211; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr i8, ptr [[ARG]], i64 [[SHL_IV_5]]
212; CHECK-NEXT:    [[ADD_5:%.*]] = or disjoint i64 [[SHL_IV_5]], 16
213; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr i8, ptr [[ARG]], i64 [[ADD_5]]
214; CHECK-NEXT:    [[SHL_IV_4:%.*]] = shl i64 [[IV]], 4
215; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr i8, ptr [[ARG2]], i64 [[SHL_IV_4]]
216; CHECK-NEXT:    [[L_1:%.*]] = load float, ptr [[GEP_1]], align 4
217; CHECK-NEXT:    [[L_2:%.*]] = load float, ptr [[GEP_2]], align 4
218; CHECK-NEXT:    [[ADD_1:%.*]] = fadd float [[L_1]], [[L_2]]
219; CHECK-NEXT:    [[MUL_1:%.*]] = fmul float [[ADD_1]], 0.000000e+00
220; CHECK-NEXT:    store float [[MUL_1]], ptr [[GEP_3]], align 4
221; CHECK-NEXT:    [[GEP_4:%.*]] = getelementptr i8, ptr [[GEP_1]], i64 4
222; CHECK-NEXT:    [[L_3:%.*]] = load float, ptr [[GEP_4]], align 4
223; CHECK-NEXT:    [[GEP_5:%.*]] = getelementptr i8, ptr [[GEP_2]], i64 4
224; CHECK-NEXT:    [[L_4:%.*]] = load float, ptr [[GEP_5]], align 4
225; CHECK-NEXT:    [[ADD_2:%.*]] = fadd float [[L_3]], [[L_4]]
226; CHECK-NEXT:    [[MUL_2:%.*]] = fmul float [[ADD_2]], 0.000000e+00
227; CHECK-NEXT:    [[GEP_6:%.*]] = getelementptr i8, ptr [[GEP_3]], i64 4
228; CHECK-NEXT:    store float [[MUL_2]], ptr [[GEP_6]], align 4
229; CHECK-NEXT:    [[GEP_7:%.*]] = getelementptr i8, ptr [[GEP_1]], i64 8
230; CHECK-NEXT:    [[L_5:%.*]] = load float, ptr [[GEP_7]], align 4
231; CHECK-NEXT:    [[GEP_8:%.*]] = getelementptr i8, ptr [[GEP_2]], i64 8
232; CHECK-NEXT:    [[L_6:%.*]] = load float, ptr [[GEP_8]], align 4
233; CHECK-NEXT:    [[ADD_3:%.*]] = fadd float [[L_5]], [[L_6]]
234; CHECK-NEXT:    [[MUL_3:%.*]] = fmul float [[ADD_3]], 0.000000e+00
235; CHECK-NEXT:    [[GEP_9:%.*]] = getelementptr i8, ptr [[GEP_3]], i64 8
236; CHECK-NEXT:    store float [[MUL_3]], ptr [[GEP_9]], align 4
237; CHECK-NEXT:    [[I27:%.*]] = getelementptr i8, ptr [[GEP_1]], i64 12
238; CHECK-NEXT:    [[L_7:%.*]] = load float, ptr [[I27]], align 4
239; CHECK-NEXT:    [[GEP_10:%.*]] = getelementptr i8, ptr [[GEP_2]], i64 12
240; CHECK-NEXT:    [[L_8:%.*]] = load float, ptr [[GEP_10]], align 4
241; CHECK-NEXT:    [[ADD_4:%.*]] = fadd float [[L_7]], [[L_8]]
242; CHECK-NEXT:    [[MUL_4:%.*]] = fmul float [[ADD_4]], 0.000000e+00
243; CHECK-NEXT:    [[GEP_11:%.*]] = getelementptr i8, ptr [[GEP_3]], i64 12
244; CHECK-NEXT:    store float [[MUL_4]], ptr [[GEP_11]], align 4
245; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
246; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV]], [[ARG1]]
247; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
248; CHECK:       [[EXIT]]:
249; CHECK-NEXT:    ret void
250;
251entry:
252  br label %loop
253
254loop:
255  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  ; Byte offset (32 * %iv) of the first source run in %arg.
256  %shl.iv.5 = shl i64 %iv, 5
257  %gep.1 = getelementptr i8, ptr %arg, i64 %shl.iv.5
  ; The low five bits of %shl.iv.5 are zero, so 'or disjoint' with 16 yields
  ; the same value as adding 16: the second source run starts 16 bytes later.
258  %add.5 = or disjoint i64 %shl.iv.5, 16
259  %gep.2 = getelementptr i8, ptr %arg, i64 %add.5
  ; Byte offset (16 * %iv) of the destination run in %arg2.
260  %shl.iv.4 = shl i64 %iv, 4
261  %gep.3 = getelementptr i8, ptr %arg2, i64 %shl.iv.4
  ; Element 0: loads and store go directly through %gep.1, %gep.2 and %gep.3.
262  %l.1 = load float, ptr %gep.1, align 4
263  %l.2 = load float, ptr %gep.2, align 4
264  %add.1 = fadd float %l.1, %l.2
265  %mul.1 = fmul float %add.1, 0.000000e+00
266  store float %mul.1, ptr %gep.3, align 4
  ; Elements 1 to 3: the same three base pointers are reused with constant
  ; byte offsets 4, 8 and 12.
267  %gep.4 = getelementptr i8, ptr %gep.1, i64 4
268  %l.3 = load float, ptr %gep.4, align 4
269  %gep.5 = getelementptr i8, ptr %gep.2, i64 4
270  %l.4 = load float, ptr %gep.5, align 4
271  %add.2 = fadd float %l.3, %l.4
272  %mul.2 = fmul float %add.2, 0.000000e+00
273  %gep.6 = getelementptr i8, ptr %gep.3, i64 4
274  store float %mul.2, ptr %gep.6, align 4
275  %gep.7 = getelementptr i8, ptr %gep.1, i64 8
276  %l.5 = load float, ptr %gep.7, align 4
277  %gep.8 = getelementptr i8, ptr %gep.2, i64 8
278  %l.6 = load float, ptr %gep.8, align 4
279  %add.3 = fadd float %l.5, %l.6
280  %mul.3 = fmul float %add.3, 0.000000e+00
281  %gep.9 = getelementptr i8, ptr %gep.3, i64 8
282  store float %mul.3, ptr %gep.9, align 4
283  %i27 = getelementptr i8, ptr %gep.1, i64 12
284  %l.7 = load float, ptr %i27, align 4
285  %gep.10 = getelementptr i8, ptr %gep.2, i64 12
286  %l.8 = load float, ptr %gep.10, align 4
287  %add.4 = fadd float %l.7, %l.8
288  %mul.4 = fmul float %add.4, 0.000000e+00
289  %gep.11 = getelementptr i8, ptr %gep.3, i64 12
290  store float %mul.4, ptr %gep.11, align 4
291  %iv.next = add i64 %iv, 1
  ; The compare uses %iv rather than %iv.next, so the iteration with
  ; %iv == %arg1 has already executed when the loop exits.
292  %ec = icmp eq i64 %iv, %arg1
293  br i1 %ec, label %exit, label %loop
294
295exit:
296  ret void
297}
298
299define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N) #1 {
300; CHECK-LABEL: define void @geps_feeding_interleave_groups_with_reuse2(
301; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
302; CHECK-NEXT:  [[ENTRY:.*]]:
303; CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[N]], 3
304; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1
305; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP1]], 52
306; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
307; CHECK:       [[VECTOR_SCEVCHECK]]:
308; CHECK-NEXT:    [[TMP2:%.*]] = lshr i64 [[N]], 3
309; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 24
310; CHECK-NEXT:    [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]])
311; CHECK-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0
312; CHECK-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
313; CHECK-NEXT:    [[TMP3:%.*]] = sub i64 0, [[MUL_RESULT]]
314; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT]]
315; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult ptr [[TMP4]], [[SCEVGEP]]
316; CHECK-NEXT:    [[TMP6:%.*]] = or i1 [[TMP5]], [[MUL_OVERFLOW]]
317; CHECK-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 28
318; CHECK-NEXT:    [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]])
319; CHECK-NEXT:    [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
320; CHECK-NEXT:    [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
321; CHECK-NEXT:    [[TMP7:%.*]] = sub i64 0, [[MUL_RESULT3]]
322; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[SCEVGEP1]], i64 [[MUL_RESULT3]]
323; CHECK-NEXT:    [[TMP9:%.*]] = icmp ult ptr [[TMP8]], [[SCEVGEP1]]
324; CHECK-NEXT:    [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW4]]
325; CHECK-NEXT:    [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[A]], i64 20
326; CHECK-NEXT:    [[MUL6:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]])
327; CHECK-NEXT:    [[MUL_RESULT7:%.*]] = extractvalue { i64, i1 } [[MUL6]], 0
328; CHECK-NEXT:    [[MUL_OVERFLOW8:%.*]] = extractvalue { i64, i1 } [[MUL6]], 1
329; CHECK-NEXT:    [[TMP11:%.*]] = sub i64 0, [[MUL_RESULT7]]
330; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr i8, ptr [[SCEVGEP5]], i64 [[MUL_RESULT7]]
331; CHECK-NEXT:    [[TMP13:%.*]] = icmp ult ptr [[TMP12]], [[SCEVGEP5]]
332; CHECK-NEXT:    [[TMP14:%.*]] = or i1 [[TMP13]], [[MUL_OVERFLOW8]]
333; CHECK-NEXT:    [[SCEVGEP9:%.*]] = getelementptr i8, ptr [[A]], i64 16
334; CHECK-NEXT:    [[MUL10:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]])
335; CHECK-NEXT:    [[MUL_RESULT11:%.*]] = extractvalue { i64, i1 } [[MUL10]], 0
336; CHECK-NEXT:    [[MUL_OVERFLOW12:%.*]] = extractvalue { i64, i1 } [[MUL10]], 1
337; CHECK-NEXT:    [[TMP15:%.*]] = sub i64 0, [[MUL_RESULT11]]
338; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr i8, ptr [[SCEVGEP9]], i64 [[MUL_RESULT11]]
339; CHECK-NEXT:    [[TMP17:%.*]] = icmp ult ptr [[TMP16]], [[SCEVGEP9]]
340; CHECK-NEXT:    [[TMP18:%.*]] = or i1 [[TMP17]], [[MUL_OVERFLOW12]]
341; CHECK-NEXT:    [[SCEVGEP13:%.*]] = getelementptr i8, ptr [[A]], i64 12
342; CHECK-NEXT:    [[MUL14:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]])
343; CHECK-NEXT:    [[MUL_RESULT15:%.*]] = extractvalue { i64, i1 } [[MUL14]], 0
344; CHECK-NEXT:    [[MUL_OVERFLOW16:%.*]] = extractvalue { i64, i1 } [[MUL14]], 1
345; CHECK-NEXT:    [[TMP19:%.*]] = sub i64 0, [[MUL_RESULT15]]
346; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr i8, ptr [[SCEVGEP13]], i64 [[MUL_RESULT15]]
347; CHECK-NEXT:    [[TMP21:%.*]] = icmp ult ptr [[TMP20]], [[SCEVGEP13]]
348; CHECK-NEXT:    [[TMP22:%.*]] = or i1 [[TMP21]], [[MUL_OVERFLOW16]]
349; CHECK-NEXT:    [[SCEVGEP17:%.*]] = getelementptr i8, ptr [[A]], i64 8
350; CHECK-NEXT:    [[MUL18:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]])
351; CHECK-NEXT:    [[MUL_RESULT19:%.*]] = extractvalue { i64, i1 } [[MUL18]], 0
352; CHECK-NEXT:    [[MUL_OVERFLOW20:%.*]] = extractvalue { i64, i1 } [[MUL18]], 1
353; CHECK-NEXT:    [[TMP23:%.*]] = sub i64 0, [[MUL_RESULT19]]
354; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr i8, ptr [[SCEVGEP17]], i64 [[MUL_RESULT19]]
355; CHECK-NEXT:    [[TMP25:%.*]] = icmp ult ptr [[TMP24]], [[SCEVGEP17]]
356; CHECK-NEXT:    [[TMP26:%.*]] = or i1 [[TMP25]], [[MUL_OVERFLOW20]]
357; CHECK-NEXT:    [[SCEVGEP21:%.*]] = getelementptr i8, ptr [[A]], i64 4
358; CHECK-NEXT:    [[MUL22:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]])
359; CHECK-NEXT:    [[MUL_RESULT23:%.*]] = extractvalue { i64, i1 } [[MUL22]], 0
360; CHECK-NEXT:    [[MUL_OVERFLOW24:%.*]] = extractvalue { i64, i1 } [[MUL22]], 1
361; CHECK-NEXT:    [[TMP27:%.*]] = sub i64 0, [[MUL_RESULT23]]
362; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr i8, ptr [[SCEVGEP21]], i64 [[MUL_RESULT23]]
363; CHECK-NEXT:    [[TMP29:%.*]] = icmp ult ptr [[TMP28]], [[SCEVGEP21]]
364; CHECK-NEXT:    [[TMP30:%.*]] = or i1 [[TMP29]], [[MUL_OVERFLOW24]]
365; CHECK-NEXT:    [[MUL25:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]])
366; CHECK-NEXT:    [[MUL_RESULT26:%.*]] = extractvalue { i64, i1 } [[MUL25]], 0
367; CHECK-NEXT:    [[MUL_OVERFLOW27:%.*]] = extractvalue { i64, i1 } [[MUL25]], 1
368; CHECK-NEXT:    [[TMP31:%.*]] = sub i64 0, [[MUL_RESULT26]]
369; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr i8, ptr [[A]], i64 [[MUL_RESULT26]]
370; CHECK-NEXT:    [[TMP33:%.*]] = icmp ult ptr [[TMP32]], [[A]]
371; CHECK-NEXT:    [[TMP34:%.*]] = or i1 [[TMP33]], [[MUL_OVERFLOW27]]
372; CHECK-NEXT:    [[TMP35:%.*]] = or i1 [[TMP6]], [[TMP10]]
373; CHECK-NEXT:    [[TMP36:%.*]] = or i1 [[TMP35]], [[TMP14]]
374; CHECK-NEXT:    [[TMP37:%.*]] = or i1 [[TMP36]], [[TMP18]]
375; CHECK-NEXT:    [[TMP38:%.*]] = or i1 [[TMP37]], [[TMP22]]
376; CHECK-NEXT:    [[TMP39:%.*]] = or i1 [[TMP38]], [[TMP26]]
377; CHECK-NEXT:    [[TMP40:%.*]] = or i1 [[TMP39]], [[TMP30]]
378; CHECK-NEXT:    [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP34]]
379; CHECK-NEXT:    br i1 [[TMP41]], label %[[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]]
380; CHECK:       [[VECTOR_MEMCHECK]]:
381; CHECK-NEXT:    [[TMP42:%.*]] = lshr i64 [[N]], 3
382; CHECK-NEXT:    [[TMP43:%.*]] = shl i64 [[TMP42]], 5
383; CHECK-NEXT:    [[TMP44:%.*]] = add i64 [[TMP43]], 32
384; CHECK-NEXT:    [[SCEVGEP28:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP44]]
385; CHECK-NEXT:    [[TMP45:%.*]] = add nuw nsw i64 [[TMP43]], 4
386; CHECK-NEXT:    [[SCEVGEP29:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP45]]
387; CHECK-NEXT:    [[TMP46:%.*]] = shl i64 [[TMP42]], 4
388; CHECK-NEXT:    [[TMP47:%.*]] = add nuw nsw i64 [[TMP46]], 8
389; CHECK-NEXT:    [[SCEVGEP30:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP47]]
390; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[A]], [[SCEVGEP29]]
391; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[B]], [[SCEVGEP28]]
392; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
393; CHECK-NEXT:    [[BOUND031:%.*]] = icmp ult ptr [[A]], [[SCEVGEP30]]
394; CHECK-NEXT:    [[BOUND132:%.*]] = icmp ult ptr [[B]], [[SCEVGEP28]]
395; CHECK-NEXT:    [[FOUND_CONFLICT33:%.*]] = and i1 [[BOUND031]], [[BOUND132]]
396; CHECK-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT33]]
397; CHECK-NEXT:    br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
398; CHECK:       [[VECTOR_PH]]:
399; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 4
400; CHECK-NEXT:    [[TMP48:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
401; CHECK-NEXT:    [[TMP49:%.*]] = select i1 [[TMP48]], i64 4, i64 [[N_MOD_VF]]
402; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP1]], [[TMP49]]
403; CHECK-NEXT:    [[IND_END:%.*]] = mul i64 [[N_VEC]], 8
404; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
405; CHECK:       [[VECTOR_BODY]]:
406; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
407; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 8, i64 16, i64 24>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
408; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
409; CHECK-NEXT:    [[TMP50:%.*]] = add i64 [[OFFSET_IDX]], 0
410; CHECK-NEXT:    [[TMP51:%.*]] = lshr exact i64 [[TMP50]], 1
411; CHECK-NEXT:    [[TMP52:%.*]] = getelementptr nusw i32, ptr [[B]], i64 [[TMP51]]
412; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP52]], align 4
413; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
414; CHECK-NEXT:    [[STRIDED_VEC34:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
415; CHECK-NEXT:    [[TMP56:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP50]]
416; CHECK-NEXT:    [[TMP54:%.*]] = getelementptr i32, ptr [[B]], <4 x i64> [[VEC_IND]]
417; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP54]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison), !alias.scope [[META6:![0-9]+]]
418; CHECK-NEXT:    [[TMP58:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC]], <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
419; CHECK-NEXT:    [[TMP59:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC34]], <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
420; CHECK-NEXT:    [[TMP60:%.*]] = shufflevector <4 x i32> [[WIDE_MASKED_GATHER]], <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
421; CHECK-NEXT:    [[TMP61:%.*]] = shufflevector <8 x i32> [[TMP58]], <8 x i32> [[TMP59]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
422; CHECK-NEXT:    [[TMP62:%.*]] = shufflevector <8 x i32> [[TMP60]], <8 x i32> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
423; CHECK-NEXT:    [[TMP63:%.*]] = shufflevector <16 x i32> [[TMP61]], <16 x i32> [[TMP62]], <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
424; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[TMP63]], <32 x i32> poison, <32 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
425; CHECK-NEXT:    store <32 x i32> [[INTERLEAVED_VEC]], ptr [[TMP56]], align 4
426; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
427; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 32)
428; CHECK-NEXT:    [[TMP64:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
429; CHECK-NEXT:    br i1 [[TMP64]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
430; CHECK:       [[MIDDLE_BLOCK]]:
431; CHECK-NEXT:    br label %[[SCALAR_PH]]
432; CHECK:       [[SCALAR_PH]]:
433; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[ENTRY]] ]
434; CHECK-NEXT:    br label %[[LOOP:.*]]
435; CHECK:       [[LOOP]]:
436; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT_7:%.*]], %[[LOOP]] ]
437; CHECK-NEXT:    [[SHR_1:%.*]] = lshr exact i64 [[IV]], 1
438; CHECK-NEXT:    [[GEP_B:%.*]] = getelementptr nusw i32, ptr [[B]], i64 [[SHR_1]]
439; CHECK-NEXT:    [[L:%.*]] = load i32, ptr [[GEP_B]], align 4
440; CHECK-NEXT:    [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]]
441; CHECK-NEXT:    store i32 [[L]], ptr [[GEP_A]], align 4
442; CHECK-NEXT:    [[IV_NEXT:%.*]] = or disjoint i64 [[IV]], 1
443; CHECK-NEXT:    [[GEP_A_1:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV_NEXT]]
444; CHECK-NEXT:    store i32 0, ptr [[GEP_A_1]], align 4
445; CHECK-NEXT:    [[IV_NEXT_1:%.*]] = or disjoint i64 [[IV]], 2
446; CHECK-NEXT:    [[SHR_2:%.*]] = lshr exact i64 [[IV_NEXT_1]], 1
447; CHECK-NEXT:    [[GEP_B_2:%.*]] = getelementptr i32, ptr [[B]], i64 [[SHR_2]]
448; CHECK-NEXT:    [[TMP65:%.*]] = load i32, ptr [[GEP_B_2]], align 4
449; CHECK-NEXT:    [[GEP_A_2:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV_NEXT_1]]
450; CHECK-NEXT:    store i32 [[TMP65]], ptr [[GEP_A_2]], align 4
451; CHECK-NEXT:    [[IV_NEXT_2:%.*]] = or disjoint i64 [[IV]], 3
452; CHECK-NEXT:    [[GEP_A_3:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV_NEXT_2]]
453; CHECK-NEXT:    store i32 0, ptr [[GEP_A_3]], align 4
454; CHECK-NEXT:    [[IV_NEXT_3:%.*]] = or disjoint i64 [[IV]], 4
455; CHECK-NEXT:    [[GEP_B_4:%.*]] = getelementptr i32, ptr [[B]], i64 [[IV]]
456; CHECK-NEXT:    [[TMP66:%.*]] = load i32, ptr [[GEP_B_4]], align 4
457; CHECK-NEXT:    [[GEP_A_4:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV_NEXT_3]]
458; CHECK-NEXT:    store i32 [[TMP66]], ptr [[GEP_A_4]], align 4
459; CHECK-NEXT:    [[IV_NEXT_4:%.*]] = or disjoint i64 [[IV]], 5
460; CHECK-NEXT:    [[GEP_A_5:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV_NEXT_4]]
461; CHECK-NEXT:    store i32 0, ptr [[GEP_A_5]], align 4
462; CHECK-NEXT:    [[IV_NEXT_5:%.*]] = or disjoint i64 [[IV]], 6
463; CHECK-NEXT:    [[GEP_A_6:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV_NEXT_5]]
464; CHECK-NEXT:    store i32 0, ptr [[GEP_A_6]], align 4
465; CHECK-NEXT:    [[IV_NEXT_6:%.*]] = or disjoint i64 [[IV]], 7
466; CHECK-NEXT:    [[GEP_A_7:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV_NEXT_6]]
467; CHECK-NEXT:    store i32 0, ptr [[GEP_A_7]], align 4
468; CHECK-NEXT:    [[IV_NEXT_7]] = add nuw nsw i64 [[IV]], 8
469; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV]], [[N]]
470; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP10:![0-9]+]]
471; CHECK:       [[EXIT]]:
472; CHECK-NEXT:    ret void
473;
474entry:
475  br label %loop
476
477loop:
478  %iv = phi i64 [ 0, %entry ], [ %iv.next.7, %loop ]
479  %shr.1 = lshr exact i64 %iv, 1
480  %gep.B = getelementptr nusw i32, ptr %B, i64 %shr.1
481  %l = load i32, ptr %gep.B, align 4
482  %gep.A = getelementptr i32, ptr %A, i64 %iv
483  store i32 %l, ptr %gep.A, align 4
484  %iv.next = or disjoint i64 %iv, 1
485  %gep.A.1 = getelementptr i32, ptr %A, i64 %iv.next
486  store i32 0, ptr %gep.A.1, align 4
487  %iv.next.1 = or disjoint i64 %iv, 2
488  %shr.2 = lshr exact i64 %iv.next.1, 1
489  %gep.B.2 = getelementptr i32, ptr %B, i64 %shr.2
490  %1 = load i32, ptr %gep.B.2, align 4
491  %gep.A.2 = getelementptr i32, ptr %A, i64 %iv.next.1
492  store i32 %1, ptr %gep.A.2, align 4
493  %iv.next.2 = or disjoint i64 %iv, 3
494  %gep.A.3 = getelementptr i32, ptr %A, i64 %iv.next.2
495  store i32 0, ptr %gep.A.3, align 4
496  %iv.next.3 = or disjoint i64 %iv, 4
497  %gep.B.4 = getelementptr i32, ptr %B, i64 %iv
498  %2 = load i32, ptr %gep.B.4, align 4
499  %gep.A.4 = getelementptr i32, ptr %A, i64 %iv.next.3
500  store i32 %2, ptr %gep.A.4, align 4
501  %iv.next.4 = or disjoint i64 %iv, 5
502  %gep.A.5 = getelementptr i32, ptr %A, i64 %iv.next.4
503  store i32 0, ptr %gep.A.5, align 4
504  %iv.next.5 = or disjoint i64 %iv, 6
505  %gep.A.6 = getelementptr i32, ptr %A, i64 %iv.next.5
506  store i32 0, ptr %gep.A.6, align 4
507  %iv.next.6 = or disjoint i64 %iv, 7
508  %gep.A.7 = getelementptr i32, ptr %A, i64 %iv.next.6
509  store i32 0, ptr %gep.A.7, align 4
510  %iv.next.7 = add nuw nsw i64 %iv, 8
511  %ec = icmp eq i64 %iv, %N
512  br i1 %ec, label %exit, label %loop
513
514exit:
515  ret void
516}
517
518; Test case for https://github.com/llvm/llvm-project/issues/112922.
; The loop runs for iv = 0 and iv = 1 (it exits once iv == 1). Each iteration
; writes one { double, i64 } element of %dst: first the i64 field (index 1)
; receives iv, then the start of the element (the double field) receives 0.0.
; The CHECK lines expect a vector body that advances the index by 2 and emits
; a single <4 x double> interleaved store, with the i64 induction vector
; bitcast to <2 x double>.
519define void @interleave_store_double_i64(ptr %dst) {
520; CHECK-LABEL: define void @interleave_store_double_i64(
521; CHECK-SAME: ptr [[DST:%.*]]) {
522; CHECK-NEXT:  [[ENTRY:.*]]:
523; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
524; CHECK:       [[VECTOR_PH]]:
525; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
526; CHECK:       [[VECTOR_BODY]]:
527; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
528; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
529; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
530; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr { double, i64 }, ptr [[DST]], i64 [[TMP0]]
531; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[VEC_IND]] to <2 x double>
532; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
533; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
534; CHECK-NEXT:    store <4 x double> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
535; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
536; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
537; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
538; CHECK:       [[MIDDLE_BLOCK]]:
539; CHECK-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
540; CHECK:       [[SCALAR_PH]]:
541; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 2, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
542; CHECK-NEXT:    br label %[[LOOP:.*]]
543; CHECK:       [[LOOP]]:
544; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
545; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr { double, i64 }, ptr [[DST]], i64 [[IV]], i32 1
546; CHECK-NEXT:    store i64 [[IV]], ptr [[GEP_1]], align 8
547; CHECK-NEXT:    [[GEP_0:%.*]] = getelementptr { double, i64 }, ptr [[DST]], i64 [[IV]]
548; CHECK-NEXT:    store double 0.000000e+00, ptr [[GEP_0]], align 8
549; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
550; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV]], 1
551; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP12:![0-9]+]]
552; CHECK:       [[EXIT]]:
553; CHECK-NEXT:    ret void
554;
555entry:
556  br label %loop
557
558loop:
559  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
; The i64 member (field 1) is stored before the double member (field 0).
560  %gep.1 = getelementptr { double, i64 }, ptr %dst, i64 %iv, i32 1
561  store i64 %iv, ptr %gep.1, align 8
562  %gep.0 = getelementptr { double, i64 }, ptr %dst, i64 %iv
563  store double 0.000000e+00, ptr %gep.0, align 8
564  %iv.next = add i64 %iv, 1
; The exit test reads %iv, not %iv.next, so the body executes for iv = 0 and 1.
565  %ec = icmp eq i64 %iv, 1
566  br i1 %ec, label %exit, label %loop
567
568exit:
569  ret void
570}
571
; Same { double, i64 } element type and two-iteration loop as
; @interleave_store_double_i64, but the stores are issued in field order: the
; double 0.0 goes to the start of the element first, then iv goes to the i64
; field (index 1). The CHECK lines expect the scalar loop to be left as-is:
; no vector blocks and no !llvm.loop metadata on the latch branch.
572define void @interleave_store_i64_double(ptr %dst) {
573; CHECK-LABEL: define void @interleave_store_i64_double(
574; CHECK-SAME: ptr [[DST:%.*]]) {
575; CHECK-NEXT:  [[ENTRY:.*]]:
576; CHECK-NEXT:    br label %[[LOOP:.*]]
577; CHECK:       [[LOOP]]:
578; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
579; CHECK-NEXT:    [[GEP_0:%.*]] = getelementptr { double, i64 }, ptr [[DST]], i64 [[IV]]
580; CHECK-NEXT:    store double 0.000000e+00, ptr [[GEP_0]], align 8
581; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr { double, i64 }, ptr [[DST]], i64 [[IV]], i32 1
582; CHECK-NEXT:    store i64 [[IV]], ptr [[GEP_1]], align 8
583; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
584; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV]], 1
585; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
586; CHECK:       [[EXIT]]:
587; CHECK-NEXT:    ret void
588;
589entry:
590  br label %loop
591
592loop:
593  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
; The double member (field 0) is stored before the i64 member (field 1).
594  %gep.0 = getelementptr { double, i64 }, ptr %dst, i64 %iv
595  store double 0.000000e+00, ptr %gep.0, align 8
596  %gep.1 = getelementptr { double, i64 }, ptr %dst, i64 %iv, i32 1
597  store i64 %iv, ptr %gep.1, align 8
598  %iv.next = add i64 %iv, 1
; The exit test reads %iv, not %iv.next, so the body executes for iv = 0 and 1.
599  %ec = icmp eq i64 %iv, 1
600  br i1 %ec, label %exit, label %loop
601
602exit:
603  ret void
604}
605
606; TODO: The interleave group should likely have the same cost as @interleave_store_double_i64.
; Variant with the element type flipped to { i64, double }. Each of the two
; iterations (iv = 0 and iv = 1) first stores double 0.0 to field 1 and then
; stores iv to the start of the element (the i64 field). The CHECK lines
; currently expect the scalar loop to be left as-is: no vector blocks and no
; !llvm.loop metadata on the latch branch.
607define void @interleave_store_double_i64_2(ptr %dst) {
608; CHECK-LABEL: define void @interleave_store_double_i64_2(
609; CHECK-SAME: ptr [[DST:%.*]]) {
610; CHECK-NEXT:  [[ENTRY:.*]]:
611; CHECK-NEXT:    br label %[[LOOP:.*]]
612; CHECK:       [[LOOP]]:
613; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
614; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr { i64, double }, ptr [[DST]], i64 [[IV]], i32 1
615; CHECK-NEXT:    store double 0.000000e+00, ptr [[GEP_1]], align 8
616; CHECK-NEXT:    [[GEP_0:%.*]] = getelementptr { i64, double }, ptr [[DST]], i64 [[IV]]
617; CHECK-NEXT:    store i64 [[IV]], ptr [[GEP_0]], align 8
618; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
619; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV]], 1
620; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
621; CHECK:       [[EXIT]]:
622; CHECK-NEXT:    ret void
623;
624entry:
625  br label %loop
626
627loop:
628  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
; The double member (field 1) is stored before the i64 member (field 0).
629  %gep.1 = getelementptr { i64, double }, ptr %dst, i64 %iv, i32 1
630  store double 0.000000e+00, ptr %gep.1, align 8
631  %gep.0 = getelementptr { i64, double }, ptr %dst, i64 %iv
632  store i64 %iv, ptr %gep.0, align 8
633  %iv.next = add i64 %iv, 1
; The exit test reads %iv, not %iv.next, so the body executes for iv = 0 and 1.
634  %ec = icmp eq i64 %iv, 1
635  br i1 %ec, label %exit, label %loop
636
637exit:
638  ret void
639}
640
; { i64, double } element type with the stores issued in field order: iv goes
; to the start of the element (the i64 field) first, then double 0.0 goes to
; field 1. The loop runs for iv = 0 and iv = 1. The CHECK lines expect a
; vector body that advances the index by 2 and emits a single <4 x double>
; interleaved store, with the i64 induction vector bitcast to <2 x double> and
; placed ahead of the zero vector in the concatenating shuffle.
641define void @interleave_store_i64_double_2(ptr %dst) {
642; CHECK-LABEL: define void @interleave_store_i64_double_2(
643; CHECK-SAME: ptr [[DST:%.*]]) {
644; CHECK-NEXT:  [[ENTRY:.*]]:
645; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
646; CHECK:       [[VECTOR_PH]]:
647; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
648; CHECK:       [[VECTOR_BODY]]:
649; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
650; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
651; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
652; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr { i64, double }, ptr [[DST]], i64 [[TMP0]]
653; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[VEC_IND]] to <2 x double>
654; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
655; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
656; CHECK-NEXT:    store <4 x double> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
657; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
658; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
659; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
660; CHECK:       [[MIDDLE_BLOCK]]:
661; CHECK-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
662; CHECK:       [[SCALAR_PH]]:
663; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 2, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
664; CHECK-NEXT:    br label %[[LOOP:.*]]
665; CHECK:       [[LOOP]]:
666; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
667; CHECK-NEXT:    [[GEP_0:%.*]] = getelementptr { i64, double }, ptr [[DST]], i64 [[IV]]
668; CHECK-NEXT:    store i64 [[IV]], ptr [[GEP_0]], align 8
669; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr { i64, double }, ptr [[DST]], i64 [[IV]], i32 1
670; CHECK-NEXT:    store double 0.000000e+00, ptr [[GEP_1]], align 8
671; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
672; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV]], 1
673; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP14:![0-9]+]]
674; CHECK:       [[EXIT]]:
675; CHECK-NEXT:    ret void
676;
677entry:
678  br label %loop
679
680loop:
681  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
; The i64 member (field 0) is stored before the double member (field 1).
682  %gep.0 = getelementptr { i64, double }, ptr %dst, i64 %iv
683  store i64 %iv, ptr %gep.0, align 8
684  %gep.1 = getelementptr { i64, double }, ptr %dst, i64 %iv, i32 1
685  store double 0.000000e+00, ptr %gep.1, align 8
686  %iv.next = add i64 %iv, 1
; The exit test reads %iv, not %iv.next, so the body executes for iv = 0 and 1.
687  %ec = icmp eq i64 %iv, 1
688  br i1 %ec, label %exit, label %loop
689
690exit:
691  ret void
692}
693
694
695
; Attribute groups: #0 sets the "+sse4.2" target feature; #1 sets
; "min-legal-vector-width"="0" and selects the "cascadelake" target CPU.
; NOTE(review): presumably referenced by functions earlier in this file; none
; of the interleave_store_* functions directly above use them. Confirm before
; removing.
696attributes #0 = { "target-features"="+sse4.2" }
697attributes #1 = { "min-legal-vector-width"="0" "target-cpu"="cascadelake" }
698
699;.
700; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
701; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
702; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
703; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
704; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
705; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]}
706; CHECK: [[META6]] = !{[[META7:![0-9]+]]}
707; CHECK: [[META7]] = distinct !{[[META7]], [[META8:![0-9]+]]}
708; CHECK: [[META8]] = distinct !{[[META8]], !"LVerDomain"}
709; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]], [[META2]]}
710; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]]}
711; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META1]], [[META2]]}
712; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META2]], [[META1]]}
713; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]], [[META2]]}
714; CHECK: [[LOOP14]] = distinct !{[[LOOP14]], [[META2]], [[META1]]}
715;.
716