xref: /llvm-project/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll (revision 1de3dc7d23dd6b856efad3a3a04f2396328726d7)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2; RUN: opt -passes=loop-vectorize -enable-epilogue-vectorization=false -mattr=+neon,+dotprod -force-vector-interleave=1 -S < %s | FileCheck %s --check-prefixes=CHECK-INTERLEAVE1
3; RUN: opt -passes=loop-vectorize -enable-epilogue-vectorization=false -mattr=+neon,+dotprod -S < %s | FileCheck %s --check-prefixes=CHECK-INTERLEAVED
4; RUN: opt -passes=loop-vectorize -enable-epilogue-vectorization=false -mattr=+neon,+dotprod -force-vector-interleave=1 -vectorizer-maximize-bandwidth -S < %s | FileCheck %s --check-prefixes=CHECK-MAXBW
5
6target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
7target triple = "aarch64-none-unknown-elf"
8
9define i32 @dotp(ptr %a, ptr %b) {
10; CHECK-INTERLEAVE1-LABEL: define i32 @dotp(
11; CHECK-INTERLEAVE1-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] {
12; CHECK-INTERLEAVE1-NEXT:  entry:
13; CHECK-INTERLEAVE1-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
14; CHECK-INTERLEAVE1:       vector.ph:
15; CHECK-INTERLEAVE1-NEXT:    br label [[VECTOR_BODY:%.*]]
16; CHECK-INTERLEAVE1:       vector.body:
17; CHECK-INTERLEAVE1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
18; CHECK-INTERLEAVE1-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ]
19; CHECK-INTERLEAVE1-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
20; CHECK-INTERLEAVE1-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]]
21; CHECK-INTERLEAVE1-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 0
22; CHECK-INTERLEAVE1-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
23; CHECK-INTERLEAVE1-NEXT:    [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
24; CHECK-INTERLEAVE1-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
25; CHECK-INTERLEAVE1-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i32 0
26; CHECK-INTERLEAVE1-NEXT:    [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1
27; CHECK-INTERLEAVE1-NEXT:    [[TMP6:%.*]] = zext <16 x i8> [[WIDE_LOAD1]] to <16 x i32>
28; CHECK-INTERLEAVE1-NEXT:    [[TMP7:%.*]] = mul <16 x i32> [[TMP6]], [[TMP3]]
29; CHECK-INTERLEAVE1-NEXT:    [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP7]])
30; CHECK-INTERLEAVE1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
31; CHECK-INTERLEAVE1-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
32; CHECK-INTERLEAVE1-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
33; CHECK-INTERLEAVE1:       middle.block:
34; CHECK-INTERLEAVE1-NEXT:    [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE]])
35; CHECK-INTERLEAVE1-NEXT:    br i1 true, label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
36; CHECK-INTERLEAVE1:       scalar.ph:
37; CHECK-INTERLEAVE1-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
38; CHECK-INTERLEAVE1-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP9]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
39; CHECK-INTERLEAVE1-NEXT:    br label [[FOR_BODY:%.*]]
40; CHECK-INTERLEAVE1:       for.body:
41; CHECK-INTERLEAVE1-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
42; CHECK-INTERLEAVE1-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
43; CHECK-INTERLEAVE1-NEXT:    [[GEP_A:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV]]
44; CHECK-INTERLEAVE1-NEXT:    [[LOAD_A:%.*]] = load i8, ptr [[GEP_A]], align 1
45; CHECK-INTERLEAVE1-NEXT:    [[EXT_A:%.*]] = zext i8 [[LOAD_A]] to i32
46; CHECK-INTERLEAVE1-NEXT:    [[GEP_B:%.*]] = getelementptr i8, ptr [[B]], i64 [[IV]]
47; CHECK-INTERLEAVE1-NEXT:    [[LOAD_B:%.*]] = load i8, ptr [[GEP_B]], align 1
48; CHECK-INTERLEAVE1-NEXT:    [[EXT_B:%.*]] = zext i8 [[LOAD_B]] to i32
49; CHECK-INTERLEAVE1-NEXT:    [[MUL:%.*]] = mul i32 [[EXT_B]], [[EXT_A]]
50; CHECK-INTERLEAVE1-NEXT:    [[ADD]] = add i32 [[MUL]], [[ACCUM]]
51; CHECK-INTERLEAVE1-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
52; CHECK-INTERLEAVE1-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
53; CHECK-INTERLEAVE1-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
54; CHECK-INTERLEAVE1:       for.exit:
55; CHECK-INTERLEAVE1-NEXT:    [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ]
56; CHECK-INTERLEAVE1-NEXT:    ret i32 [[ADD_LCSSA]]
57;
58; CHECK-INTERLEAVED-LABEL: define i32 @dotp(
59; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] {
60; CHECK-INTERLEAVED-NEXT:  entry:
61; CHECK-INTERLEAVED-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
62; CHECK-INTERLEAVED:       vector.ph:
63; CHECK-INTERLEAVED-NEXT:    br label [[VECTOR_BODY:%.*]]
64; CHECK-INTERLEAVED:       vector.body:
65; CHECK-INTERLEAVED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
66; CHECK-INTERLEAVED-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ]
67; CHECK-INTERLEAVED-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE5:%.*]], [[VECTOR_BODY]] ]
68; CHECK-INTERLEAVED-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
69; CHECK-INTERLEAVED-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]]
70; CHECK-INTERLEAVED-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 0
71; CHECK-INTERLEAVED-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i32 16
72; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
73; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1
74; CHECK-INTERLEAVED-NEXT:    [[TMP4:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
75; CHECK-INTERLEAVED-NEXT:    [[TMP5:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i32>
76; CHECK-INTERLEAVED-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
77; CHECK-INTERLEAVED-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP6]], i32 0
78; CHECK-INTERLEAVED-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[TMP6]], i32 16
79; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP7]], align 1
80; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP8]], align 1
81; CHECK-INTERLEAVED-NEXT:    [[TMP9:%.*]] = zext <16 x i8> [[WIDE_LOAD3]] to <16 x i32>
82; CHECK-INTERLEAVED-NEXT:    [[TMP10:%.*]] = zext <16 x i8> [[WIDE_LOAD4]] to <16 x i32>
83; CHECK-INTERLEAVED-NEXT:    [[TMP11:%.*]] = mul <16 x i32> [[TMP9]], [[TMP4]]
84; CHECK-INTERLEAVED-NEXT:    [[TMP12:%.*]] = mul <16 x i32> [[TMP10]], [[TMP5]]
85; CHECK-INTERLEAVED-NEXT:    [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP11]])
86; CHECK-INTERLEAVED-NEXT:    [[PARTIAL_REDUCE5]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI1]], <16 x i32> [[TMP12]])
87; CHECK-INTERLEAVED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
88; CHECK-INTERLEAVED-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
89; CHECK-INTERLEAVED-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
90; CHECK-INTERLEAVED:       middle.block:
91; CHECK-INTERLEAVED-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[PARTIAL_REDUCE5]], [[PARTIAL_REDUCE]]
92; CHECK-INTERLEAVED-NEXT:    [[TMP14:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
93; CHECK-INTERLEAVED-NEXT:    br i1 true, label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
94; CHECK-INTERLEAVED:       scalar.ph:
95; CHECK-INTERLEAVED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
96; CHECK-INTERLEAVED-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP14]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
97; CHECK-INTERLEAVED-NEXT:    br label [[FOR_BODY:%.*]]
98; CHECK-INTERLEAVED:       for.body:
99; CHECK-INTERLEAVED-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
100; CHECK-INTERLEAVED-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
101; CHECK-INTERLEAVED-NEXT:    [[GEP_A:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV]]
102; CHECK-INTERLEAVED-NEXT:    [[LOAD_A:%.*]] = load i8, ptr [[GEP_A]], align 1
103; CHECK-INTERLEAVED-NEXT:    [[EXT_A:%.*]] = zext i8 [[LOAD_A]] to i32
104; CHECK-INTERLEAVED-NEXT:    [[GEP_B:%.*]] = getelementptr i8, ptr [[B]], i64 [[IV]]
105; CHECK-INTERLEAVED-NEXT:    [[LOAD_B:%.*]] = load i8, ptr [[GEP_B]], align 1
106; CHECK-INTERLEAVED-NEXT:    [[EXT_B:%.*]] = zext i8 [[LOAD_B]] to i32
107; CHECK-INTERLEAVED-NEXT:    [[MUL:%.*]] = mul i32 [[EXT_B]], [[EXT_A]]
108; CHECK-INTERLEAVED-NEXT:    [[ADD]] = add i32 [[MUL]], [[ACCUM]]
109; CHECK-INTERLEAVED-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
110; CHECK-INTERLEAVED-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
111; CHECK-INTERLEAVED-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
112; CHECK-INTERLEAVED:       for.exit:
113; CHECK-INTERLEAVED-NEXT:    [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ]
114; CHECK-INTERLEAVED-NEXT:    ret i32 [[ADD_LCSSA]]
115;
116; CHECK-MAXBW-LABEL: define i32 @dotp(
117; CHECK-MAXBW-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] {
118; CHECK-MAXBW-NEXT:  entry:
119; CHECK-MAXBW-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
120; CHECK-MAXBW:       vector.ph:
121; CHECK-MAXBW-NEXT:    br label [[VECTOR_BODY:%.*]]
122; CHECK-MAXBW:       vector.body:
123; CHECK-MAXBW-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
124; CHECK-MAXBW-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ]
125; CHECK-MAXBW-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
126; CHECK-MAXBW-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]]
127; CHECK-MAXBW-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 0
128; CHECK-MAXBW-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
129; CHECK-MAXBW-NEXT:    [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
130; CHECK-MAXBW-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
131; CHECK-MAXBW-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i32 0
132; CHECK-MAXBW-NEXT:    [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1
133; CHECK-MAXBW-NEXT:    [[TMP6:%.*]] = zext <16 x i8> [[WIDE_LOAD1]] to <16 x i32>
134; CHECK-MAXBW-NEXT:    [[TMP7:%.*]] = mul <16 x i32> [[TMP6]], [[TMP3]]
135; CHECK-MAXBW-NEXT:    [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP7]])
136; CHECK-MAXBW-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
137; CHECK-MAXBW-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
138; CHECK-MAXBW-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
139; CHECK-MAXBW:       middle.block:
140; CHECK-MAXBW-NEXT:    [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE]])
141; CHECK-MAXBW-NEXT:    br i1 true, label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
142; CHECK-MAXBW:       scalar.ph:
143; CHECK-MAXBW-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
144; CHECK-MAXBW-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP9]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
145; CHECK-MAXBW-NEXT:    br label [[FOR_BODY:%.*]]
146; CHECK-MAXBW:       for.body:
147; CHECK-MAXBW-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
148; CHECK-MAXBW-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
149; CHECK-MAXBW-NEXT:    [[GEP_A:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV]]
150; CHECK-MAXBW-NEXT:    [[LOAD_A:%.*]] = load i8, ptr [[GEP_A]], align 1
151; CHECK-MAXBW-NEXT:    [[EXT_A:%.*]] = zext i8 [[LOAD_A]] to i32
152; CHECK-MAXBW-NEXT:    [[GEP_B:%.*]] = getelementptr i8, ptr [[B]], i64 [[IV]]
153; CHECK-MAXBW-NEXT:    [[LOAD_B:%.*]] = load i8, ptr [[GEP_B]], align 1
154; CHECK-MAXBW-NEXT:    [[EXT_B:%.*]] = zext i8 [[LOAD_B]] to i32
155; CHECK-MAXBW-NEXT:    [[MUL:%.*]] = mul i32 [[EXT_B]], [[EXT_A]]
156; CHECK-MAXBW-NEXT:    [[ADD]] = add i32 [[MUL]], [[ACCUM]]
157; CHECK-MAXBW-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
158; CHECK-MAXBW-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
159; CHECK-MAXBW-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
160; CHECK-MAXBW:       for.exit:
161; CHECK-MAXBW-NEXT:    [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ]
162; CHECK-MAXBW-NEXT:    ret i32 [[ADD_LCSSA]]
163;
164entry:
165  br label %for.body
166
167for.body:                                         ; preds = %for.body, %entry
168  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
169  %accum = phi i32 [ 0, %entry ], [ %add, %for.body ]
170  %gep.a = getelementptr i8, ptr %a, i64 %iv
171  %load.a = load i8, ptr %gep.a, align 1
172  %ext.a = zext i8 %load.a to i32
173  %gep.b = getelementptr i8, ptr %b, i64 %iv
174  %load.b = load i8, ptr %gep.b, align 1
175  %ext.b = zext i8 %load.b to i32
176  %mul = mul i32 %ext.b, %ext.a
177  %add = add i32 %mul, %accum
178  %iv.next = add i64 %iv, 1
179  %exitcond.not = icmp eq i64 %iv.next, 1024
180  br i1 %exitcond.not, label %for.exit, label %for.body
181
182for.exit:                        ; preds = %for.body
183  ret i32 %add
184}
185
186define i32 @not_dotp_different_types(ptr %a, ptr %b) {
187; CHECK-INTERLEAVE1-LABEL: define i32 @not_dotp_different_types(
188; CHECK-INTERLEAVE1-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
189; CHECK-INTERLEAVE1-NEXT:  entry:
190; CHECK-INTERLEAVE1-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
191; CHECK-INTERLEAVE1:       vector.ph:
192; CHECK-INTERLEAVE1-NEXT:    br label [[VECTOR_BODY:%.*]]
193; CHECK-INTERLEAVE1:       vector.body:
194; CHECK-INTERLEAVE1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
195; CHECK-INTERLEAVE1-NEXT:    [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP69:%.*]], [[VECTOR_BODY]] ]
196; CHECK-INTERLEAVE1-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
197; CHECK-INTERLEAVE1-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
198; CHECK-INTERLEAVE1-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2
199; CHECK-INTERLEAVE1-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
200; CHECK-INTERLEAVE1-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 4
201; CHECK-INTERLEAVE1-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 5
202; CHECK-INTERLEAVE1-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 6
203; CHECK-INTERLEAVE1-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 7
204; CHECK-INTERLEAVE1-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 8
205; CHECK-INTERLEAVE1-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 9
206; CHECK-INTERLEAVE1-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX]], 10
207; CHECK-INTERLEAVE1-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 11
208; CHECK-INTERLEAVE1-NEXT:    [[TMP12:%.*]] = add i64 [[INDEX]], 12
209; CHECK-INTERLEAVE1-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 13
210; CHECK-INTERLEAVE1-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 14
211; CHECK-INTERLEAVE1-NEXT:    [[TMP15:%.*]] = add i64 [[INDEX]], 15
212; CHECK-INTERLEAVE1-NEXT:    [[TMP16:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]]
213; CHECK-INTERLEAVE1-NEXT:    [[TMP17:%.*]] = getelementptr i8, ptr [[TMP16]], i32 0
214; CHECK-INTERLEAVE1-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP17]], align 1
215; CHECK-INTERLEAVE1-NEXT:    [[TMP18:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
216; CHECK-INTERLEAVE1-NEXT:    [[TMP19:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
217; CHECK-INTERLEAVE1-NEXT:    [[TMP20:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
218; CHECK-INTERLEAVE1-NEXT:    [[TMP21:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
219; CHECK-INTERLEAVE1-NEXT:    [[TMP22:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]]
220; CHECK-INTERLEAVE1-NEXT:    [[TMP23:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP4]]
221; CHECK-INTERLEAVE1-NEXT:    [[TMP24:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP5]]
222; CHECK-INTERLEAVE1-NEXT:    [[TMP25:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP6]]
223; CHECK-INTERLEAVE1-NEXT:    [[TMP26:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP7]]
224; CHECK-INTERLEAVE1-NEXT:    [[TMP27:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP8]]
225; CHECK-INTERLEAVE1-NEXT:    [[TMP28:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP9]]
226; CHECK-INTERLEAVE1-NEXT:    [[TMP29:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP10]]
227; CHECK-INTERLEAVE1-NEXT:    [[TMP30:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP11]]
228; CHECK-INTERLEAVE1-NEXT:    [[TMP31:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP12]]
229; CHECK-INTERLEAVE1-NEXT:    [[TMP32:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP13]]
230; CHECK-INTERLEAVE1-NEXT:    [[TMP33:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP14]]
231; CHECK-INTERLEAVE1-NEXT:    [[TMP34:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP15]]
232; CHECK-INTERLEAVE1-NEXT:    [[TMP35:%.*]] = load i16, ptr [[TMP19]], align 2
233; CHECK-INTERLEAVE1-NEXT:    [[TMP36:%.*]] = load i16, ptr [[TMP20]], align 2
234; CHECK-INTERLEAVE1-NEXT:    [[TMP37:%.*]] = load i16, ptr [[TMP21]], align 2
235; CHECK-INTERLEAVE1-NEXT:    [[TMP38:%.*]] = load i16, ptr [[TMP22]], align 2
236; CHECK-INTERLEAVE1-NEXT:    [[TMP39:%.*]] = load i16, ptr [[TMP23]], align 2
237; CHECK-INTERLEAVE1-NEXT:    [[TMP40:%.*]] = load i16, ptr [[TMP24]], align 2
238; CHECK-INTERLEAVE1-NEXT:    [[TMP41:%.*]] = load i16, ptr [[TMP25]], align 2
239; CHECK-INTERLEAVE1-NEXT:    [[TMP42:%.*]] = load i16, ptr [[TMP26]], align 2
240; CHECK-INTERLEAVE1-NEXT:    [[TMP43:%.*]] = load i16, ptr [[TMP27]], align 2
241; CHECK-INTERLEAVE1-NEXT:    [[TMP44:%.*]] = load i16, ptr [[TMP28]], align 2
242; CHECK-INTERLEAVE1-NEXT:    [[TMP45:%.*]] = load i16, ptr [[TMP29]], align 2
243; CHECK-INTERLEAVE1-NEXT:    [[TMP46:%.*]] = load i16, ptr [[TMP30]], align 2
244; CHECK-INTERLEAVE1-NEXT:    [[TMP47:%.*]] = load i16, ptr [[TMP31]], align 2
245; CHECK-INTERLEAVE1-NEXT:    [[TMP48:%.*]] = load i16, ptr [[TMP32]], align 2
246; CHECK-INTERLEAVE1-NEXT:    [[TMP49:%.*]] = load i16, ptr [[TMP33]], align 2
247; CHECK-INTERLEAVE1-NEXT:    [[TMP50:%.*]] = load i16, ptr [[TMP34]], align 2
248; CHECK-INTERLEAVE1-NEXT:    [[TMP51:%.*]] = insertelement <16 x i16> poison, i16 [[TMP35]], i32 0
249; CHECK-INTERLEAVE1-NEXT:    [[TMP52:%.*]] = insertelement <16 x i16> [[TMP51]], i16 [[TMP36]], i32 1
250; CHECK-INTERLEAVE1-NEXT:    [[TMP53:%.*]] = insertelement <16 x i16> [[TMP52]], i16 [[TMP37]], i32 2
251; CHECK-INTERLEAVE1-NEXT:    [[TMP54:%.*]] = insertelement <16 x i16> [[TMP53]], i16 [[TMP38]], i32 3
252; CHECK-INTERLEAVE1-NEXT:    [[TMP55:%.*]] = insertelement <16 x i16> [[TMP54]], i16 [[TMP39]], i32 4
253; CHECK-INTERLEAVE1-NEXT:    [[TMP56:%.*]] = insertelement <16 x i16> [[TMP55]], i16 [[TMP40]], i32 5
254; CHECK-INTERLEAVE1-NEXT:    [[TMP57:%.*]] = insertelement <16 x i16> [[TMP56]], i16 [[TMP41]], i32 6
255; CHECK-INTERLEAVE1-NEXT:    [[TMP58:%.*]] = insertelement <16 x i16> [[TMP57]], i16 [[TMP42]], i32 7
256; CHECK-INTERLEAVE1-NEXT:    [[TMP59:%.*]] = insertelement <16 x i16> [[TMP58]], i16 [[TMP43]], i32 8
257; CHECK-INTERLEAVE1-NEXT:    [[TMP60:%.*]] = insertelement <16 x i16> [[TMP59]], i16 [[TMP44]], i32 9
258; CHECK-INTERLEAVE1-NEXT:    [[TMP61:%.*]] = insertelement <16 x i16> [[TMP60]], i16 [[TMP45]], i32 10
259; CHECK-INTERLEAVE1-NEXT:    [[TMP62:%.*]] = insertelement <16 x i16> [[TMP61]], i16 [[TMP46]], i32 11
260; CHECK-INTERLEAVE1-NEXT:    [[TMP63:%.*]] = insertelement <16 x i16> [[TMP62]], i16 [[TMP47]], i32 12
261; CHECK-INTERLEAVE1-NEXT:    [[TMP64:%.*]] = insertelement <16 x i16> [[TMP63]], i16 [[TMP48]], i32 13
262; CHECK-INTERLEAVE1-NEXT:    [[TMP65:%.*]] = insertelement <16 x i16> [[TMP64]], i16 [[TMP49]], i32 14
263; CHECK-INTERLEAVE1-NEXT:    [[TMP66:%.*]] = insertelement <16 x i16> [[TMP65]], i16 [[TMP50]], i32 15
264; CHECK-INTERLEAVE1-NEXT:    [[TMP67:%.*]] = zext <16 x i16> [[TMP66]] to <16 x i32>
265; CHECK-INTERLEAVE1-NEXT:    [[TMP68:%.*]] = mul <16 x i32> [[TMP67]], [[TMP18]]
266; CHECK-INTERLEAVE1-NEXT:    [[TMP69]] = add <16 x i32> [[TMP68]], [[VEC_PHI]]
267; CHECK-INTERLEAVE1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
268; CHECK-INTERLEAVE1-NEXT:    [[TMP70:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
269; CHECK-INTERLEAVE1-NEXT:    br i1 [[TMP70]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
270; CHECK-INTERLEAVE1:       middle.block:
271; CHECK-INTERLEAVE1-NEXT:    [[TMP71:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP69]])
272; CHECK-INTERLEAVE1-NEXT:    br i1 true, label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
273; CHECK-INTERLEAVE1:       scalar.ph:
274; CHECK-INTERLEAVE1-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
275; CHECK-INTERLEAVE1-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP71]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
276; CHECK-INTERLEAVE1-NEXT:    br label [[FOR_BODY:%.*]]
277; CHECK-INTERLEAVE1:       for.body:
278; CHECK-INTERLEAVE1-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
279; CHECK-INTERLEAVE1-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
280; CHECK-INTERLEAVE1-NEXT:    [[GEP_A:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV]]
281; CHECK-INTERLEAVE1-NEXT:    [[LOAD_A:%.*]] = load i8, ptr [[GEP_A]], align 1
282; CHECK-INTERLEAVE1-NEXT:    [[EXT_A:%.*]] = zext i8 [[LOAD_A]] to i32
283; CHECK-INTERLEAVE1-NEXT:    [[GEP_B:%.*]] = getelementptr i8, ptr [[B]], i64 [[IV]]
284; CHECK-INTERLEAVE1-NEXT:    [[LOAD_B:%.*]] = load i16, ptr [[GEP_B]], align 2
285; CHECK-INTERLEAVE1-NEXT:    [[EXT_B:%.*]] = zext i16 [[LOAD_B]] to i32
286; CHECK-INTERLEAVE1-NEXT:    [[MUL:%.*]] = mul i32 [[EXT_B]], [[EXT_A]]
287; CHECK-INTERLEAVE1-NEXT:    [[ADD]] = add i32 [[MUL]], [[ACCUM]]
288; CHECK-INTERLEAVE1-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
289; CHECK-INTERLEAVE1-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
290; CHECK-INTERLEAVE1-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
291; CHECK-INTERLEAVE1:       for.exit:
292; CHECK-INTERLEAVE1-NEXT:    [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP71]], [[MIDDLE_BLOCK]] ]
293; CHECK-INTERLEAVE1-NEXT:    ret i32 [[ADD_LCSSA]]
294;
295; CHECK-INTERLEAVED-LABEL: define i32 @not_dotp_different_types(
296; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
297; CHECK-INTERLEAVED-NEXT:  entry:
298; CHECK-INTERLEAVED-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
299; CHECK-INTERLEAVED:       vector.ph:
300; CHECK-INTERLEAVED-NEXT:    br label [[VECTOR_BODY:%.*]]
301; CHECK-INTERLEAVED:       vector.body:
302; CHECK-INTERLEAVED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
303; CHECK-INTERLEAVED-NEXT:    [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP137:%.*]], [[VECTOR_BODY]] ]
304; CHECK-INTERLEAVED-NEXT:    [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP138:%.*]], [[VECTOR_BODY]] ]
305; CHECK-INTERLEAVED-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
306; CHECK-INTERLEAVED-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
307; CHECK-INTERLEAVED-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2
308; CHECK-INTERLEAVED-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
309; CHECK-INTERLEAVED-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 4
310; CHECK-INTERLEAVED-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 5
311; CHECK-INTERLEAVED-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 6
312; CHECK-INTERLEAVED-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 7
313; CHECK-INTERLEAVED-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 8
314; CHECK-INTERLEAVED-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 9
315; CHECK-INTERLEAVED-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX]], 10
316; CHECK-INTERLEAVED-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 11
317; CHECK-INTERLEAVED-NEXT:    [[TMP12:%.*]] = add i64 [[INDEX]], 12
318; CHECK-INTERLEAVED-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 13
319; CHECK-INTERLEAVED-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 14
320; CHECK-INTERLEAVED-NEXT:    [[TMP15:%.*]] = add i64 [[INDEX]], 15
321; CHECK-INTERLEAVED-NEXT:    [[TMP16:%.*]] = add i64 [[INDEX]], 16
322; CHECK-INTERLEAVED-NEXT:    [[TMP17:%.*]] = add i64 [[INDEX]], 17
323; CHECK-INTERLEAVED-NEXT:    [[TMP18:%.*]] = add i64 [[INDEX]], 18
324; CHECK-INTERLEAVED-NEXT:    [[TMP19:%.*]] = add i64 [[INDEX]], 19
325; CHECK-INTERLEAVED-NEXT:    [[TMP20:%.*]] = add i64 [[INDEX]], 20
326; CHECK-INTERLEAVED-NEXT:    [[TMP21:%.*]] = add i64 [[INDEX]], 21
327; CHECK-INTERLEAVED-NEXT:    [[TMP22:%.*]] = add i64 [[INDEX]], 22
328; CHECK-INTERLEAVED-NEXT:    [[TMP23:%.*]] = add i64 [[INDEX]], 23
329; CHECK-INTERLEAVED-NEXT:    [[TMP24:%.*]] = add i64 [[INDEX]], 24
330; CHECK-INTERLEAVED-NEXT:    [[TMP25:%.*]] = add i64 [[INDEX]], 25
331; CHECK-INTERLEAVED-NEXT:    [[TMP26:%.*]] = add i64 [[INDEX]], 26
332; CHECK-INTERLEAVED-NEXT:    [[TMP27:%.*]] = add i64 [[INDEX]], 27
333; CHECK-INTERLEAVED-NEXT:    [[TMP28:%.*]] = add i64 [[INDEX]], 28
334; CHECK-INTERLEAVED-NEXT:    [[TMP29:%.*]] = add i64 [[INDEX]], 29
335; CHECK-INTERLEAVED-NEXT:    [[TMP30:%.*]] = add i64 [[INDEX]], 30
336; CHECK-INTERLEAVED-NEXT:    [[TMP31:%.*]] = add i64 [[INDEX]], 31
337; CHECK-INTERLEAVED-NEXT:    [[TMP32:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]]
338; CHECK-INTERLEAVED-NEXT:    [[TMP33:%.*]] = getelementptr i8, ptr [[TMP32]], i32 0
339; CHECK-INTERLEAVED-NEXT:    [[TMP34:%.*]] = getelementptr i8, ptr [[TMP32]], i32 16
340; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP33]], align 1
341; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP34]], align 1
342; CHECK-INTERLEAVED-NEXT:    [[TMP35:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
343; CHECK-INTERLEAVED-NEXT:    [[TMP36:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i32>
344; CHECK-INTERLEAVED-NEXT:    [[TMP37:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
345; CHECK-INTERLEAVED-NEXT:    [[TMP38:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
346; CHECK-INTERLEAVED-NEXT:    [[TMP39:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
347; CHECK-INTERLEAVED-NEXT:    [[TMP40:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]]
348; CHECK-INTERLEAVED-NEXT:    [[TMP41:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP4]]
349; CHECK-INTERLEAVED-NEXT:    [[TMP42:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP5]]
350; CHECK-INTERLEAVED-NEXT:    [[TMP43:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP6]]
351; CHECK-INTERLEAVED-NEXT:    [[TMP44:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP7]]
352; CHECK-INTERLEAVED-NEXT:    [[TMP45:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP8]]
353; CHECK-INTERLEAVED-NEXT:    [[TMP46:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP9]]
354; CHECK-INTERLEAVED-NEXT:    [[TMP47:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP10]]
355; CHECK-INTERLEAVED-NEXT:    [[TMP48:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP11]]
356; CHECK-INTERLEAVED-NEXT:    [[TMP49:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP12]]
357; CHECK-INTERLEAVED-NEXT:    [[TMP50:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP13]]
358; CHECK-INTERLEAVED-NEXT:    [[TMP51:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP14]]
359; CHECK-INTERLEAVED-NEXT:    [[TMP52:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP15]]
360; CHECK-INTERLEAVED-NEXT:    [[TMP53:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP16]]
361; CHECK-INTERLEAVED-NEXT:    [[TMP54:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP17]]
362; CHECK-INTERLEAVED-NEXT:    [[TMP55:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP18]]
363; CHECK-INTERLEAVED-NEXT:    [[TMP56:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP19]]
364; CHECK-INTERLEAVED-NEXT:    [[TMP57:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP20]]
365; CHECK-INTERLEAVED-NEXT:    [[TMP58:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP21]]
366; CHECK-INTERLEAVED-NEXT:    [[TMP59:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP22]]
367; CHECK-INTERLEAVED-NEXT:    [[TMP60:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP23]]
368; CHECK-INTERLEAVED-NEXT:    [[TMP61:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP24]]
369; CHECK-INTERLEAVED-NEXT:    [[TMP62:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP25]]
370; CHECK-INTERLEAVED-NEXT:    [[TMP63:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP26]]
371; CHECK-INTERLEAVED-NEXT:    [[TMP64:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP27]]
372; CHECK-INTERLEAVED-NEXT:    [[TMP65:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP28]]
373; CHECK-INTERLEAVED-NEXT:    [[TMP66:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP29]]
374; CHECK-INTERLEAVED-NEXT:    [[TMP67:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP30]]
375; CHECK-INTERLEAVED-NEXT:    [[TMP68:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP31]]
376; CHECK-INTERLEAVED-NEXT:    [[TMP69:%.*]] = load i16, ptr [[TMP37]], align 2
377; CHECK-INTERLEAVED-NEXT:    [[TMP70:%.*]] = load i16, ptr [[TMP38]], align 2
378; CHECK-INTERLEAVED-NEXT:    [[TMP71:%.*]] = load i16, ptr [[TMP39]], align 2
379; CHECK-INTERLEAVED-NEXT:    [[TMP72:%.*]] = load i16, ptr [[TMP40]], align 2
380; CHECK-INTERLEAVED-NEXT:    [[TMP73:%.*]] = load i16, ptr [[TMP41]], align 2
381; CHECK-INTERLEAVED-NEXT:    [[TMP74:%.*]] = load i16, ptr [[TMP42]], align 2
382; CHECK-INTERLEAVED-NEXT:    [[TMP75:%.*]] = load i16, ptr [[TMP43]], align 2
383; CHECK-INTERLEAVED-NEXT:    [[TMP76:%.*]] = load i16, ptr [[TMP44]], align 2
384; CHECK-INTERLEAVED-NEXT:    [[TMP77:%.*]] = load i16, ptr [[TMP45]], align 2
385; CHECK-INTERLEAVED-NEXT:    [[TMP78:%.*]] = load i16, ptr [[TMP46]], align 2
386; CHECK-INTERLEAVED-NEXT:    [[TMP79:%.*]] = load i16, ptr [[TMP47]], align 2
387; CHECK-INTERLEAVED-NEXT:    [[TMP80:%.*]] = load i16, ptr [[TMP48]], align 2
388; CHECK-INTERLEAVED-NEXT:    [[TMP81:%.*]] = load i16, ptr [[TMP49]], align 2
389; CHECK-INTERLEAVED-NEXT:    [[TMP82:%.*]] = load i16, ptr [[TMP50]], align 2
390; CHECK-INTERLEAVED-NEXT:    [[TMP83:%.*]] = load i16, ptr [[TMP51]], align 2
391; CHECK-INTERLEAVED-NEXT:    [[TMP84:%.*]] = load i16, ptr [[TMP52]], align 2
392; CHECK-INTERLEAVED-NEXT:    [[TMP85:%.*]] = insertelement <16 x i16> poison, i16 [[TMP69]], i32 0
393; CHECK-INTERLEAVED-NEXT:    [[TMP86:%.*]] = insertelement <16 x i16> [[TMP85]], i16 [[TMP70]], i32 1
394; CHECK-INTERLEAVED-NEXT:    [[TMP87:%.*]] = insertelement <16 x i16> [[TMP86]], i16 [[TMP71]], i32 2
395; CHECK-INTERLEAVED-NEXT:    [[TMP88:%.*]] = insertelement <16 x i16> [[TMP87]], i16 [[TMP72]], i32 3
396; CHECK-INTERLEAVED-NEXT:    [[TMP89:%.*]] = insertelement <16 x i16> [[TMP88]], i16 [[TMP73]], i32 4
397; CHECK-INTERLEAVED-NEXT:    [[TMP90:%.*]] = insertelement <16 x i16> [[TMP89]], i16 [[TMP74]], i32 5
398; CHECK-INTERLEAVED-NEXT:    [[TMP91:%.*]] = insertelement <16 x i16> [[TMP90]], i16 [[TMP75]], i32 6
399; CHECK-INTERLEAVED-NEXT:    [[TMP92:%.*]] = insertelement <16 x i16> [[TMP91]], i16 [[TMP76]], i32 7
400; CHECK-INTERLEAVED-NEXT:    [[TMP93:%.*]] = insertelement <16 x i16> [[TMP92]], i16 [[TMP77]], i32 8
401; CHECK-INTERLEAVED-NEXT:    [[TMP94:%.*]] = insertelement <16 x i16> [[TMP93]], i16 [[TMP78]], i32 9
402; CHECK-INTERLEAVED-NEXT:    [[TMP95:%.*]] = insertelement <16 x i16> [[TMP94]], i16 [[TMP79]], i32 10
403; CHECK-INTERLEAVED-NEXT:    [[TMP96:%.*]] = insertelement <16 x i16> [[TMP95]], i16 [[TMP80]], i32 11
404; CHECK-INTERLEAVED-NEXT:    [[TMP97:%.*]] = insertelement <16 x i16> [[TMP96]], i16 [[TMP81]], i32 12
405; CHECK-INTERLEAVED-NEXT:    [[TMP98:%.*]] = insertelement <16 x i16> [[TMP97]], i16 [[TMP82]], i32 13
406; CHECK-INTERLEAVED-NEXT:    [[TMP99:%.*]] = insertelement <16 x i16> [[TMP98]], i16 [[TMP83]], i32 14
407; CHECK-INTERLEAVED-NEXT:    [[TMP100:%.*]] = insertelement <16 x i16> [[TMP99]], i16 [[TMP84]], i32 15
408; CHECK-INTERLEAVED-NEXT:    [[TMP101:%.*]] = load i16, ptr [[TMP53]], align 2
409; CHECK-INTERLEAVED-NEXT:    [[TMP102:%.*]] = load i16, ptr [[TMP54]], align 2
410; CHECK-INTERLEAVED-NEXT:    [[TMP103:%.*]] = load i16, ptr [[TMP55]], align 2
411; CHECK-INTERLEAVED-NEXT:    [[TMP104:%.*]] = load i16, ptr [[TMP56]], align 2
412; CHECK-INTERLEAVED-NEXT:    [[TMP105:%.*]] = load i16, ptr [[TMP57]], align 2
413; CHECK-INTERLEAVED-NEXT:    [[TMP106:%.*]] = load i16, ptr [[TMP58]], align 2
414; CHECK-INTERLEAVED-NEXT:    [[TMP107:%.*]] = load i16, ptr [[TMP59]], align 2
415; CHECK-INTERLEAVED-NEXT:    [[TMP108:%.*]] = load i16, ptr [[TMP60]], align 2
416; CHECK-INTERLEAVED-NEXT:    [[TMP109:%.*]] = load i16, ptr [[TMP61]], align 2
417; CHECK-INTERLEAVED-NEXT:    [[TMP110:%.*]] = load i16, ptr [[TMP62]], align 2
418; CHECK-INTERLEAVED-NEXT:    [[TMP111:%.*]] = load i16, ptr [[TMP63]], align 2
419; CHECK-INTERLEAVED-NEXT:    [[TMP112:%.*]] = load i16, ptr [[TMP64]], align 2
420; CHECK-INTERLEAVED-NEXT:    [[TMP113:%.*]] = load i16, ptr [[TMP65]], align 2
421; CHECK-INTERLEAVED-NEXT:    [[TMP114:%.*]] = load i16, ptr [[TMP66]], align 2
422; CHECK-INTERLEAVED-NEXT:    [[TMP115:%.*]] = load i16, ptr [[TMP67]], align 2
423; CHECK-INTERLEAVED-NEXT:    [[TMP116:%.*]] = load i16, ptr [[TMP68]], align 2
424; CHECK-INTERLEAVED-NEXT:    [[TMP117:%.*]] = insertelement <16 x i16> poison, i16 [[TMP101]], i32 0
425; CHECK-INTERLEAVED-NEXT:    [[TMP118:%.*]] = insertelement <16 x i16> [[TMP117]], i16 [[TMP102]], i32 1
426; CHECK-INTERLEAVED-NEXT:    [[TMP119:%.*]] = insertelement <16 x i16> [[TMP118]], i16 [[TMP103]], i32 2
427; CHECK-INTERLEAVED-NEXT:    [[TMP120:%.*]] = insertelement <16 x i16> [[TMP119]], i16 [[TMP104]], i32 3
428; CHECK-INTERLEAVED-NEXT:    [[TMP121:%.*]] = insertelement <16 x i16> [[TMP120]], i16 [[TMP105]], i32 4
429; CHECK-INTERLEAVED-NEXT:    [[TMP122:%.*]] = insertelement <16 x i16> [[TMP121]], i16 [[TMP106]], i32 5
430; CHECK-INTERLEAVED-NEXT:    [[TMP123:%.*]] = insertelement <16 x i16> [[TMP122]], i16 [[TMP107]], i32 6
431; CHECK-INTERLEAVED-NEXT:    [[TMP124:%.*]] = insertelement <16 x i16> [[TMP123]], i16 [[TMP108]], i32 7
432; CHECK-INTERLEAVED-NEXT:    [[TMP125:%.*]] = insertelement <16 x i16> [[TMP124]], i16 [[TMP109]], i32 8
433; CHECK-INTERLEAVED-NEXT:    [[TMP126:%.*]] = insertelement <16 x i16> [[TMP125]], i16 [[TMP110]], i32 9
434; CHECK-INTERLEAVED-NEXT:    [[TMP127:%.*]] = insertelement <16 x i16> [[TMP126]], i16 [[TMP111]], i32 10
435; CHECK-INTERLEAVED-NEXT:    [[TMP128:%.*]] = insertelement <16 x i16> [[TMP127]], i16 [[TMP112]], i32 11
436; CHECK-INTERLEAVED-NEXT:    [[TMP129:%.*]] = insertelement <16 x i16> [[TMP128]], i16 [[TMP113]], i32 12
437; CHECK-INTERLEAVED-NEXT:    [[TMP130:%.*]] = insertelement <16 x i16> [[TMP129]], i16 [[TMP114]], i32 13
438; CHECK-INTERLEAVED-NEXT:    [[TMP131:%.*]] = insertelement <16 x i16> [[TMP130]], i16 [[TMP115]], i32 14
439; CHECK-INTERLEAVED-NEXT:    [[TMP132:%.*]] = insertelement <16 x i16> [[TMP131]], i16 [[TMP116]], i32 15
440; CHECK-INTERLEAVED-NEXT:    [[TMP133:%.*]] = zext <16 x i16> [[TMP100]] to <16 x i32>
441; CHECK-INTERLEAVED-NEXT:    [[TMP134:%.*]] = zext <16 x i16> [[TMP132]] to <16 x i32>
442; CHECK-INTERLEAVED-NEXT:    [[TMP135:%.*]] = mul <16 x i32> [[TMP133]], [[TMP35]]
443; CHECK-INTERLEAVED-NEXT:    [[TMP136:%.*]] = mul <16 x i32> [[TMP134]], [[TMP36]]
444; CHECK-INTERLEAVED-NEXT:    [[TMP137]] = add <16 x i32> [[TMP135]], [[VEC_PHI]]
445; CHECK-INTERLEAVED-NEXT:    [[TMP138]] = add <16 x i32> [[TMP136]], [[VEC_PHI1]]
446; CHECK-INTERLEAVED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
447; CHECK-INTERLEAVED-NEXT:    [[TMP139:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
448; CHECK-INTERLEAVED-NEXT:    br i1 [[TMP139]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
449; CHECK-INTERLEAVED:       middle.block:
450; CHECK-INTERLEAVED-NEXT:    [[BIN_RDX:%.*]] = add <16 x i32> [[TMP138]], [[TMP137]]
451; CHECK-INTERLEAVED-NEXT:    [[TMP140:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[BIN_RDX]])
452; CHECK-INTERLEAVED-NEXT:    br i1 true, label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
453; CHECK-INTERLEAVED:       scalar.ph:
454; CHECK-INTERLEAVED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
455; CHECK-INTERLEAVED-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP140]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
456; CHECK-INTERLEAVED-NEXT:    br label [[FOR_BODY:%.*]]
457; CHECK-INTERLEAVED:       for.body:
458; CHECK-INTERLEAVED-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
459; CHECK-INTERLEAVED-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
460; CHECK-INTERLEAVED-NEXT:    [[GEP_A:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV]]
461; CHECK-INTERLEAVED-NEXT:    [[LOAD_A:%.*]] = load i8, ptr [[GEP_A]], align 1
462; CHECK-INTERLEAVED-NEXT:    [[EXT_A:%.*]] = zext i8 [[LOAD_A]] to i32
463; CHECK-INTERLEAVED-NEXT:    [[GEP_B:%.*]] = getelementptr i8, ptr [[B]], i64 [[IV]]
464; CHECK-INTERLEAVED-NEXT:    [[LOAD_B:%.*]] = load i16, ptr [[GEP_B]], align 2
465; CHECK-INTERLEAVED-NEXT:    [[EXT_B:%.*]] = zext i16 [[LOAD_B]] to i32
466; CHECK-INTERLEAVED-NEXT:    [[MUL:%.*]] = mul i32 [[EXT_B]], [[EXT_A]]
467; CHECK-INTERLEAVED-NEXT:    [[ADD]] = add i32 [[MUL]], [[ACCUM]]
468; CHECK-INTERLEAVED-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
469; CHECK-INTERLEAVED-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
470; CHECK-INTERLEAVED-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
471; CHECK-INTERLEAVED:       for.exit:
472; CHECK-INTERLEAVED-NEXT:    [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP140]], [[MIDDLE_BLOCK]] ]
473; CHECK-INTERLEAVED-NEXT:    ret i32 [[ADD_LCSSA]]
474;
475; CHECK-MAXBW-LABEL: define i32 @not_dotp_different_types(
476; CHECK-MAXBW-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
477; CHECK-MAXBW-NEXT:  entry:
478; CHECK-MAXBW-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
479; CHECK-MAXBW:       vector.ph:
480; CHECK-MAXBW-NEXT:    br label [[VECTOR_BODY:%.*]]
481; CHECK-MAXBW:       vector.body:
482; CHECK-MAXBW-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
483; CHECK-MAXBW-NEXT:    [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP69:%.*]], [[VECTOR_BODY]] ]
484; CHECK-MAXBW-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
485; CHECK-MAXBW-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
486; CHECK-MAXBW-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2
487; CHECK-MAXBW-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
488; CHECK-MAXBW-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 4
489; CHECK-MAXBW-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 5
490; CHECK-MAXBW-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 6
491; CHECK-MAXBW-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 7
492; CHECK-MAXBW-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 8
493; CHECK-MAXBW-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 9
494; CHECK-MAXBW-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX]], 10
495; CHECK-MAXBW-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 11
496; CHECK-MAXBW-NEXT:    [[TMP12:%.*]] = add i64 [[INDEX]], 12
497; CHECK-MAXBW-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 13
498; CHECK-MAXBW-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 14
499; CHECK-MAXBW-NEXT:    [[TMP15:%.*]] = add i64 [[INDEX]], 15
500; CHECK-MAXBW-NEXT:    [[TMP16:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]]
501; CHECK-MAXBW-NEXT:    [[TMP17:%.*]] = getelementptr i8, ptr [[TMP16]], i32 0
502; CHECK-MAXBW-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP17]], align 1
503; CHECK-MAXBW-NEXT:    [[TMP18:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
504; CHECK-MAXBW-NEXT:    [[TMP19:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
505; CHECK-MAXBW-NEXT:    [[TMP20:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
506; CHECK-MAXBW-NEXT:    [[TMP21:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
507; CHECK-MAXBW-NEXT:    [[TMP22:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]]
508; CHECK-MAXBW-NEXT:    [[TMP23:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP4]]
509; CHECK-MAXBW-NEXT:    [[TMP24:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP5]]
510; CHECK-MAXBW-NEXT:    [[TMP25:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP6]]
511; CHECK-MAXBW-NEXT:    [[TMP26:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP7]]
512; CHECK-MAXBW-NEXT:    [[TMP27:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP8]]
513; CHECK-MAXBW-NEXT:    [[TMP28:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP9]]
514; CHECK-MAXBW-NEXT:    [[TMP29:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP10]]
515; CHECK-MAXBW-NEXT:    [[TMP30:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP11]]
516; CHECK-MAXBW-NEXT:    [[TMP31:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP12]]
517; CHECK-MAXBW-NEXT:    [[TMP32:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP13]]
518; CHECK-MAXBW-NEXT:    [[TMP33:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP14]]
519; CHECK-MAXBW-NEXT:    [[TMP34:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP15]]
520; CHECK-MAXBW-NEXT:    [[TMP35:%.*]] = load i16, ptr [[TMP19]], align 2
521; CHECK-MAXBW-NEXT:    [[TMP36:%.*]] = load i16, ptr [[TMP20]], align 2
522; CHECK-MAXBW-NEXT:    [[TMP37:%.*]] = load i16, ptr [[TMP21]], align 2
523; CHECK-MAXBW-NEXT:    [[TMP38:%.*]] = load i16, ptr [[TMP22]], align 2
524; CHECK-MAXBW-NEXT:    [[TMP39:%.*]] = load i16, ptr [[TMP23]], align 2
525; CHECK-MAXBW-NEXT:    [[TMP40:%.*]] = load i16, ptr [[TMP24]], align 2
526; CHECK-MAXBW-NEXT:    [[TMP41:%.*]] = load i16, ptr [[TMP25]], align 2
527; CHECK-MAXBW-NEXT:    [[TMP42:%.*]] = load i16, ptr [[TMP26]], align 2
528; CHECK-MAXBW-NEXT:    [[TMP43:%.*]] = load i16, ptr [[TMP27]], align 2
529; CHECK-MAXBW-NEXT:    [[TMP44:%.*]] = load i16, ptr [[TMP28]], align 2
530; CHECK-MAXBW-NEXT:    [[TMP45:%.*]] = load i16, ptr [[TMP29]], align 2
531; CHECK-MAXBW-NEXT:    [[TMP46:%.*]] = load i16, ptr [[TMP30]], align 2
532; CHECK-MAXBW-NEXT:    [[TMP47:%.*]] = load i16, ptr [[TMP31]], align 2
533; CHECK-MAXBW-NEXT:    [[TMP48:%.*]] = load i16, ptr [[TMP32]], align 2
534; CHECK-MAXBW-NEXT:    [[TMP49:%.*]] = load i16, ptr [[TMP33]], align 2
535; CHECK-MAXBW-NEXT:    [[TMP50:%.*]] = load i16, ptr [[TMP34]], align 2
536; CHECK-MAXBW-NEXT:    [[TMP51:%.*]] = insertelement <16 x i16> poison, i16 [[TMP35]], i32 0
537; CHECK-MAXBW-NEXT:    [[TMP52:%.*]] = insertelement <16 x i16> [[TMP51]], i16 [[TMP36]], i32 1
538; CHECK-MAXBW-NEXT:    [[TMP53:%.*]] = insertelement <16 x i16> [[TMP52]], i16 [[TMP37]], i32 2
539; CHECK-MAXBW-NEXT:    [[TMP54:%.*]] = insertelement <16 x i16> [[TMP53]], i16 [[TMP38]], i32 3
540; CHECK-MAXBW-NEXT:    [[TMP55:%.*]] = insertelement <16 x i16> [[TMP54]], i16 [[TMP39]], i32 4
541; CHECK-MAXBW-NEXT:    [[TMP56:%.*]] = insertelement <16 x i16> [[TMP55]], i16 [[TMP40]], i32 5
542; CHECK-MAXBW-NEXT:    [[TMP57:%.*]] = insertelement <16 x i16> [[TMP56]], i16 [[TMP41]], i32 6
543; CHECK-MAXBW-NEXT:    [[TMP58:%.*]] = insertelement <16 x i16> [[TMP57]], i16 [[TMP42]], i32 7
544; CHECK-MAXBW-NEXT:    [[TMP59:%.*]] = insertelement <16 x i16> [[TMP58]], i16 [[TMP43]], i32 8
545; CHECK-MAXBW-NEXT:    [[TMP60:%.*]] = insertelement <16 x i16> [[TMP59]], i16 [[TMP44]], i32 9
546; CHECK-MAXBW-NEXT:    [[TMP61:%.*]] = insertelement <16 x i16> [[TMP60]], i16 [[TMP45]], i32 10
547; CHECK-MAXBW-NEXT:    [[TMP62:%.*]] = insertelement <16 x i16> [[TMP61]], i16 [[TMP46]], i32 11
548; CHECK-MAXBW-NEXT:    [[TMP63:%.*]] = insertelement <16 x i16> [[TMP62]], i16 [[TMP47]], i32 12
549; CHECK-MAXBW-NEXT:    [[TMP64:%.*]] = insertelement <16 x i16> [[TMP63]], i16 [[TMP48]], i32 13
550; CHECK-MAXBW-NEXT:    [[TMP65:%.*]] = insertelement <16 x i16> [[TMP64]], i16 [[TMP49]], i32 14
551; CHECK-MAXBW-NEXT:    [[TMP66:%.*]] = insertelement <16 x i16> [[TMP65]], i16 [[TMP50]], i32 15
552; CHECK-MAXBW-NEXT:    [[TMP67:%.*]] = zext <16 x i16> [[TMP66]] to <16 x i32>
553; CHECK-MAXBW-NEXT:    [[TMP68:%.*]] = mul <16 x i32> [[TMP67]], [[TMP18]]
554; CHECK-MAXBW-NEXT:    [[TMP69]] = add <16 x i32> [[TMP68]], [[VEC_PHI]]
555; CHECK-MAXBW-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
556; CHECK-MAXBW-NEXT:    [[TMP70:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
557; CHECK-MAXBW-NEXT:    br i1 [[TMP70]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
558; CHECK-MAXBW:       middle.block:
559; CHECK-MAXBW-NEXT:    [[TMP71:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP69]])
560; CHECK-MAXBW-NEXT:    br i1 true, label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
561; CHECK-MAXBW:       scalar.ph:
562; CHECK-MAXBW-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
563; CHECK-MAXBW-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP71]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
564; CHECK-MAXBW-NEXT:    br label [[FOR_BODY:%.*]]
565; CHECK-MAXBW:       for.body:
566; CHECK-MAXBW-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
567; CHECK-MAXBW-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
568; CHECK-MAXBW-NEXT:    [[GEP_A:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV]]
569; CHECK-MAXBW-NEXT:    [[LOAD_A:%.*]] = load i8, ptr [[GEP_A]], align 1
570; CHECK-MAXBW-NEXT:    [[EXT_A:%.*]] = zext i8 [[LOAD_A]] to i32
571; CHECK-MAXBW-NEXT:    [[GEP_B:%.*]] = getelementptr i8, ptr [[B]], i64 [[IV]]
572; CHECK-MAXBW-NEXT:    [[LOAD_B:%.*]] = load i16, ptr [[GEP_B]], align 2
573; CHECK-MAXBW-NEXT:    [[EXT_B:%.*]] = zext i16 [[LOAD_B]] to i32
574; CHECK-MAXBW-NEXT:    [[MUL:%.*]] = mul i32 [[EXT_B]], [[EXT_A]]
575; CHECK-MAXBW-NEXT:    [[ADD]] = add i32 [[MUL]], [[ACCUM]]
576; CHECK-MAXBW-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
577; CHECK-MAXBW-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
578; CHECK-MAXBW-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
579; CHECK-MAXBW:       for.exit:
580; CHECK-MAXBW-NEXT:    [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP71]], [[MIDDLE_BLOCK]] ]
581; CHECK-MAXBW-NEXT:    ret i32 [[ADD_LCSSA]]
582;
583entry:
584  br label %for.body
585
586for.body:                                         ; preds = %for.body, %entry
587  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
588  %accum = phi i32 [ 0, %entry ], [ %add, %for.body ]
589  %gep.a = getelementptr i8, ptr %a, i64 %iv
590  %load.a = load i8, ptr %gep.a, align 1
591  %ext.a = zext i8 %load.a to i32
592  %gep.b = getelementptr i8, ptr %b, i64 %iv
593  %load.b = load i16, ptr %gep.b, align 2
594  %ext.b = zext i16 %load.b to i32
595  %mul = mul i32 %ext.b, %ext.a
596  %add = add i32 %mul, %accum
597  %iv.next = add i64 %iv, 1
598  %exitcond.not = icmp eq i64 %iv.next, 1024
599  br i1 %exitcond.not, label %for.exit, label %for.body
600
601for.exit:                        ; preds = %for.body
602  ret i32 %add
603}
604
605define i32 @not_dotp_not_loop_carried(ptr %a, ptr %b) {
606; CHECK-INTERLEAVE1-LABEL: define i32 @not_dotp_not_loop_carried(
607; CHECK-INTERLEAVE1-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
608; CHECK-INTERLEAVE1-NEXT:  entry:
609; CHECK-INTERLEAVE1-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
610; CHECK-INTERLEAVE1:       vector.ph:
611; CHECK-INTERLEAVE1-NEXT:    br label [[VECTOR_BODY:%.*]]
612; CHECK-INTERLEAVE1:       vector.body:
613; CHECK-INTERLEAVE1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
614; CHECK-INTERLEAVE1-NEXT:    [[VECTOR_RECUR:%.*]] = phi <16 x i32> [ <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
615; CHECK-INTERLEAVE1-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
616; CHECK-INTERLEAVE1-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]]
617; CHECK-INTERLEAVE1-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 0
618; CHECK-INTERLEAVE1-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
619; CHECK-INTERLEAVE1-NEXT:    [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
620; CHECK-INTERLEAVE1-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
621; CHECK-INTERLEAVE1-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i32 0
622; CHECK-INTERLEAVE1-NEXT:    [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1
623; CHECK-INTERLEAVE1-NEXT:    [[TMP6:%.*]] = zext <16 x i8> [[WIDE_LOAD1]] to <16 x i32>
624; CHECK-INTERLEAVE1-NEXT:    [[TMP7]] = mul <16 x i32> [[TMP6]], [[TMP3]]
625; CHECK-INTERLEAVE1-NEXT:    [[TMP8:%.*]] = shufflevector <16 x i32> [[VECTOR_RECUR]], <16 x i32> [[TMP7]], <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
626; CHECK-INTERLEAVE1-NEXT:    [[TMP9:%.*]] = add <16 x i32> [[TMP7]], [[TMP8]]
627; CHECK-INTERLEAVE1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
628; CHECK-INTERLEAVE1-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
629; CHECK-INTERLEAVE1-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
630; CHECK-INTERLEAVE1:       middle.block:
631; CHECK-INTERLEAVE1-NEXT:    [[TMP11:%.*]] = extractelement <16 x i32> [[TMP9]], i32 15
632; CHECK-INTERLEAVE1-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP7]], i32 15
633; CHECK-INTERLEAVE1-NEXT:    br i1 true, label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
634; CHECK-INTERLEAVE1:       scalar.ph:
635; CHECK-INTERLEAVE1-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
636; CHECK-INTERLEAVE1-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
637; CHECK-INTERLEAVE1-NEXT:    br label [[FOR_BODY:%.*]]
638; CHECK-INTERLEAVE1:       for.body:
639; CHECK-INTERLEAVE1-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
640; CHECK-INTERLEAVE1-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[MUL:%.*]], [[FOR_BODY]] ]
641; CHECK-INTERLEAVE1-NEXT:    [[GEP_A:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV]]
642; CHECK-INTERLEAVE1-NEXT:    [[LOAD_A:%.*]] = load i8, ptr [[GEP_A]], align 1
643; CHECK-INTERLEAVE1-NEXT:    [[EXT_A:%.*]] = zext i8 [[LOAD_A]] to i32
644; CHECK-INTERLEAVE1-NEXT:    [[GEP_B:%.*]] = getelementptr i8, ptr [[B]], i64 [[IV]]
645; CHECK-INTERLEAVE1-NEXT:    [[LOAD_B:%.*]] = load i8, ptr [[GEP_B]], align 1
646; CHECK-INTERLEAVE1-NEXT:    [[EXT_B:%.*]] = zext i8 [[LOAD_B]] to i32
647; CHECK-INTERLEAVE1-NEXT:    [[MUL]] = mul i32 [[EXT_B]], [[EXT_A]]
648; CHECK-INTERLEAVE1-NEXT:    [[ADD:%.*]] = add i32 [[MUL]], [[ACCUM]]
649; CHECK-INTERLEAVE1-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
650; CHECK-INTERLEAVE1-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
651; CHECK-INTERLEAVE1-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
652; CHECK-INTERLEAVE1:       for.exit:
653; CHECK-INTERLEAVE1-NEXT:    [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
654; CHECK-INTERLEAVE1-NEXT:    ret i32 [[ADD_LCSSA]]
655;
656; CHECK-INTERLEAVED-LABEL: define i32 @not_dotp_not_loop_carried(
657; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
658; CHECK-INTERLEAVED-NEXT:  entry:
659; CHECK-INTERLEAVED-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
660; CHECK-INTERLEAVED:       vector.ph:
661; CHECK-INTERLEAVED-NEXT:    br label [[VECTOR_BODY:%.*]]
662; CHECK-INTERLEAVED:       vector.body:
663; CHECK-INTERLEAVED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
664; CHECK-INTERLEAVED-NEXT:    [[VECTOR_RECUR:%.*]] = phi <16 x i32> [ <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
665; CHECK-INTERLEAVED-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
666; CHECK-INTERLEAVED-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]]
667; CHECK-INTERLEAVED-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 0
668; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
669; CHECK-INTERLEAVED-NEXT:    [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
670; CHECK-INTERLEAVED-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
671; CHECK-INTERLEAVED-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i32 0
672; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1
673; CHECK-INTERLEAVED-NEXT:    [[TMP6:%.*]] = zext <16 x i8> [[WIDE_LOAD1]] to <16 x i32>
674; CHECK-INTERLEAVED-NEXT:    [[TMP7]] = mul <16 x i32> [[TMP6]], [[TMP3]]
675; CHECK-INTERLEAVED-NEXT:    [[TMP8:%.*]] = shufflevector <16 x i32> [[VECTOR_RECUR]], <16 x i32> [[TMP7]], <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
676; CHECK-INTERLEAVED-NEXT:    [[TMP9:%.*]] = add <16 x i32> [[TMP7]], [[TMP8]]
677; CHECK-INTERLEAVED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
678; CHECK-INTERLEAVED-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
679; CHECK-INTERLEAVED-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
680; CHECK-INTERLEAVED:       middle.block:
681; CHECK-INTERLEAVED-NEXT:    [[TMP11:%.*]] = extractelement <16 x i32> [[TMP9]], i32 15
682; CHECK-INTERLEAVED-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP7]], i32 15
683; CHECK-INTERLEAVED-NEXT:    br i1 true, label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
684; CHECK-INTERLEAVED:       scalar.ph:
685; CHECK-INTERLEAVED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
686; CHECK-INTERLEAVED-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
687; CHECK-INTERLEAVED-NEXT:    br label [[FOR_BODY:%.*]]
688; CHECK-INTERLEAVED:       for.body:
689; CHECK-INTERLEAVED-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
690; CHECK-INTERLEAVED-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[MUL:%.*]], [[FOR_BODY]] ]
691; CHECK-INTERLEAVED-NEXT:    [[GEP_A:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV]]
692; CHECK-INTERLEAVED-NEXT:    [[LOAD_A:%.*]] = load i8, ptr [[GEP_A]], align 1
693; CHECK-INTERLEAVED-NEXT:    [[EXT_A:%.*]] = zext i8 [[LOAD_A]] to i32
694; CHECK-INTERLEAVED-NEXT:    [[GEP_B:%.*]] = getelementptr i8, ptr [[B]], i64 [[IV]]
695; CHECK-INTERLEAVED-NEXT:    [[LOAD_B:%.*]] = load i8, ptr [[GEP_B]], align 1
696; CHECK-INTERLEAVED-NEXT:    [[EXT_B:%.*]] = zext i8 [[LOAD_B]] to i32
697; CHECK-INTERLEAVED-NEXT:    [[MUL]] = mul i32 [[EXT_B]], [[EXT_A]]
698; CHECK-INTERLEAVED-NEXT:    [[ADD:%.*]] = add i32 [[MUL]], [[ACCUM]]
699; CHECK-INTERLEAVED-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
700; CHECK-INTERLEAVED-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
701; CHECK-INTERLEAVED-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
702; CHECK-INTERLEAVED:       for.exit:
703; CHECK-INTERLEAVED-NEXT:    [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
704; CHECK-INTERLEAVED-NEXT:    ret i32 [[ADD_LCSSA]]
705;
706; CHECK-MAXBW-LABEL: define i32 @not_dotp_not_loop_carried(
707; CHECK-MAXBW-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
708; CHECK-MAXBW-NEXT:  entry:
709; CHECK-MAXBW-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
710; CHECK-MAXBW:       vector.ph:
711; CHECK-MAXBW-NEXT:    br label [[VECTOR_BODY:%.*]]
712; CHECK-MAXBW:       vector.body:
713; CHECK-MAXBW-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
714; CHECK-MAXBW-NEXT:    [[VECTOR_RECUR:%.*]] = phi <16 x i32> [ <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
715; CHECK-MAXBW-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
716; CHECK-MAXBW-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]]
717; CHECK-MAXBW-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 0
718; CHECK-MAXBW-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
719; CHECK-MAXBW-NEXT:    [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
720; CHECK-MAXBW-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
721; CHECK-MAXBW-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i32 0
722; CHECK-MAXBW-NEXT:    [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1
723; CHECK-MAXBW-NEXT:    [[TMP6:%.*]] = zext <16 x i8> [[WIDE_LOAD1]] to <16 x i32>
724; CHECK-MAXBW-NEXT:    [[TMP7]] = mul <16 x i32> [[TMP6]], [[TMP3]]
725; CHECK-MAXBW-NEXT:    [[TMP8:%.*]] = shufflevector <16 x i32> [[VECTOR_RECUR]], <16 x i32> [[TMP7]], <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
726; CHECK-MAXBW-NEXT:    [[TMP9:%.*]] = add <16 x i32> [[TMP7]], [[TMP8]]
727; CHECK-MAXBW-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
728; CHECK-MAXBW-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
729; CHECK-MAXBW-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
730; CHECK-MAXBW:       middle.block:
731; CHECK-MAXBW-NEXT:    [[TMP11:%.*]] = extractelement <16 x i32> [[TMP9]], i32 15
732; CHECK-MAXBW-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP7]], i32 15
733; CHECK-MAXBW-NEXT:    br i1 true, label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
734; CHECK-MAXBW:       scalar.ph:
735; CHECK-MAXBW-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
736; CHECK-MAXBW-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
737; CHECK-MAXBW-NEXT:    br label [[FOR_BODY:%.*]]
738; CHECK-MAXBW:       for.body:
739; CHECK-MAXBW-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
740; CHECK-MAXBW-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[MUL:%.*]], [[FOR_BODY]] ]
741; CHECK-MAXBW-NEXT:    [[GEP_A:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV]]
742; CHECK-MAXBW-NEXT:    [[LOAD_A:%.*]] = load i8, ptr [[GEP_A]], align 1
743; CHECK-MAXBW-NEXT:    [[EXT_A:%.*]] = zext i8 [[LOAD_A]] to i32
744; CHECK-MAXBW-NEXT:    [[GEP_B:%.*]] = getelementptr i8, ptr [[B]], i64 [[IV]]
745; CHECK-MAXBW-NEXT:    [[LOAD_B:%.*]] = load i8, ptr [[GEP_B]], align 1
746; CHECK-MAXBW-NEXT:    [[EXT_B:%.*]] = zext i8 [[LOAD_B]] to i32
747; CHECK-MAXBW-NEXT:    [[MUL]] = mul i32 [[EXT_B]], [[EXT_A]]
748; CHECK-MAXBW-NEXT:    [[ADD:%.*]] = add i32 [[MUL]], [[ACCUM]]
749; CHECK-MAXBW-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
750; CHECK-MAXBW-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
751; CHECK-MAXBW-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
752; CHECK-MAXBW:       for.exit:
753; CHECK-MAXBW-NEXT:    [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
754; CHECK-MAXBW-NEXT:    ret i32 [[ADD_LCSSA]]
755;
756entry:
757  br label %for.body
758
759for.body:                                         ; preds = %for.body, %entry
760  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
761  %accum = phi i32 [ 0, %entry ], [ %mul, %for.body ]
762  %gep.a = getelementptr i8, ptr %a, i64 %iv
763  %load.a = load i8, ptr %gep.a, align 1
764  %ext.a = zext i8 %load.a to i32
765  %gep.b = getelementptr i8, ptr %b, i64 %iv
766  %load.b = load i8, ptr %gep.b, align 1
767  %ext.b = zext i8 %load.b to i32
768  %mul = mul i32 %ext.b, %ext.a
769  %add = add i32 %mul, %accum
770  %iv.next = add i64 %iv, 1
771  %exitcond.not = icmp eq i64 %iv.next, 1024
772  br i1 %exitcond.not, label %for.exit, label %for.body
773
774for.exit:                        ; preds = %for.body
775  ret i32 %add
776}
777
778define i32 @not_dotp_not_phi(ptr %a, ptr %b) {
779; CHECK-INTERLEAVE1-LABEL: define i32 @not_dotp_not_phi(
780; CHECK-INTERLEAVE1-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
781; CHECK-INTERLEAVE1-NEXT:  entry:
782; CHECK-INTERLEAVE1-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
783; CHECK-INTERLEAVE1:       vector.ph:
784; CHECK-INTERLEAVE1-NEXT:    br label [[VECTOR_BODY:%.*]]
785; CHECK-INTERLEAVE1:       vector.body:
786; CHECK-INTERLEAVE1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
787; CHECK-INTERLEAVE1-NEXT:    [[VECTOR_RECUR:%.*]] = phi <16 x i32> [ <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
788; CHECK-INTERLEAVE1-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
789; CHECK-INTERLEAVE1-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]]
790; CHECK-INTERLEAVE1-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 0
791; CHECK-INTERLEAVE1-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
792; CHECK-INTERLEAVE1-NEXT:    [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
793; CHECK-INTERLEAVE1-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
794; CHECK-INTERLEAVE1-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i32 0
795; CHECK-INTERLEAVE1-NEXT:    [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1
796; CHECK-INTERLEAVE1-NEXT:    [[TMP6:%.*]] = zext <16 x i8> [[WIDE_LOAD1]] to <16 x i32>
797; CHECK-INTERLEAVE1-NEXT:    [[TMP7:%.*]] = mul <16 x i32> [[TMP6]], [[TMP3]]
798; CHECK-INTERLEAVE1-NEXT:    [[TMP8]] = add <16 x i32> [[TMP7]], [[TMP6]]
799; CHECK-INTERLEAVE1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
800; CHECK-INTERLEAVE1-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
801; CHECK-INTERLEAVE1-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
802; CHECK-INTERLEAVE1:       middle.block:
803; CHECK-INTERLEAVE1-NEXT:    [[TMP10:%.*]] = extractelement <16 x i32> [[TMP8]], i32 15
804; CHECK-INTERLEAVE1-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP8]], i32 15
805; CHECK-INTERLEAVE1-NEXT:    br i1 true, label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
806; CHECK-INTERLEAVE1:       scalar.ph:
807; CHECK-INTERLEAVE1-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
808; CHECK-INTERLEAVE1-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
809; CHECK-INTERLEAVE1-NEXT:    br label [[FOR_BODY:%.*]]
810; CHECK-INTERLEAVE1:       for.body:
811; CHECK-INTERLEAVE1-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
812; CHECK-INTERLEAVE1-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
813; CHECK-INTERLEAVE1-NEXT:    [[GEP_A:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV]]
814; CHECK-INTERLEAVE1-NEXT:    [[LOAD_A:%.*]] = load i8, ptr [[GEP_A]], align 1
815; CHECK-INTERLEAVE1-NEXT:    [[EXT_A:%.*]] = zext i8 [[LOAD_A]] to i32
816; CHECK-INTERLEAVE1-NEXT:    [[GEP_B:%.*]] = getelementptr i8, ptr [[B]], i64 [[IV]]
817; CHECK-INTERLEAVE1-NEXT:    [[LOAD_B:%.*]] = load i8, ptr [[GEP_B]], align 1
818; CHECK-INTERLEAVE1-NEXT:    [[EXT_B:%.*]] = zext i8 [[LOAD_B]] to i32
819; CHECK-INTERLEAVE1-NEXT:    [[MUL:%.*]] = mul i32 [[EXT_B]], [[EXT_A]]
820; CHECK-INTERLEAVE1-NEXT:    [[ADD]] = add i32 [[MUL]], [[EXT_B]]
821; CHECK-INTERLEAVE1-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
822; CHECK-INTERLEAVE1-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
823; CHECK-INTERLEAVE1-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
824; CHECK-INTERLEAVE1:       for.exit:
825; CHECK-INTERLEAVE1-NEXT:    [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ]
826; CHECK-INTERLEAVE1-NEXT:    ret i32 [[ADD_LCSSA]]
827;
828; CHECK-INTERLEAVED-LABEL: define i32 @not_dotp_not_phi(
829; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
830; CHECK-INTERLEAVED-NEXT:  entry:
831; CHECK-INTERLEAVED-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
832; CHECK-INTERLEAVED:       vector.ph:
833; CHECK-INTERLEAVED-NEXT:    br label [[VECTOR_BODY:%.*]]
834; CHECK-INTERLEAVED:       vector.body:
835; CHECK-INTERLEAVED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
836; CHECK-INTERLEAVED-NEXT:    [[VECTOR_RECUR:%.*]] = phi <16 x i32> [ <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
837; CHECK-INTERLEAVED-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
838; CHECK-INTERLEAVED-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]]
839; CHECK-INTERLEAVED-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 0
840; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
841; CHECK-INTERLEAVED-NEXT:    [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
842; CHECK-INTERLEAVED-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
843; CHECK-INTERLEAVED-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i32 0
844; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1
845; CHECK-INTERLEAVED-NEXT:    [[TMP6:%.*]] = zext <16 x i8> [[WIDE_LOAD1]] to <16 x i32>
846; CHECK-INTERLEAVED-NEXT:    [[TMP7:%.*]] = mul <16 x i32> [[TMP6]], [[TMP3]]
847; CHECK-INTERLEAVED-NEXT:    [[TMP8]] = add <16 x i32> [[TMP7]], [[TMP6]]
848; CHECK-INTERLEAVED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
849; CHECK-INTERLEAVED-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
850; CHECK-INTERLEAVED-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
851; CHECK-INTERLEAVED:       middle.block:
852; CHECK-INTERLEAVED-NEXT:    [[TMP10:%.*]] = extractelement <16 x i32> [[TMP8]], i32 15
853; CHECK-INTERLEAVED-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP8]], i32 15
854; CHECK-INTERLEAVED-NEXT:    br i1 true, label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
855; CHECK-INTERLEAVED:       scalar.ph:
856; CHECK-INTERLEAVED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
857; CHECK-INTERLEAVED-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
858; CHECK-INTERLEAVED-NEXT:    br label [[FOR_BODY:%.*]]
859; CHECK-INTERLEAVED:       for.body:
860; CHECK-INTERLEAVED-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
861; CHECK-INTERLEAVED-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
862; CHECK-INTERLEAVED-NEXT:    [[GEP_A:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV]]
863; CHECK-INTERLEAVED-NEXT:    [[LOAD_A:%.*]] = load i8, ptr [[GEP_A]], align 1
864; CHECK-INTERLEAVED-NEXT:    [[EXT_A:%.*]] = zext i8 [[LOAD_A]] to i32
865; CHECK-INTERLEAVED-NEXT:    [[GEP_B:%.*]] = getelementptr i8, ptr [[B]], i64 [[IV]]
866; CHECK-INTERLEAVED-NEXT:    [[LOAD_B:%.*]] = load i8, ptr [[GEP_B]], align 1
867; CHECK-INTERLEAVED-NEXT:    [[EXT_B:%.*]] = zext i8 [[LOAD_B]] to i32
868; CHECK-INTERLEAVED-NEXT:    [[MUL:%.*]] = mul i32 [[EXT_B]], [[EXT_A]]
869; CHECK-INTERLEAVED-NEXT:    [[ADD]] = add i32 [[MUL]], [[EXT_B]]
870; CHECK-INTERLEAVED-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
871; CHECK-INTERLEAVED-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
872; CHECK-INTERLEAVED-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
873; CHECK-INTERLEAVED:       for.exit:
874; CHECK-INTERLEAVED-NEXT:    [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ]
875; CHECK-INTERLEAVED-NEXT:    ret i32 [[ADD_LCSSA]]
876;
877; CHECK-MAXBW-LABEL: define i32 @not_dotp_not_phi(
878; CHECK-MAXBW-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
879; CHECK-MAXBW-NEXT:  entry:
880; CHECK-MAXBW-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
881; CHECK-MAXBW:       vector.ph:
882; CHECK-MAXBW-NEXT:    br label [[VECTOR_BODY:%.*]]
883; CHECK-MAXBW:       vector.body:
884; CHECK-MAXBW-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
885; CHECK-MAXBW-NEXT:    [[VECTOR_RECUR:%.*]] = phi <16 x i32> [ <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
886; CHECK-MAXBW-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
887; CHECK-MAXBW-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]]
888; CHECK-MAXBW-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 0
889; CHECK-MAXBW-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
890; CHECK-MAXBW-NEXT:    [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
891; CHECK-MAXBW-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
892; CHECK-MAXBW-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i32 0
893; CHECK-MAXBW-NEXT:    [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1
894; CHECK-MAXBW-NEXT:    [[TMP6:%.*]] = zext <16 x i8> [[WIDE_LOAD1]] to <16 x i32>
895; CHECK-MAXBW-NEXT:    [[TMP7:%.*]] = mul <16 x i32> [[TMP6]], [[TMP3]]
896; CHECK-MAXBW-NEXT:    [[TMP8]] = add <16 x i32> [[TMP7]], [[TMP6]]
897; CHECK-MAXBW-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
898; CHECK-MAXBW-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
899; CHECK-MAXBW-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
900; CHECK-MAXBW:       middle.block:
901; CHECK-MAXBW-NEXT:    [[TMP10:%.*]] = extractelement <16 x i32> [[TMP8]], i32 15
902; CHECK-MAXBW-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP8]], i32 15
903; CHECK-MAXBW-NEXT:    br i1 true, label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
904; CHECK-MAXBW:       scalar.ph:
905; CHECK-MAXBW-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
906; CHECK-MAXBW-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
907; CHECK-MAXBW-NEXT:    br label [[FOR_BODY:%.*]]
908; CHECK-MAXBW:       for.body:
909; CHECK-MAXBW-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
910; CHECK-MAXBW-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
911; CHECK-MAXBW-NEXT:    [[GEP_A:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV]]
912; CHECK-MAXBW-NEXT:    [[LOAD_A:%.*]] = load i8, ptr [[GEP_A]], align 1
913; CHECK-MAXBW-NEXT:    [[EXT_A:%.*]] = zext i8 [[LOAD_A]] to i32
914; CHECK-MAXBW-NEXT:    [[GEP_B:%.*]] = getelementptr i8, ptr [[B]], i64 [[IV]]
915; CHECK-MAXBW-NEXT:    [[LOAD_B:%.*]] = load i8, ptr [[GEP_B]], align 1
916; CHECK-MAXBW-NEXT:    [[EXT_B:%.*]] = zext i8 [[LOAD_B]] to i32
917; CHECK-MAXBW-NEXT:    [[MUL:%.*]] = mul i32 [[EXT_B]], [[EXT_A]]
918; CHECK-MAXBW-NEXT:    [[ADD]] = add i32 [[MUL]], [[EXT_B]]
919; CHECK-MAXBW-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
920; CHECK-MAXBW-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
921; CHECK-MAXBW-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
922; CHECK-MAXBW:       for.exit:
923; CHECK-MAXBW-NEXT:    [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ]
924; CHECK-MAXBW-NEXT:    ret i32 [[ADD_LCSSA]]
925;
926entry:
927  br label %for.body
928
929for.body:                                         ; preds = %for.body, %entry
930  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
931  %accum = phi i32 [ 0, %entry ], [ %add, %for.body ]
932  %gep.a = getelementptr i8, ptr %a, i64 %iv
933  %load.a = load i8, ptr %gep.a, align 1
934  %ext.a = zext i8 %load.a to i32
935  %gep.b = getelementptr i8, ptr %b, i64 %iv
936  %load.b = load i8, ptr %gep.b, align 1
937  %ext.b = zext i8 %load.b to i32
938  %mul = mul i32 %ext.b, %ext.a
939  %add = add i32 %mul, %ext.b
940  %iv.next = add i64 %iv, 1
941  %exitcond.not = icmp eq i64 %iv.next, 1024
942  br i1 %exitcond.not, label %for.exit, label %for.body
943
944for.exit:                        ; preds = %for.body
945  ret i32 %add
946}
947
948define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) {
949; CHECK-INTERLEAVE1-LABEL: define i32 @dotp_unrolled(
950; CHECK-INTERLEAVE1-SAME: i32 [[NUM_OUT:%.*]], i64 [[NUM_IN:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
951; CHECK-INTERLEAVE1-NEXT:  entry:
952; CHECK-INTERLEAVE1-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[NUM_IN]], 16
953; CHECK-INTERLEAVE1-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
954; CHECK-INTERLEAVE1:       vector.ph:
955; CHECK-INTERLEAVE1-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[NUM_IN]], 16
956; CHECK-INTERLEAVE1-NEXT:    [[N_VEC:%.*]] = sub i64 [[NUM_IN]], [[N_MOD_VF]]
957; CHECK-INTERLEAVE1-NEXT:    br label [[VECTOR_BODY:%.*]]
958; CHECK-INTERLEAVE1:       vector.body:
959; CHECK-INTERLEAVE1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
960; CHECK-INTERLEAVE1-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE13:%.*]], [[VECTOR_BODY]] ]
961; CHECK-INTERLEAVE1-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE10:%.*]], [[VECTOR_BODY]] ]
962; CHECK-INTERLEAVE1-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE7:%.*]], [[VECTOR_BODY]] ]
963; CHECK-INTERLEAVE1-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ]
964; CHECK-INTERLEAVE1-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
965; CHECK-INTERLEAVE1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP0]]
966; CHECK-INTERLEAVE1-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP0]]
967; CHECK-INTERLEAVE1-NEXT:    [[TMP3:%.*]] = or disjoint i64 [[TMP0]], 1
968; CHECK-INTERLEAVE1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP3]]
969; CHECK-INTERLEAVE1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP3]]
970; CHECK-INTERLEAVE1-NEXT:    [[TMP6:%.*]] = or disjoint i64 [[TMP0]], 2
971; CHECK-INTERLEAVE1-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP6]]
972; CHECK-INTERLEAVE1-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP6]]
973; CHECK-INTERLEAVE1-NEXT:    [[TMP9:%.*]] = or disjoint i64 [[TMP0]], 3
974; CHECK-INTERLEAVE1-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP9]]
975; CHECK-INTERLEAVE1-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP9]]
976; CHECK-INTERLEAVE1-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0
977; CHECK-INTERLEAVE1-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP12]], align 1
978; CHECK-INTERLEAVE1-NEXT:    [[TMP13:%.*]] = sext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
979; CHECK-INTERLEAVE1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0
980; CHECK-INTERLEAVE1-NEXT:    [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP14]], align 1
981; CHECK-INTERLEAVE1-NEXT:    [[TMP15:%.*]] = sext <16 x i8> [[WIDE_LOAD4]] to <16 x i32>
982; CHECK-INTERLEAVE1-NEXT:    [[TMP16:%.*]] = mul nsw <16 x i32> [[TMP15]], [[TMP13]]
983; CHECK-INTERLEAVE1-NEXT:    [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI3]], <16 x i32> [[TMP16]])
984; CHECK-INTERLEAVE1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0
985; CHECK-INTERLEAVE1-NEXT:    [[WIDE_LOAD5:%.*]] = load <16 x i8>, ptr [[TMP17]], align 1
986; CHECK-INTERLEAVE1-NEXT:    [[TMP18:%.*]] = sext <16 x i8> [[WIDE_LOAD5]] to <16 x i32>
987; CHECK-INTERLEAVE1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 0
988; CHECK-INTERLEAVE1-NEXT:    [[WIDE_LOAD6:%.*]] = load <16 x i8>, ptr [[TMP19]], align 1
989; CHECK-INTERLEAVE1-NEXT:    [[TMP20:%.*]] = sext <16 x i8> [[WIDE_LOAD6]] to <16 x i32>
990; CHECK-INTERLEAVE1-NEXT:    [[TMP21:%.*]] = mul nsw <16 x i32> [[TMP18]], [[TMP20]]
991; CHECK-INTERLEAVE1-NEXT:    [[PARTIAL_REDUCE7]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI2]], <16 x i32> [[TMP21]])
992; CHECK-INTERLEAVE1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i32 0
993; CHECK-INTERLEAVE1-NEXT:    [[WIDE_LOAD8:%.*]] = load <16 x i8>, ptr [[TMP22]], align 1
994; CHECK-INTERLEAVE1-NEXT:    [[TMP23:%.*]] = sext <16 x i8> [[WIDE_LOAD8]] to <16 x i32>
995; CHECK-INTERLEAVE1-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0
996; CHECK-INTERLEAVE1-NEXT:    [[WIDE_LOAD9:%.*]] = load <16 x i8>, ptr [[TMP24]], align 1
997; CHECK-INTERLEAVE1-NEXT:    [[TMP25:%.*]] = sext <16 x i8> [[WIDE_LOAD9]] to <16 x i32>
998; CHECK-INTERLEAVE1-NEXT:    [[TMP26:%.*]] = mul nsw <16 x i32> [[TMP23]], [[TMP25]]
999; CHECK-INTERLEAVE1-NEXT:    [[PARTIAL_REDUCE10]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI1]], <16 x i32> [[TMP26]])
1000; CHECK-INTERLEAVE1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 0
1001; CHECK-INTERLEAVE1-NEXT:    [[WIDE_LOAD11:%.*]] = load <16 x i8>, ptr [[TMP27]], align 1
1002; CHECK-INTERLEAVE1-NEXT:    [[TMP28:%.*]] = sext <16 x i8> [[WIDE_LOAD11]] to <16 x i32>
1003; CHECK-INTERLEAVE1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0
1004; CHECK-INTERLEAVE1-NEXT:    [[WIDE_LOAD12:%.*]] = load <16 x i8>, ptr [[TMP29]], align 1
1005; CHECK-INTERLEAVE1-NEXT:    [[TMP30:%.*]] = sext <16 x i8> [[WIDE_LOAD12]] to <16 x i32>
1006; CHECK-INTERLEAVE1-NEXT:    [[TMP31:%.*]] = mul nsw <16 x i32> [[TMP28]], [[TMP30]]
1007; CHECK-INTERLEAVE1-NEXT:    [[PARTIAL_REDUCE13]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP31]])
1008; CHECK-INTERLEAVE1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
1009; CHECK-INTERLEAVE1-NEXT:    [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1010; CHECK-INTERLEAVE1-NEXT:    br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
1011; CHECK-INTERLEAVE1:       middle.block:
1012; CHECK-INTERLEAVE1-NEXT:    [[TMP33:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE13]])
1013; CHECK-INTERLEAVE1-NEXT:    [[TMP34:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE10]])
1014; CHECK-INTERLEAVE1-NEXT:    [[TMP35:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE7]])
1015; CHECK-INTERLEAVE1-NEXT:    [[TMP36:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE]])
1016; CHECK-INTERLEAVE1-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[NUM_IN]], [[N_VEC]]
1017; CHECK-INTERLEAVE1-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
1018; CHECK-INTERLEAVE1:       scalar.ph:
1019; CHECK-INTERLEAVE1-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1020; CHECK-INTERLEAVE1-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP33]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
1021; CHECK-INTERLEAVE1-NEXT:    [[BC_MERGE_RDX14:%.*]] = phi i32 [ [[TMP34]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
1022; CHECK-INTERLEAVE1-NEXT:    [[BC_MERGE_RDX15:%.*]] = phi i32 [ [[TMP35]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
1023; CHECK-INTERLEAVE1-NEXT:    [[BC_MERGE_RDX16:%.*]] = phi i32 [ [[TMP36]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
1024; CHECK-INTERLEAVE1-NEXT:    br label [[FOR_BODY:%.*]]
1025; CHECK-INTERLEAVE1:       for.body:
1026; CHECK-INTERLEAVE1-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
1027; CHECK-INTERLEAVE1-NEXT:    [[ACCUM3:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD_A3:%.*]], [[FOR_BODY]] ]
1028; CHECK-INTERLEAVE1-NEXT:    [[ACCUM2:%.*]] = phi i32 [ [[BC_MERGE_RDX14]], [[SCALAR_PH]] ], [ [[ADD_A2:%.*]], [[FOR_BODY]] ]
1029; CHECK-INTERLEAVE1-NEXT:    [[ACCUM1:%.*]] = phi i32 [ [[BC_MERGE_RDX15]], [[SCALAR_PH]] ], [ [[ADD_A1:%.*]], [[FOR_BODY]] ]
1030; CHECK-INTERLEAVE1-NEXT:    [[ACCUM0:%.*]] = phi i32 [ [[BC_MERGE_RDX16]], [[SCALAR_PH]] ], [ [[ADD_A0:%.*]], [[FOR_BODY]] ]
1031; CHECK-INTERLEAVE1-NEXT:    [[GEP_A0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
1032; CHECK-INTERLEAVE1-NEXT:    [[GEP_B0:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IV]]
1033; CHECK-INTERLEAVE1-NEXT:    [[OFFSET_1:%.*]] = or disjoint i64 [[IV]], 1
1034; CHECK-INTERLEAVE1-NEXT:    [[GEP_A1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[OFFSET_1]]
1035; CHECK-INTERLEAVE1-NEXT:    [[GEP_B1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[OFFSET_1]]
1036; CHECK-INTERLEAVE1-NEXT:    [[OFFSET_2:%.*]] = or disjoint i64 [[IV]], 2
1037; CHECK-INTERLEAVE1-NEXT:    [[GEP_A2:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[OFFSET_2]]
1038; CHECK-INTERLEAVE1-NEXT:    [[GEP_B2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[OFFSET_2]]
1039; CHECK-INTERLEAVE1-NEXT:    [[OFFSET_3:%.*]] = or disjoint i64 [[IV]], 3
1040; CHECK-INTERLEAVE1-NEXT:    [[GEP_A3:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[OFFSET_3]]
1041; CHECK-INTERLEAVE1-NEXT:    [[GEP_B3:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[OFFSET_3]]
1042; CHECK-INTERLEAVE1-NEXT:    [[LOAD_A0:%.*]] = load i8, ptr [[GEP_A0]], align 1
1043; CHECK-INTERLEAVE1-NEXT:    [[EXT_A0:%.*]] = sext i8 [[LOAD_A0]] to i32
1044; CHECK-INTERLEAVE1-NEXT:    [[LOAD_B0:%.*]] = load i8, ptr [[GEP_B0]], align 1
1045; CHECK-INTERLEAVE1-NEXT:    [[EXT_B0:%.*]] = sext i8 [[LOAD_B0]] to i32
1046; CHECK-INTERLEAVE1-NEXT:    [[MUL_A0:%.*]] = mul nsw i32 [[EXT_B0]], [[EXT_A0]]
1047; CHECK-INTERLEAVE1-NEXT:    [[ADD_A0]] = add nsw i32 [[MUL_A0]], [[ACCUM0]]
1048; CHECK-INTERLEAVE1-NEXT:    [[LOAD_A1:%.*]] = load i8, ptr [[GEP_A1]], align 1
1049; CHECK-INTERLEAVE1-NEXT:    [[EXT_A1:%.*]] = sext i8 [[LOAD_A1]] to i32
1050; CHECK-INTERLEAVE1-NEXT:    [[LOAD_B1:%.*]] = load i8, ptr [[GEP_B1]], align 1
1051; CHECK-INTERLEAVE1-NEXT:    [[EXT_B1:%.*]] = sext i8 [[LOAD_B1]] to i32
1052; CHECK-INTERLEAVE1-NEXT:    [[MUL_A1:%.*]] = mul nsw i32 [[EXT_A1]], [[EXT_B1]]
1053; CHECK-INTERLEAVE1-NEXT:    [[ADD_A1]] = add nsw i32 [[MUL_A1]], [[ACCUM1]]
1054; CHECK-INTERLEAVE1-NEXT:    [[LOAD_A2:%.*]] = load i8, ptr [[GEP_A2]], align 1
1055; CHECK-INTERLEAVE1-NEXT:    [[EXT_A2:%.*]] = sext i8 [[LOAD_A2]] to i32
1056; CHECK-INTERLEAVE1-NEXT:    [[LOAD_B2:%.*]] = load i8, ptr [[GEP_B2]], align 1
1057; CHECK-INTERLEAVE1-NEXT:    [[EXT_B2:%.*]] = sext i8 [[LOAD_B2]] to i32
1058; CHECK-INTERLEAVE1-NEXT:    [[MUL_A2:%.*]] = mul nsw i32 [[EXT_A2]], [[EXT_B2]]
1059; CHECK-INTERLEAVE1-NEXT:    [[ADD_A2]] = add nsw i32 [[MUL_A2]], [[ACCUM2]]
1060; CHECK-INTERLEAVE1-NEXT:    [[LOAD_A3:%.*]] = load i8, ptr [[GEP_A3]], align 1
1061; CHECK-INTERLEAVE1-NEXT:    [[EXT_A3:%.*]] = sext i8 [[LOAD_A3]] to i32
1062; CHECK-INTERLEAVE1-NEXT:    [[LOAD_B3:%.*]] = load i8, ptr [[GEP_B3]], align 1
1063; CHECK-INTERLEAVE1-NEXT:    [[EXT_B3:%.*]] = sext i8 [[LOAD_B3]] to i32
1064; CHECK-INTERLEAVE1-NEXT:    [[MUL_A3:%.*]] = mul nsw i32 [[EXT_A3]], [[EXT_B3]]
1065; CHECK-INTERLEAVE1-NEXT:    [[ADD_A3]] = add nsw i32 [[MUL_A3]], [[ACCUM3]]
1066; CHECK-INTERLEAVE1-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1067; CHECK-INTERLEAVE1-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[NUM_IN]]
1068; CHECK-INTERLEAVE1-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
1069; CHECK-INTERLEAVE1:       exit:
1070; CHECK-INTERLEAVE1-NEXT:    [[ADD_A0_LCSSA:%.*]] = phi i32 [ [[ADD_A0]], [[FOR_BODY]] ], [ [[TMP36]], [[MIDDLE_BLOCK]] ]
1071; CHECK-INTERLEAVE1-NEXT:    [[ADD_A1_LCSSA:%.*]] = phi i32 [ [[ADD_A1]], [[FOR_BODY]] ], [ [[TMP35]], [[MIDDLE_BLOCK]] ]
1072; CHECK-INTERLEAVE1-NEXT:    [[ADD_A2_LCSSA:%.*]] = phi i32 [ [[ADD_A2]], [[FOR_BODY]] ], [ [[TMP34]], [[MIDDLE_BLOCK]] ]
1073; CHECK-INTERLEAVE1-NEXT:    [[ADD_A3_LCSSA:%.*]] = phi i32 [ [[ADD_A3]], [[FOR_BODY]] ], [ [[TMP33]], [[MIDDLE_BLOCK]] ]
1074; CHECK-INTERLEAVE1-NEXT:    [[RESULT0:%.*]] = add nsw i32 [[ADD_A0_LCSSA]], [[ADD_A1_LCSSA]]
1075; CHECK-INTERLEAVE1-NEXT:    [[RESULT1:%.*]] = add nsw i32 [[ADD_A2_LCSSA]], [[ADD_A3_LCSSA]]
1076; CHECK-INTERLEAVE1-NEXT:    [[RESULT:%.*]] = add nsw i32 [[RESULT0]], [[RESULT1]]
1077; CHECK-INTERLEAVE1-NEXT:    ret i32 [[RESULT]]
1078;
1079; CHECK-INTERLEAVED-LABEL: define i32 @dotp_unrolled(
1080; CHECK-INTERLEAVED-SAME: i32 [[NUM_OUT:%.*]], i64 [[NUM_IN:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
1081; CHECK-INTERLEAVED-NEXT:  entry:
1082; CHECK-INTERLEAVED-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[NUM_IN]], 16
1083; CHECK-INTERLEAVED-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1084; CHECK-INTERLEAVED:       vector.ph:
1085; CHECK-INTERLEAVED-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[NUM_IN]], 16
1086; CHECK-INTERLEAVED-NEXT:    [[N_VEC:%.*]] = sub i64 [[NUM_IN]], [[N_MOD_VF]]
1087; CHECK-INTERLEAVED-NEXT:    br label [[VECTOR_BODY:%.*]]
1088; CHECK-INTERLEAVED:       vector.body:
1089; CHECK-INTERLEAVED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1090; CHECK-INTERLEAVED-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE13:%.*]], [[VECTOR_BODY]] ]
1091; CHECK-INTERLEAVED-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE10:%.*]], [[VECTOR_BODY]] ]
1092; CHECK-INTERLEAVED-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE7:%.*]], [[VECTOR_BODY]] ]
1093; CHECK-INTERLEAVED-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ]
1094; CHECK-INTERLEAVED-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
1095; CHECK-INTERLEAVED-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP0]]
1096; CHECK-INTERLEAVED-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP0]]
1097; CHECK-INTERLEAVED-NEXT:    [[TMP3:%.*]] = or disjoint i64 [[TMP0]], 1
1098; CHECK-INTERLEAVED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP3]]
1099; CHECK-INTERLEAVED-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP3]]
1100; CHECK-INTERLEAVED-NEXT:    [[TMP6:%.*]] = or disjoint i64 [[TMP0]], 2
1101; CHECK-INTERLEAVED-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP6]]
1102; CHECK-INTERLEAVED-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP6]]
1103; CHECK-INTERLEAVED-NEXT:    [[TMP9:%.*]] = or disjoint i64 [[TMP0]], 3
1104; CHECK-INTERLEAVED-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP9]]
1105; CHECK-INTERLEAVED-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP9]]
1106; CHECK-INTERLEAVED-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0
1107; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP12]], align 1
1108; CHECK-INTERLEAVED-NEXT:    [[TMP13:%.*]] = sext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
1109; CHECK-INTERLEAVED-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0
1110; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP14]], align 1
1111; CHECK-INTERLEAVED-NEXT:    [[TMP15:%.*]] = sext <16 x i8> [[WIDE_LOAD4]] to <16 x i32>
1112; CHECK-INTERLEAVED-NEXT:    [[TMP16:%.*]] = mul nsw <16 x i32> [[TMP15]], [[TMP13]]
1113; CHECK-INTERLEAVED-NEXT:    [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI3]], <16 x i32> [[TMP16]])
1114; CHECK-INTERLEAVED-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0
1115; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD5:%.*]] = load <16 x i8>, ptr [[TMP17]], align 1
1116; CHECK-INTERLEAVED-NEXT:    [[TMP18:%.*]] = sext <16 x i8> [[WIDE_LOAD5]] to <16 x i32>
1117; CHECK-INTERLEAVED-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 0
1118; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD6:%.*]] = load <16 x i8>, ptr [[TMP19]], align 1
1119; CHECK-INTERLEAVED-NEXT:    [[TMP20:%.*]] = sext <16 x i8> [[WIDE_LOAD6]] to <16 x i32>
1120; CHECK-INTERLEAVED-NEXT:    [[TMP21:%.*]] = mul nsw <16 x i32> [[TMP18]], [[TMP20]]
1121; CHECK-INTERLEAVED-NEXT:    [[PARTIAL_REDUCE7]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI2]], <16 x i32> [[TMP21]])
1122; CHECK-INTERLEAVED-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i32 0
1123; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD8:%.*]] = load <16 x i8>, ptr [[TMP22]], align 1
1124; CHECK-INTERLEAVED-NEXT:    [[TMP23:%.*]] = sext <16 x i8> [[WIDE_LOAD8]] to <16 x i32>
1125; CHECK-INTERLEAVED-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0
1126; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD9:%.*]] = load <16 x i8>, ptr [[TMP24]], align 1
1127; CHECK-INTERLEAVED-NEXT:    [[TMP25:%.*]] = sext <16 x i8> [[WIDE_LOAD9]] to <16 x i32>
1128; CHECK-INTERLEAVED-NEXT:    [[TMP26:%.*]] = mul nsw <16 x i32> [[TMP23]], [[TMP25]]
1129; CHECK-INTERLEAVED-NEXT:    [[PARTIAL_REDUCE10]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI1]], <16 x i32> [[TMP26]])
1130; CHECK-INTERLEAVED-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 0
1131; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD11:%.*]] = load <16 x i8>, ptr [[TMP27]], align 1
1132; CHECK-INTERLEAVED-NEXT:    [[TMP28:%.*]] = sext <16 x i8> [[WIDE_LOAD11]] to <16 x i32>
1133; CHECK-INTERLEAVED-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0
1134; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD12:%.*]] = load <16 x i8>, ptr [[TMP29]], align 1
1135; CHECK-INTERLEAVED-NEXT:    [[TMP30:%.*]] = sext <16 x i8> [[WIDE_LOAD12]] to <16 x i32>
1136; CHECK-INTERLEAVED-NEXT:    [[TMP31:%.*]] = mul nsw <16 x i32> [[TMP28]], [[TMP30]]
1137; CHECK-INTERLEAVED-NEXT:    [[PARTIAL_REDUCE13]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP31]])
1138; CHECK-INTERLEAVED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
1139; CHECK-INTERLEAVED-NEXT:    [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1140; CHECK-INTERLEAVED-NEXT:    br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
1141; CHECK-INTERLEAVED:       middle.block:
1142; CHECK-INTERLEAVED-NEXT:    [[TMP33:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE13]])
1143; CHECK-INTERLEAVED-NEXT:    [[TMP34:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE10]])
1144; CHECK-INTERLEAVED-NEXT:    [[TMP35:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE7]])
1145; CHECK-INTERLEAVED-NEXT:    [[TMP36:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE]])
1146; CHECK-INTERLEAVED-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[NUM_IN]], [[N_VEC]]
1147; CHECK-INTERLEAVED-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
1148; CHECK-INTERLEAVED:       scalar.ph:
1149; CHECK-INTERLEAVED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1150; CHECK-INTERLEAVED-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP33]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
1151; CHECK-INTERLEAVED-NEXT:    [[BC_MERGE_RDX14:%.*]] = phi i32 [ [[TMP34]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
1152; CHECK-INTERLEAVED-NEXT:    [[BC_MERGE_RDX15:%.*]] = phi i32 [ [[TMP35]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
1153; CHECK-INTERLEAVED-NEXT:    [[BC_MERGE_RDX16:%.*]] = phi i32 [ [[TMP36]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
1154; CHECK-INTERLEAVED-NEXT:    br label [[FOR_BODY:%.*]]
1155; CHECK-INTERLEAVED:       for.body:
1156; CHECK-INTERLEAVED-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
1157; CHECK-INTERLEAVED-NEXT:    [[ACCUM3:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD_A3:%.*]], [[FOR_BODY]] ]
1158; CHECK-INTERLEAVED-NEXT:    [[ACCUM2:%.*]] = phi i32 [ [[BC_MERGE_RDX14]], [[SCALAR_PH]] ], [ [[ADD_A2:%.*]], [[FOR_BODY]] ]
1159; CHECK-INTERLEAVED-NEXT:    [[ACCUM1:%.*]] = phi i32 [ [[BC_MERGE_RDX15]], [[SCALAR_PH]] ], [ [[ADD_A1:%.*]], [[FOR_BODY]] ]
1160; CHECK-INTERLEAVED-NEXT:    [[ACCUM0:%.*]] = phi i32 [ [[BC_MERGE_RDX16]], [[SCALAR_PH]] ], [ [[ADD_A0:%.*]], [[FOR_BODY]] ]
1161; CHECK-INTERLEAVED-NEXT:    [[GEP_A0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
1162; CHECK-INTERLEAVED-NEXT:    [[GEP_B0:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IV]]
1163; CHECK-INTERLEAVED-NEXT:    [[OFFSET_1:%.*]] = or disjoint i64 [[IV]], 1
1164; CHECK-INTERLEAVED-NEXT:    [[GEP_A1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[OFFSET_1]]
1165; CHECK-INTERLEAVED-NEXT:    [[GEP_B1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[OFFSET_1]]
1166; CHECK-INTERLEAVED-NEXT:    [[OFFSET_2:%.*]] = or disjoint i64 [[IV]], 2
1167; CHECK-INTERLEAVED-NEXT:    [[GEP_A2:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[OFFSET_2]]
1168; CHECK-INTERLEAVED-NEXT:    [[GEP_B2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[OFFSET_2]]
1169; CHECK-INTERLEAVED-NEXT:    [[OFFSET_3:%.*]] = or disjoint i64 [[IV]], 3
1170; CHECK-INTERLEAVED-NEXT:    [[GEP_A3:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[OFFSET_3]]
1171; CHECK-INTERLEAVED-NEXT:    [[GEP_B3:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[OFFSET_3]]
1172; CHECK-INTERLEAVED-NEXT:    [[LOAD_A0:%.*]] = load i8, ptr [[GEP_A0]], align 1
1173; CHECK-INTERLEAVED-NEXT:    [[EXT_A0:%.*]] = sext i8 [[LOAD_A0]] to i32
1174; CHECK-INTERLEAVED-NEXT:    [[LOAD_B0:%.*]] = load i8, ptr [[GEP_B0]], align 1
1175; CHECK-INTERLEAVED-NEXT:    [[EXT_B0:%.*]] = sext i8 [[LOAD_B0]] to i32
1176; CHECK-INTERLEAVED-NEXT:    [[MUL_A0:%.*]] = mul nsw i32 [[EXT_B0]], [[EXT_A0]]
1177; CHECK-INTERLEAVED-NEXT:    [[ADD_A0]] = add nsw i32 [[MUL_A0]], [[ACCUM0]]
1178; CHECK-INTERLEAVED-NEXT:    [[LOAD_A1:%.*]] = load i8, ptr [[GEP_A1]], align 1
1179; CHECK-INTERLEAVED-NEXT:    [[EXT_A1:%.*]] = sext i8 [[LOAD_A1]] to i32
1180; CHECK-INTERLEAVED-NEXT:    [[LOAD_B1:%.*]] = load i8, ptr [[GEP_B1]], align 1
1181; CHECK-INTERLEAVED-NEXT:    [[EXT_B1:%.*]] = sext i8 [[LOAD_B1]] to i32
1182; CHECK-INTERLEAVED-NEXT:    [[MUL_A1:%.*]] = mul nsw i32 [[EXT_A1]], [[EXT_B1]]
1183; CHECK-INTERLEAVED-NEXT:    [[ADD_A1]] = add nsw i32 [[MUL_A1]], [[ACCUM1]]
1184; CHECK-INTERLEAVED-NEXT:    [[LOAD_A2:%.*]] = load i8, ptr [[GEP_A2]], align 1
1185; CHECK-INTERLEAVED-NEXT:    [[EXT_A2:%.*]] = sext i8 [[LOAD_A2]] to i32
1186; CHECK-INTERLEAVED-NEXT:    [[LOAD_B2:%.*]] = load i8, ptr [[GEP_B2]], align 1
1187; CHECK-INTERLEAVED-NEXT:    [[EXT_B2:%.*]] = sext i8 [[LOAD_B2]] to i32
1188; CHECK-INTERLEAVED-NEXT:    [[MUL_A2:%.*]] = mul nsw i32 [[EXT_A2]], [[EXT_B2]]
1189; CHECK-INTERLEAVED-NEXT:    [[ADD_A2]] = add nsw i32 [[MUL_A2]], [[ACCUM2]]
1190; CHECK-INTERLEAVED-NEXT:    [[LOAD_A3:%.*]] = load i8, ptr [[GEP_A3]], align 1
1191; CHECK-INTERLEAVED-NEXT:    [[EXT_A3:%.*]] = sext i8 [[LOAD_A3]] to i32
1192; CHECK-INTERLEAVED-NEXT:    [[LOAD_B3:%.*]] = load i8, ptr [[GEP_B3]], align 1
1193; CHECK-INTERLEAVED-NEXT:    [[EXT_B3:%.*]] = sext i8 [[LOAD_B3]] to i32
1194; CHECK-INTERLEAVED-NEXT:    [[MUL_A3:%.*]] = mul nsw i32 [[EXT_A3]], [[EXT_B3]]
1195; CHECK-INTERLEAVED-NEXT:    [[ADD_A3]] = add nsw i32 [[MUL_A3]], [[ACCUM3]]
1196; CHECK-INTERLEAVED-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1197; CHECK-INTERLEAVED-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[NUM_IN]]
1198; CHECK-INTERLEAVED-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
1199; CHECK-INTERLEAVED:       exit:
1200; CHECK-INTERLEAVED-NEXT:    [[ADD_A0_LCSSA:%.*]] = phi i32 [ [[ADD_A0]], [[FOR_BODY]] ], [ [[TMP36]], [[MIDDLE_BLOCK]] ]
1201; CHECK-INTERLEAVED-NEXT:    [[ADD_A1_LCSSA:%.*]] = phi i32 [ [[ADD_A1]], [[FOR_BODY]] ], [ [[TMP35]], [[MIDDLE_BLOCK]] ]
1202; CHECK-INTERLEAVED-NEXT:    [[ADD_A2_LCSSA:%.*]] = phi i32 [ [[ADD_A2]], [[FOR_BODY]] ], [ [[TMP34]], [[MIDDLE_BLOCK]] ]
1203; CHECK-INTERLEAVED-NEXT:    [[ADD_A3_LCSSA:%.*]] = phi i32 [ [[ADD_A3]], [[FOR_BODY]] ], [ [[TMP33]], [[MIDDLE_BLOCK]] ]
1204; CHECK-INTERLEAVED-NEXT:    [[RESULT0:%.*]] = add nsw i32 [[ADD_A0_LCSSA]], [[ADD_A1_LCSSA]]
1205; CHECK-INTERLEAVED-NEXT:    [[RESULT1:%.*]] = add nsw i32 [[ADD_A2_LCSSA]], [[ADD_A3_LCSSA]]
1206; CHECK-INTERLEAVED-NEXT:    [[RESULT:%.*]] = add nsw i32 [[RESULT0]], [[RESULT1]]
1207; CHECK-INTERLEAVED-NEXT:    ret i32 [[RESULT]]
1208;
1209; CHECK-MAXBW-LABEL: define i32 @dotp_unrolled(
1210; CHECK-MAXBW-SAME: i32 [[NUM_OUT:%.*]], i64 [[NUM_IN:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
1211; CHECK-MAXBW-NEXT:  entry:
1212; CHECK-MAXBW-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[NUM_IN]], 16
1213; CHECK-MAXBW-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1214; CHECK-MAXBW:       vector.ph:
1215; CHECK-MAXBW-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[NUM_IN]], 16
1216; CHECK-MAXBW-NEXT:    [[N_VEC:%.*]] = sub i64 [[NUM_IN]], [[N_MOD_VF]]
1217; CHECK-MAXBW-NEXT:    br label [[VECTOR_BODY:%.*]]
1218; CHECK-MAXBW:       vector.body:
1219; CHECK-MAXBW-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1220; CHECK-MAXBW-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE13:%.*]], [[VECTOR_BODY]] ]
1221; CHECK-MAXBW-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE10:%.*]], [[VECTOR_BODY]] ]
1222; CHECK-MAXBW-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE7:%.*]], [[VECTOR_BODY]] ]
1223; CHECK-MAXBW-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ]
1224; CHECK-MAXBW-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
1225; CHECK-MAXBW-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP0]]
1226; CHECK-MAXBW-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP0]]
1227; CHECK-MAXBW-NEXT:    [[TMP3:%.*]] = or disjoint i64 [[TMP0]], 1
1228; CHECK-MAXBW-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP3]]
1229; CHECK-MAXBW-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP3]]
1230; CHECK-MAXBW-NEXT:    [[TMP6:%.*]] = or disjoint i64 [[TMP0]], 2
1231; CHECK-MAXBW-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP6]]
1232; CHECK-MAXBW-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP6]]
1233; CHECK-MAXBW-NEXT:    [[TMP9:%.*]] = or disjoint i64 [[TMP0]], 3
1234; CHECK-MAXBW-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP9]]
1235; CHECK-MAXBW-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP9]]
1236; CHECK-MAXBW-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0
1237; CHECK-MAXBW-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP12]], align 1
1238; CHECK-MAXBW-NEXT:    [[TMP13:%.*]] = sext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
1239; CHECK-MAXBW-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0
1240; CHECK-MAXBW-NEXT:    [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP14]], align 1
1241; CHECK-MAXBW-NEXT:    [[TMP15:%.*]] = sext <16 x i8> [[WIDE_LOAD4]] to <16 x i32>
1242; CHECK-MAXBW-NEXT:    [[TMP16:%.*]] = mul nsw <16 x i32> [[TMP15]], [[TMP13]]
1243; CHECK-MAXBW-NEXT:    [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI3]], <16 x i32> [[TMP16]])
1244; CHECK-MAXBW-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0
1245; CHECK-MAXBW-NEXT:    [[WIDE_LOAD5:%.*]] = load <16 x i8>, ptr [[TMP17]], align 1
1246; CHECK-MAXBW-NEXT:    [[TMP18:%.*]] = sext <16 x i8> [[WIDE_LOAD5]] to <16 x i32>
1247; CHECK-MAXBW-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 0
1248; CHECK-MAXBW-NEXT:    [[WIDE_LOAD6:%.*]] = load <16 x i8>, ptr [[TMP19]], align 1
1249; CHECK-MAXBW-NEXT:    [[TMP20:%.*]] = sext <16 x i8> [[WIDE_LOAD6]] to <16 x i32>
1250; CHECK-MAXBW-NEXT:    [[TMP21:%.*]] = mul nsw <16 x i32> [[TMP18]], [[TMP20]]
1251; CHECK-MAXBW-NEXT:    [[PARTIAL_REDUCE7]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI2]], <16 x i32> [[TMP21]])
1252; CHECK-MAXBW-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i32 0
1253; CHECK-MAXBW-NEXT:    [[WIDE_LOAD8:%.*]] = load <16 x i8>, ptr [[TMP22]], align 1
1254; CHECK-MAXBW-NEXT:    [[TMP23:%.*]] = sext <16 x i8> [[WIDE_LOAD8]] to <16 x i32>
1255; CHECK-MAXBW-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0
1256; CHECK-MAXBW-NEXT:    [[WIDE_LOAD9:%.*]] = load <16 x i8>, ptr [[TMP24]], align 1
1257; CHECK-MAXBW-NEXT:    [[TMP25:%.*]] = sext <16 x i8> [[WIDE_LOAD9]] to <16 x i32>
1258; CHECK-MAXBW-NEXT:    [[TMP26:%.*]] = mul nsw <16 x i32> [[TMP23]], [[TMP25]]
1259; CHECK-MAXBW-NEXT:    [[PARTIAL_REDUCE10]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI1]], <16 x i32> [[TMP26]])
1260; CHECK-MAXBW-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 0
1261; CHECK-MAXBW-NEXT:    [[WIDE_LOAD11:%.*]] = load <16 x i8>, ptr [[TMP27]], align 1
1262; CHECK-MAXBW-NEXT:    [[TMP28:%.*]] = sext <16 x i8> [[WIDE_LOAD11]] to <16 x i32>
1263; CHECK-MAXBW-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0
1264; CHECK-MAXBW-NEXT:    [[WIDE_LOAD12:%.*]] = load <16 x i8>, ptr [[TMP29]], align 1
1265; CHECK-MAXBW-NEXT:    [[TMP30:%.*]] = sext <16 x i8> [[WIDE_LOAD12]] to <16 x i32>
1266; CHECK-MAXBW-NEXT:    [[TMP31:%.*]] = mul nsw <16 x i32> [[TMP28]], [[TMP30]]
1267; CHECK-MAXBW-NEXT:    [[PARTIAL_REDUCE13]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP31]])
1268; CHECK-MAXBW-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
1269; CHECK-MAXBW-NEXT:    [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1270; CHECK-MAXBW-NEXT:    br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
1271; CHECK-MAXBW:       middle.block:
1272; CHECK-MAXBW-NEXT:    [[TMP33:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE13]])
1273; CHECK-MAXBW-NEXT:    [[TMP34:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE10]])
1274; CHECK-MAXBW-NEXT:    [[TMP35:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE7]])
1275; CHECK-MAXBW-NEXT:    [[TMP36:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE]])
1276; CHECK-MAXBW-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[NUM_IN]], [[N_VEC]]
1277; CHECK-MAXBW-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
1278; CHECK-MAXBW:       scalar.ph:
1279; CHECK-MAXBW-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1280; CHECK-MAXBW-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP33]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
1281; CHECK-MAXBW-NEXT:    [[BC_MERGE_RDX14:%.*]] = phi i32 [ [[TMP34]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
1282; CHECK-MAXBW-NEXT:    [[BC_MERGE_RDX15:%.*]] = phi i32 [ [[TMP35]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
1283; CHECK-MAXBW-NEXT:    [[BC_MERGE_RDX16:%.*]] = phi i32 [ [[TMP36]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
1284; CHECK-MAXBW-NEXT:    br label [[FOR_BODY:%.*]]
1285; CHECK-MAXBW:       for.body:
1286; CHECK-MAXBW-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
1287; CHECK-MAXBW-NEXT:    [[ACCUM3:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD_A3:%.*]], [[FOR_BODY]] ]
1288; CHECK-MAXBW-NEXT:    [[ACCUM2:%.*]] = phi i32 [ [[BC_MERGE_RDX14]], [[SCALAR_PH]] ], [ [[ADD_A2:%.*]], [[FOR_BODY]] ]
1289; CHECK-MAXBW-NEXT:    [[ACCUM1:%.*]] = phi i32 [ [[BC_MERGE_RDX15]], [[SCALAR_PH]] ], [ [[ADD_A1:%.*]], [[FOR_BODY]] ]
1290; CHECK-MAXBW-NEXT:    [[ACCUM0:%.*]] = phi i32 [ [[BC_MERGE_RDX16]], [[SCALAR_PH]] ], [ [[ADD_A0:%.*]], [[FOR_BODY]] ]
1291; CHECK-MAXBW-NEXT:    [[GEP_A0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
1292; CHECK-MAXBW-NEXT:    [[GEP_B0:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IV]]
1293; CHECK-MAXBW-NEXT:    [[OFFSET_1:%.*]] = or disjoint i64 [[IV]], 1
1294; CHECK-MAXBW-NEXT:    [[GEP_A1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[OFFSET_1]]
1295; CHECK-MAXBW-NEXT:    [[GEP_B1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[OFFSET_1]]
1296; CHECK-MAXBW-NEXT:    [[OFFSET_2:%.*]] = or disjoint i64 [[IV]], 2
1297; CHECK-MAXBW-NEXT:    [[GEP_A2:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[OFFSET_2]]
1298; CHECK-MAXBW-NEXT:    [[GEP_B2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[OFFSET_2]]
1299; CHECK-MAXBW-NEXT:    [[OFFSET_3:%.*]] = or disjoint i64 [[IV]], 3
1300; CHECK-MAXBW-NEXT:    [[GEP_A3:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[OFFSET_3]]
1301; CHECK-MAXBW-NEXT:    [[GEP_B3:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[OFFSET_3]]
1302; CHECK-MAXBW-NEXT:    [[LOAD_A0:%.*]] = load i8, ptr [[GEP_A0]], align 1
1303; CHECK-MAXBW-NEXT:    [[EXT_A0:%.*]] = sext i8 [[LOAD_A0]] to i32
1304; CHECK-MAXBW-NEXT:    [[LOAD_B0:%.*]] = load i8, ptr [[GEP_B0]], align 1
1305; CHECK-MAXBW-NEXT:    [[EXT_B0:%.*]] = sext i8 [[LOAD_B0]] to i32
1306; CHECK-MAXBW-NEXT:    [[MUL_A0:%.*]] = mul nsw i32 [[EXT_B0]], [[EXT_A0]]
1307; CHECK-MAXBW-NEXT:    [[ADD_A0]] = add nsw i32 [[MUL_A0]], [[ACCUM0]]
1308; CHECK-MAXBW-NEXT:    [[LOAD_A1:%.*]] = load i8, ptr [[GEP_A1]], align 1
1309; CHECK-MAXBW-NEXT:    [[EXT_A1:%.*]] = sext i8 [[LOAD_A1]] to i32
1310; CHECK-MAXBW-NEXT:    [[LOAD_B1:%.*]] = load i8, ptr [[GEP_B1]], align 1
1311; CHECK-MAXBW-NEXT:    [[EXT_B1:%.*]] = sext i8 [[LOAD_B1]] to i32
1312; CHECK-MAXBW-NEXT:    [[MUL_A1:%.*]] = mul nsw i32 [[EXT_A1]], [[EXT_B1]]
1313; CHECK-MAXBW-NEXT:    [[ADD_A1]] = add nsw i32 [[MUL_A1]], [[ACCUM1]]
1314; CHECK-MAXBW-NEXT:    [[LOAD_A2:%.*]] = load i8, ptr [[GEP_A2]], align 1
1315; CHECK-MAXBW-NEXT:    [[EXT_A2:%.*]] = sext i8 [[LOAD_A2]] to i32
1316; CHECK-MAXBW-NEXT:    [[LOAD_B2:%.*]] = load i8, ptr [[GEP_B2]], align 1
1317; CHECK-MAXBW-NEXT:    [[EXT_B2:%.*]] = sext i8 [[LOAD_B2]] to i32
1318; CHECK-MAXBW-NEXT:    [[MUL_A2:%.*]] = mul nsw i32 [[EXT_A2]], [[EXT_B2]]
1319; CHECK-MAXBW-NEXT:    [[ADD_A2]] = add nsw i32 [[MUL_A2]], [[ACCUM2]]
1320; CHECK-MAXBW-NEXT:    [[LOAD_A3:%.*]] = load i8, ptr [[GEP_A3]], align 1
1321; CHECK-MAXBW-NEXT:    [[EXT_A3:%.*]] = sext i8 [[LOAD_A3]] to i32
1322; CHECK-MAXBW-NEXT:    [[LOAD_B3:%.*]] = load i8, ptr [[GEP_B3]], align 1
1323; CHECK-MAXBW-NEXT:    [[EXT_B3:%.*]] = sext i8 [[LOAD_B3]] to i32
1324; CHECK-MAXBW-NEXT:    [[MUL_A3:%.*]] = mul nsw i32 [[EXT_A3]], [[EXT_B3]]
1325; CHECK-MAXBW-NEXT:    [[ADD_A3]] = add nsw i32 [[MUL_A3]], [[ACCUM3]]
1326; CHECK-MAXBW-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1327; CHECK-MAXBW-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[NUM_IN]]
1328; CHECK-MAXBW-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
1329; CHECK-MAXBW:       exit:
1330; CHECK-MAXBW-NEXT:    [[ADD_A0_LCSSA:%.*]] = phi i32 [ [[ADD_A0]], [[FOR_BODY]] ], [ [[TMP36]], [[MIDDLE_BLOCK]] ]
1331; CHECK-MAXBW-NEXT:    [[ADD_A1_LCSSA:%.*]] = phi i32 [ [[ADD_A1]], [[FOR_BODY]] ], [ [[TMP35]], [[MIDDLE_BLOCK]] ]
1332; CHECK-MAXBW-NEXT:    [[ADD_A2_LCSSA:%.*]] = phi i32 [ [[ADD_A2]], [[FOR_BODY]] ], [ [[TMP34]], [[MIDDLE_BLOCK]] ]
1333; CHECK-MAXBW-NEXT:    [[ADD_A3_LCSSA:%.*]] = phi i32 [ [[ADD_A3]], [[FOR_BODY]] ], [ [[TMP33]], [[MIDDLE_BLOCK]] ]
1334; CHECK-MAXBW-NEXT:    [[RESULT0:%.*]] = add nsw i32 [[ADD_A0_LCSSA]], [[ADD_A1_LCSSA]]
1335; CHECK-MAXBW-NEXT:    [[RESULT1:%.*]] = add nsw i32 [[ADD_A2_LCSSA]], [[ADD_A3_LCSSA]]
1336; CHECK-MAXBW-NEXT:    [[RESULT:%.*]] = add nsw i32 [[RESULT0]], [[RESULT1]]
1337; CHECK-MAXBW-NEXT:    ret i32 [[RESULT]]
1338;
1339entry:
1340  br label %for.body
1341
1342for.body:                                    ; preds = %entry, %for.body
1343  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
1344  %accum3 = phi i32 [ 0, %entry ], [ %add.a3, %for.body ]
1345  %accum2 = phi i32 [ 0, %entry ], [ %add.a2, %for.body ]
1346  %accum1 = phi i32 [ 0, %entry ], [ %add.a1, %for.body ]
1347  %accum0 = phi i32 [ 0, %entry ], [ %add.a0, %for.body ]
1348  %gep.a0 = getelementptr inbounds i8, ptr %a, i64 %iv
1349  %gep.b0 = getelementptr inbounds i8, ptr %b, i64 %iv
1350  %offset.1 = or disjoint i64 %iv, 1
1351  %gep.a1 = getelementptr inbounds i8, ptr %a, i64 %offset.1
1352  %gep.b1 = getelementptr inbounds i8, ptr %b, i64 %offset.1
1353  %offset.2 = or disjoint i64 %iv, 2
1354  %gep.a2 = getelementptr inbounds i8, ptr %a, i64 %offset.2
1355  %gep.b2 = getelementptr inbounds i8, ptr %b, i64 %offset.2
1356  %offset.3 = or disjoint i64 %iv, 3
1357  %gep.a3 = getelementptr inbounds i8, ptr %a, i64 %offset.3
1358  %gep.b3 = getelementptr inbounds i8, ptr %b, i64 %offset.3
1359  %load.a0 = load i8, ptr %gep.a0, align 1
1360  %ext.a0 = sext i8 %load.a0 to i32
1361  %load.b0 = load i8, ptr %gep.b0, align 1
1362  %ext.b0 = sext i8 %load.b0 to i32
1363  %mul.a0 = mul nsw i32 %ext.b0, %ext.a0
1364  %add.a0 = add nsw i32 %mul.a0, %accum0
1365  %load.a1 = load i8, ptr %gep.a1, align 1
1366  %ext.a1 = sext i8 %load.a1 to i32
1367  %load.b1 = load i8, ptr %gep.b1, align 1
1368  %ext.b1 = sext i8 %load.b1 to i32
1369  %mul.a1 = mul nsw i32 %ext.a1, %ext.b1
1370  %add.a1 = add nsw i32 %mul.a1, %accum1
1371  %load.a2 = load i8, ptr %gep.a2, align 1
1372  %ext.a2 = sext i8 %load.a2 to i32
1373  %load.b2 = load i8, ptr %gep.b2, align 1
1374  %ext.b2 = sext i8 %load.b2 to i32
1375  %mul.a2 = mul nsw i32 %ext.a2, %ext.b2
1376  %add.a2 = add nsw i32 %mul.a2, %accum2
1377  %load.a3 = load i8, ptr %gep.a3, align 1
1378  %ext.a3 = sext i8 %load.a3 to i32
1379  %load.b3 = load i8, ptr %gep.b3, align 1
1380  %ext.b3 = sext i8 %load.b3 to i32
1381  %mul.a3 = mul nsw i32 %ext.a3, %ext.b3
1382  %add.a3 = add nsw i32 %mul.a3, %accum3
1383  %iv.next = add nuw nsw i64 %iv, 1
1384  %exitcond.not = icmp eq i64 %iv.next, %num_in
1385  br i1 %exitcond.not, label %exit, label %for.body
1386
1387exit:                                        ; preds = %for.body
1388  %result0 = add nsw i32 %add.a0, %add.a1
1389  %result1 = add nsw i32 %add.a2, %add.a3
1390  %result = add nsw i32 %result0, %result1
1391  ret i32 %result
1392}
1393
1394define i32 @not_dotp_predicated(i64 %N, ptr %a, ptr %b) {
1395; CHECK-INTERLEAVE1-LABEL: define i32 @not_dotp_predicated(
1396; CHECK-INTERLEAVE1-SAME: i64 [[N:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
1397; CHECK-INTERLEAVE1-NEXT:  entry:
1398; CHECK-INTERLEAVE1-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16
1399; CHECK-INTERLEAVE1-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1400; CHECK-INTERLEAVE1:       vector.ph:
1401; CHECK-INTERLEAVE1-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 16
1402; CHECK-INTERLEAVE1-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
1403; CHECK-INTERLEAVE1-NEXT:    br label [[VECTOR_BODY:%.*]]
1404; CHECK-INTERLEAVE1:       vector.body:
1405; CHECK-INTERLEAVE1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1406; CHECK-INTERLEAVE1-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ]
1407; CHECK-INTERLEAVE1-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
1408; CHECK-INTERLEAVE1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP0]]
1409; CHECK-INTERLEAVE1-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0
1410; CHECK-INTERLEAVE1-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
1411; CHECK-INTERLEAVE1-NEXT:    [[TMP3:%.*]] = sext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
1412; CHECK-INTERLEAVE1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP0]]
1413; CHECK-INTERLEAVE1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0
1414; CHECK-INTERLEAVE1-NEXT:    [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1
1415; CHECK-INTERLEAVE1-NEXT:    [[TMP6:%.*]] = sext <16 x i8> [[WIDE_LOAD1]] to <16 x i32>
1416; CHECK-INTERLEAVE1-NEXT:    [[TMP7:%.*]] = mul nsw <16 x i32> [[TMP6]], [[TMP3]]
1417; CHECK-INTERLEAVE1-NEXT:    [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP7]])
1418; CHECK-INTERLEAVE1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
1419; CHECK-INTERLEAVE1-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1420; CHECK-INTERLEAVE1-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
1421; CHECK-INTERLEAVE1:       middle.block:
1422; CHECK-INTERLEAVE1-NEXT:    [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE]])
1423; CHECK-INTERLEAVE1-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
1424; CHECK-INTERLEAVE1-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
1425; CHECK-INTERLEAVE1:       scalar.ph:
1426; CHECK-INTERLEAVE1-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1427; CHECK-INTERLEAVE1-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP9]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
1428; CHECK-INTERLEAVE1-NEXT:    br label [[FOR_BODY:%.*]]
1429; CHECK-INTERLEAVE1:       for.body:
1430; CHECK-INTERLEAVE1-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
1431; CHECK-INTERLEAVE1-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
1432; CHECK-INTERLEAVE1-NEXT:    [[GEP_A:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
1433; CHECK-INTERLEAVE1-NEXT:    [[LOAD_A:%.*]] = load i8, ptr [[GEP_A]], align 1
1434; CHECK-INTERLEAVE1-NEXT:    [[EXT_A:%.*]] = sext i8 [[LOAD_A]] to i32
1435; CHECK-INTERLEAVE1-NEXT:    [[GEP_B:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IV]]
1436; CHECK-INTERLEAVE1-NEXT:    [[LOAD_B:%.*]] = load i8, ptr [[GEP_B]], align 1
1437; CHECK-INTERLEAVE1-NEXT:    [[EXT_B:%.*]] = sext i8 [[LOAD_B]] to i32
1438; CHECK-INTERLEAVE1-NEXT:    [[MUL:%.*]] = mul nsw i32 [[EXT_B]], [[EXT_A]]
1439; CHECK-INTERLEAVE1-NEXT:    [[ADD]] = add nsw i32 [[MUL]], [[ACCUM]]
1440; CHECK-INTERLEAVE1-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1441; CHECK-INTERLEAVE1-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
1442; CHECK-INTERLEAVE1-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
1443; CHECK-INTERLEAVE1:       exit:
1444; CHECK-INTERLEAVE1-NEXT:    [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ]
1445; CHECK-INTERLEAVE1-NEXT:    ret i32 [[ADD_LCSSA]]
1446;
1447; CHECK-INTERLEAVED-LABEL: define i32 @not_dotp_predicated(
1448; CHECK-INTERLEAVED-SAME: i64 [[N:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
1449; CHECK-INTERLEAVED-NEXT:  entry:
1450; CHECK-INTERLEAVED-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 32
1451; CHECK-INTERLEAVED-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1452; CHECK-INTERLEAVED:       vector.ph:
1453; CHECK-INTERLEAVED-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 32
1454; CHECK-INTERLEAVED-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
1455; CHECK-INTERLEAVED-NEXT:    br label [[VECTOR_BODY:%.*]]
1456; CHECK-INTERLEAVED:       vector.body:
1457; CHECK-INTERLEAVED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1458; CHECK-INTERLEAVED-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ]
1459; CHECK-INTERLEAVED-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE5:%.*]], [[VECTOR_BODY]] ]
1460; CHECK-INTERLEAVED-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
1461; CHECK-INTERLEAVED-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP0]]
1462; CHECK-INTERLEAVED-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0
1463; CHECK-INTERLEAVED-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 16
1464; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
1465; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1
1466; CHECK-INTERLEAVED-NEXT:    [[TMP4:%.*]] = sext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
1467; CHECK-INTERLEAVED-NEXT:    [[TMP5:%.*]] = sext <16 x i8> [[WIDE_LOAD2]] to <16 x i32>
1468; CHECK-INTERLEAVED-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP0]]
1469; CHECK-INTERLEAVED-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 0
1470; CHECK-INTERLEAVED-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 16
1471; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP7]], align 1
1472; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP8]], align 1
1473; CHECK-INTERLEAVED-NEXT:    [[TMP9:%.*]] = sext <16 x i8> [[WIDE_LOAD3]] to <16 x i32>
1474; CHECK-INTERLEAVED-NEXT:    [[TMP10:%.*]] = sext <16 x i8> [[WIDE_LOAD4]] to <16 x i32>
1475; CHECK-INTERLEAVED-NEXT:    [[TMP11:%.*]] = mul nsw <16 x i32> [[TMP9]], [[TMP4]]
1476; CHECK-INTERLEAVED-NEXT:    [[TMP12:%.*]] = mul nsw <16 x i32> [[TMP10]], [[TMP5]]
1477; CHECK-INTERLEAVED-NEXT:    [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP11]])
1478; CHECK-INTERLEAVED-NEXT:    [[PARTIAL_REDUCE5]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI1]], <16 x i32> [[TMP12]])
1479; CHECK-INTERLEAVED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
1480; CHECK-INTERLEAVED-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1481; CHECK-INTERLEAVED-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
1482; CHECK-INTERLEAVED:       middle.block:
1483; CHECK-INTERLEAVED-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[PARTIAL_REDUCE5]], [[PARTIAL_REDUCE]]
1484; CHECK-INTERLEAVED-NEXT:    [[TMP14:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
1485; CHECK-INTERLEAVED-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
1486; CHECK-INTERLEAVED-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
1487; CHECK-INTERLEAVED:       scalar.ph:
1488; CHECK-INTERLEAVED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1489; CHECK-INTERLEAVED-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP14]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
1490; CHECK-INTERLEAVED-NEXT:    br label [[FOR_BODY:%.*]]
1491; CHECK-INTERLEAVED:       for.body:
1492; CHECK-INTERLEAVED-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
1493; CHECK-INTERLEAVED-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
1494; CHECK-INTERLEAVED-NEXT:    [[GEP_A:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
1495; CHECK-INTERLEAVED-NEXT:    [[LOAD_A:%.*]] = load i8, ptr [[GEP_A]], align 1
1496; CHECK-INTERLEAVED-NEXT:    [[EXT_A:%.*]] = sext i8 [[LOAD_A]] to i32
1497; CHECK-INTERLEAVED-NEXT:    [[GEP_B:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IV]]
1498; CHECK-INTERLEAVED-NEXT:    [[LOAD_B:%.*]] = load i8, ptr [[GEP_B]], align 1
1499; CHECK-INTERLEAVED-NEXT:    [[EXT_B:%.*]] = sext i8 [[LOAD_B]] to i32
1500; CHECK-INTERLEAVED-NEXT:    [[MUL:%.*]] = mul nsw i32 [[EXT_B]], [[EXT_A]]
1501; CHECK-INTERLEAVED-NEXT:    [[ADD]] = add nsw i32 [[MUL]], [[ACCUM]]
1502; CHECK-INTERLEAVED-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1503; CHECK-INTERLEAVED-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
1504; CHECK-INTERLEAVED-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
1505; CHECK-INTERLEAVED:       exit:
1506; CHECK-INTERLEAVED-NEXT:    [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ]
1507; CHECK-INTERLEAVED-NEXT:    ret i32 [[ADD_LCSSA]]
1508;
1509; CHECK-MAXBW-LABEL: define i32 @not_dotp_predicated(
1510; CHECK-MAXBW-SAME: i64 [[N:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
1511; CHECK-MAXBW-NEXT:  entry:
1512; CHECK-MAXBW-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16
1513; CHECK-MAXBW-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1514; CHECK-MAXBW:       vector.ph:
1515; CHECK-MAXBW-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 16
1516; CHECK-MAXBW-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
1517; CHECK-MAXBW-NEXT:    br label [[VECTOR_BODY:%.*]]
1518; CHECK-MAXBW:       vector.body:
1519; CHECK-MAXBW-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1520; CHECK-MAXBW-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ]
1521; CHECK-MAXBW-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
1522; CHECK-MAXBW-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP0]]
1523; CHECK-MAXBW-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0
1524; CHECK-MAXBW-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
1525; CHECK-MAXBW-NEXT:    [[TMP3:%.*]] = sext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
1526; CHECK-MAXBW-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP0]]
1527; CHECK-MAXBW-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0
1528; CHECK-MAXBW-NEXT:    [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1
1529; CHECK-MAXBW-NEXT:    [[TMP6:%.*]] = sext <16 x i8> [[WIDE_LOAD1]] to <16 x i32>
1530; CHECK-MAXBW-NEXT:    [[TMP7:%.*]] = mul nsw <16 x i32> [[TMP6]], [[TMP3]]
1531; CHECK-MAXBW-NEXT:    [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP7]])
1532; CHECK-MAXBW-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
1533; CHECK-MAXBW-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1534; CHECK-MAXBW-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
1535; CHECK-MAXBW:       middle.block:
1536; CHECK-MAXBW-NEXT:    [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE]])
1537; CHECK-MAXBW-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
1538; CHECK-MAXBW-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
1539; CHECK-MAXBW:       scalar.ph:
1540; CHECK-MAXBW-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1541; CHECK-MAXBW-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP9]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
1542; CHECK-MAXBW-NEXT:    br label [[FOR_BODY:%.*]]
1543; CHECK-MAXBW:       for.body:
1544; CHECK-MAXBW-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
1545; CHECK-MAXBW-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
1546; CHECK-MAXBW-NEXT:    [[GEP_A:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
1547; CHECK-MAXBW-NEXT:    [[LOAD_A:%.*]] = load i8, ptr [[GEP_A]], align 1
1548; CHECK-MAXBW-NEXT:    [[EXT_A:%.*]] = sext i8 [[LOAD_A]] to i32
1549; CHECK-MAXBW-NEXT:    [[GEP_B:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IV]]
1550; CHECK-MAXBW-NEXT:    [[LOAD_B:%.*]] = load i8, ptr [[GEP_B]], align 1
1551; CHECK-MAXBW-NEXT:    [[EXT_B:%.*]] = sext i8 [[LOAD_B]] to i32
1552; CHECK-MAXBW-NEXT:    [[MUL:%.*]] = mul nsw i32 [[EXT_B]], [[EXT_A]]
1553; CHECK-MAXBW-NEXT:    [[ADD]] = add nsw i32 [[MUL]], [[ACCUM]]
1554; CHECK-MAXBW-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1555; CHECK-MAXBW-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
1556; CHECK-MAXBW-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
1557; CHECK-MAXBW:       exit:
1558; CHECK-MAXBW-NEXT:    [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ]
1559; CHECK-MAXBW-NEXT:    ret i32 [[ADD_LCSSA]]
1560;
1561entry:
1562  br label %for.body
1563
1564for.body:                                         ; preds = %entry, %for.body
1565  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
1566  %accum = phi i32 [ 0, %entry ], [ %add, %for.body ]
1567  %gep.a = getelementptr inbounds i8, ptr %a, i64 %iv
1568  %load.a = load i8, ptr %gep.a, align 1
1569  %ext.a = sext i8 %load.a to i32
1570  %gep.b = getelementptr inbounds i8, ptr %b, i64 %iv
1571  %load.b = load i8, ptr %gep.b, align 1
1572  %ext.b = sext i8 %load.b to i32
1573  %mul = mul nsw i32 %ext.b, %ext.a
1574  %add = add nsw i32 %mul, %accum
1575  %iv.next = add nuw nsw i64 %iv, 1
1576  %exitcond.not = icmp eq i64 %iv.next, %N
1577  br i1 %exitcond.not, label %exit, label %for.body
1578
1579exit:                        ; preds = %for.body
1580  ret i32 %add
1581}
1582
1583define i32 @not_dotp_predicated_pragma(i64 %N, ptr %a, ptr %b) {
1584; CHECK-INTERLEAVE1-LABEL: define i32 @not_dotp_predicated_pragma(
1585; CHECK-INTERLEAVE1-SAME: i64 [[N:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
1586; CHECK-INTERLEAVE1-NEXT:  entry:
1587; CHECK-INTERLEAVE1-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1588; CHECK-INTERLEAVE1:       vector.ph:
1589; CHECK-INTERLEAVE1-NEXT:    [[N_RND_UP:%.*]] = add i64 [[N]], 15
1590; CHECK-INTERLEAVE1-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 16
1591; CHECK-INTERLEAVE1-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
1592; CHECK-INTERLEAVE1-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[N]], 1
1593; CHECK-INTERLEAVE1-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
1594; CHECK-INTERLEAVE1-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT]], <16 x i64> poison, <16 x i32> zeroinitializer
1595; CHECK-INTERLEAVE1-NEXT:    br label [[VECTOR_BODY:%.*]]
1596; CHECK-INTERLEAVE1:       vector.body:
1597; CHECK-INTERLEAVE1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE62:%.*]] ]
1598; CHECK-INTERLEAVE1-NEXT:    [[VEC_IND:%.*]] = phi <16 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE62]] ]
1599; CHECK-INTERLEAVE1-NEXT:    [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP180:%.*]], [[PRED_LOAD_CONTINUE62]] ]
1600; CHECK-INTERLEAVE1-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
1601; CHECK-INTERLEAVE1-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
1602; CHECK-INTERLEAVE1-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2
1603; CHECK-INTERLEAVE1-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
1604; CHECK-INTERLEAVE1-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 4
1605; CHECK-INTERLEAVE1-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 5
1606; CHECK-INTERLEAVE1-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 6
1607; CHECK-INTERLEAVE1-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 7
1608; CHECK-INTERLEAVE1-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 8
1609; CHECK-INTERLEAVE1-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 9
1610; CHECK-INTERLEAVE1-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX]], 10
1611; CHECK-INTERLEAVE1-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 11
1612; CHECK-INTERLEAVE1-NEXT:    [[TMP12:%.*]] = add i64 [[INDEX]], 12
1613; CHECK-INTERLEAVE1-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 13
1614; CHECK-INTERLEAVE1-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 14
1615; CHECK-INTERLEAVE1-NEXT:    [[TMP15:%.*]] = add i64 [[INDEX]], 15
1616; CHECK-INTERLEAVE1-NEXT:    [[TMP16:%.*]] = icmp ule <16 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
1617; CHECK-INTERLEAVE1-NEXT:    [[TMP17:%.*]] = extractelement <16 x i1> [[TMP16]], i32 0
1618; CHECK-INTERLEAVE1-NEXT:    br i1 [[TMP17]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
1619; CHECK-INTERLEAVE1:       pred.load.if:
1620; CHECK-INTERLEAVE1-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP0]]
1621; CHECK-INTERLEAVE1-NEXT:    [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1
1622; CHECK-INTERLEAVE1-NEXT:    [[TMP20:%.*]] = insertelement <16 x i8> poison, i8 [[TMP19]], i32 0
1623; CHECK-INTERLEAVE1-NEXT:    br label [[PRED_LOAD_CONTINUE]]
1624; CHECK-INTERLEAVE1:       pred.load.continue:
1625; CHECK-INTERLEAVE1-NEXT:    [[TMP21:%.*]] = phi <16 x i8> [ poison, [[VECTOR_BODY]] ], [ [[TMP20]], [[PRED_LOAD_IF]] ]
1626; CHECK-INTERLEAVE1-NEXT:    [[TMP22:%.*]] = extractelement <16 x i1> [[TMP16]], i32 1
1627; CHECK-INTERLEAVE1-NEXT:    br i1 [[TMP22]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
1628; CHECK-INTERLEAVE1:       pred.load.if1:
1629; CHECK-INTERLEAVE1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP1]]
1630; CHECK-INTERLEAVE1-NEXT:    [[TMP24:%.*]] = load i8, ptr [[TMP23]], align 1
1631; CHECK-INTERLEAVE1-NEXT:    [[TMP25:%.*]] = insertelement <16 x i8> [[TMP21]], i8 [[TMP24]], i32 1
1632; CHECK-INTERLEAVE1-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
1633; CHECK-INTERLEAVE1:       pred.load.continue2:
1634; CHECK-INTERLEAVE1-NEXT:    [[TMP26:%.*]] = phi <16 x i8> [ [[TMP21]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP25]], [[PRED_LOAD_IF1]] ]
1635; CHECK-INTERLEAVE1-NEXT:    [[TMP27:%.*]] = extractelement <16 x i1> [[TMP16]], i32 2
1636; CHECK-INTERLEAVE1-NEXT:    br i1 [[TMP27]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
1637; CHECK-INTERLEAVE1:       pred.load.if3:
1638; CHECK-INTERLEAVE1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP2]]
1639; CHECK-INTERLEAVE1-NEXT:    [[TMP29:%.*]] = load i8, ptr [[TMP28]], align 1
1640; CHECK-INTERLEAVE1-NEXT:    [[TMP30:%.*]] = insertelement <16 x i8> [[TMP26]], i8 [[TMP29]], i32 2
1641; CHECK-INTERLEAVE1-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
1642; CHECK-INTERLEAVE1:       pred.load.continue4:
1643; CHECK-INTERLEAVE1-NEXT:    [[TMP31:%.*]] = phi <16 x i8> [ [[TMP26]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP30]], [[PRED_LOAD_IF3]] ]
1644; CHECK-INTERLEAVE1-NEXT:    [[TMP32:%.*]] = extractelement <16 x i1> [[TMP16]], i32 3
1645; CHECK-INTERLEAVE1-NEXT:    br i1 [[TMP32]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
1646; CHECK-INTERLEAVE1:       pred.load.if5:
1647; CHECK-INTERLEAVE1-NEXT:    [[TMP33:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP3]]
1648; CHECK-INTERLEAVE1-NEXT:    [[TMP34:%.*]] = load i8, ptr [[TMP33]], align 1
1649; CHECK-INTERLEAVE1-NEXT:    [[TMP35:%.*]] = insertelement <16 x i8> [[TMP31]], i8 [[TMP34]], i32 3
1650; CHECK-INTERLEAVE1-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
1651; CHECK-INTERLEAVE1:       pred.load.continue6:
1652; CHECK-INTERLEAVE1-NEXT:    [[TMP36:%.*]] = phi <16 x i8> [ [[TMP31]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP35]], [[PRED_LOAD_IF5]] ]
1653; CHECK-INTERLEAVE1-NEXT:    [[TMP37:%.*]] = extractelement <16 x i1> [[TMP16]], i32 4
1654; CHECK-INTERLEAVE1-NEXT:    br i1 [[TMP37]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]]
1655; CHECK-INTERLEAVE1:       pred.load.if7:
1656; CHECK-INTERLEAVE1-NEXT:    [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP4]]
1657; CHECK-INTERLEAVE1-NEXT:    [[TMP39:%.*]] = load i8, ptr [[TMP38]], align 1
1658; CHECK-INTERLEAVE1-NEXT:    [[TMP40:%.*]] = insertelement <16 x i8> [[TMP36]], i8 [[TMP39]], i32 4
1659; CHECK-INTERLEAVE1-NEXT:    br label [[PRED_LOAD_CONTINUE8]]
1660; CHECK-INTERLEAVE1:       pred.load.continue8:
1661; CHECK-INTERLEAVE1-NEXT:    [[TMP41:%.*]] = phi <16 x i8> [ [[TMP36]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP40]], [[PRED_LOAD_IF7]] ]
1662; CHECK-INTERLEAVE1-NEXT:    [[TMP42:%.*]] = extractelement <16 x i1> [[TMP16]], i32 5
1663; CHECK-INTERLEAVE1-NEXT:    br i1 [[TMP42]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
1664; CHECK-INTERLEAVE1:       pred.load.if9:
1665; CHECK-INTERLEAVE1-NEXT:    [[TMP43:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP5]]
1666; CHECK-INTERLEAVE1-NEXT:    [[TMP44:%.*]] = load i8, ptr [[TMP43]], align 1
1667; CHECK-INTERLEAVE1-NEXT:    [[TMP45:%.*]] = insertelement <16 x i8> [[TMP41]], i8 [[TMP44]], i32 5
1668; CHECK-INTERLEAVE1-NEXT:    br label [[PRED_LOAD_CONTINUE10]]
1669; CHECK-INTERLEAVE1:       pred.load.continue10:
1670; CHECK-INTERLEAVE1-NEXT:    [[TMP46:%.*]] = phi <16 x i8> [ [[TMP41]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP45]], [[PRED_LOAD_IF9]] ]
1671; CHECK-INTERLEAVE1-NEXT:    [[TMP47:%.*]] = extractelement <16 x i1> [[TMP16]], i32 6
1672; CHECK-INTERLEAVE1-NEXT:    br i1 [[TMP47]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]]
1673; CHECK-INTERLEAVE1:       pred.load.if11:
1674; CHECK-INTERLEAVE1-NEXT:    [[TMP48:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP6]]
1675; CHECK-INTERLEAVE1-NEXT:    [[TMP49:%.*]] = load i8, ptr [[TMP48]], align 1
1676; CHECK-INTERLEAVE1-NEXT:    [[TMP50:%.*]] = insertelement <16 x i8> [[TMP46]], i8 [[TMP49]], i32 6
1677; CHECK-INTERLEAVE1-NEXT:    br label [[PRED_LOAD_CONTINUE12]]
1678; CHECK-INTERLEAVE1:       pred.load.continue12:
1679; CHECK-INTERLEAVE1-NEXT:    [[TMP51:%.*]] = phi <16 x i8> [ [[TMP46]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP50]], [[PRED_LOAD_IF11]] ]
1680; CHECK-INTERLEAVE1-NEXT:    [[TMP52:%.*]] = extractelement <16 x i1> [[TMP16]], i32 7
1681; CHECK-INTERLEAVE1-NEXT:    br i1 [[TMP52]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14:%.*]]
1682; CHECK-INTERLEAVE1:       pred.load.if13:
1683; CHECK-INTERLEAVE1-NEXT:    [[TMP53:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP7]]
1684; CHECK-INTERLEAVE1-NEXT:    [[TMP54:%.*]] = load i8, ptr [[TMP53]], align 1
1685; CHECK-INTERLEAVE1-NEXT:    [[TMP55:%.*]] = insertelement <16 x i8> [[TMP51]], i8 [[TMP54]], i32 7
1686; CHECK-INTERLEAVE1-NEXT:    br label [[PRED_LOAD_CONTINUE14]]
1687; CHECK-INTERLEAVE1:       pred.load.continue14:
1688; CHECK-INTERLEAVE1-NEXT:    [[TMP56:%.*]] = phi <16 x i8> [ [[TMP51]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP55]], [[PRED_LOAD_IF13]] ]
1689; CHECK-INTERLEAVE1-NEXT:    [[TMP57:%.*]] = extractelement <16 x i1> [[TMP16]], i32 8
1690; CHECK-INTERLEAVE1-NEXT:    br i1 [[TMP57]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16:%.*]]
1691; CHECK-INTERLEAVE1:       pred.load.if15:
1692; CHECK-INTERLEAVE1-NEXT:    [[TMP58:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP8]]
1693; CHECK-INTERLEAVE1-NEXT:    [[TMP59:%.*]] = load i8, ptr [[TMP58]], align 1
1694; CHECK-INTERLEAVE1-NEXT:    [[TMP60:%.*]] = insertelement <16 x i8> [[TMP56]], i8 [[TMP59]], i32 8
1695; CHECK-INTERLEAVE1-NEXT:    br label [[PRED_LOAD_CONTINUE16]]
1696; CHECK-INTERLEAVE1:       pred.load.continue16:
1697; CHECK-INTERLEAVE1-NEXT:    [[TMP61:%.*]] = phi <16 x i8> [ [[TMP56]], [[PRED_LOAD_CONTINUE14]] ], [ [[TMP60]], [[PRED_LOAD_IF15]] ]
1698; CHECK-INTERLEAVE1-NEXT:    [[TMP62:%.*]] = extractelement <16 x i1> [[TMP16]], i32 9
1699; CHECK-INTERLEAVE1-NEXT:    br i1 [[TMP62]], label [[PRED_LOAD_IF17:%.*]], label [[PRED_LOAD_CONTINUE18:%.*]]
1700; CHECK-INTERLEAVE1:       pred.load.if17:
1701; CHECK-INTERLEAVE1-NEXT:    [[TMP63:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP9]]
1702; CHECK-INTERLEAVE1-NEXT:    [[TMP64:%.*]] = load i8, ptr [[TMP63]], align 1
1703; CHECK-INTERLEAVE1-NEXT:    [[TMP65:%.*]] = insertelement <16 x i8> [[TMP61]], i8 [[TMP64]], i32 9
1704; CHECK-INTERLEAVE1-NEXT:    br label [[PRED_LOAD_CONTINUE18]]
1705; CHECK-INTERLEAVE1:       pred.load.continue18:
1706; CHECK-INTERLEAVE1-NEXT:    [[TMP66:%.*]] = phi <16 x i8> [ [[TMP61]], [[PRED_LOAD_CONTINUE16]] ], [ [[TMP65]], [[PRED_LOAD_IF17]] ]
1707; CHECK-INTERLEAVE1-NEXT:    [[TMP67:%.*]] = extractelement <16 x i1> [[TMP16]], i32 10
1708; CHECK-INTERLEAVE1-NEXT:    br i1 [[TMP67]], label [[PRED_LOAD_IF19:%.*]], label [[PRED_LOAD_CONTINUE20:%.*]]
1709; CHECK-INTERLEAVE1:       pred.load.if19:
1710; CHECK-INTERLEAVE1-NEXT:    [[TMP68:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP10]]
1711; CHECK-INTERLEAVE1-NEXT:    [[TMP69:%.*]] = load i8, ptr [[TMP68]], align 1
1712; CHECK-INTERLEAVE1-NEXT:    [[TMP70:%.*]] = insertelement <16 x i8> [[TMP66]], i8 [[TMP69]], i32 10
1713; CHECK-INTERLEAVE1-NEXT:    br label [[PRED_LOAD_CONTINUE20]]
1714; CHECK-INTERLEAVE1:       pred.load.continue20:
1715; CHECK-INTERLEAVE1-NEXT:    [[TMP71:%.*]] = phi <16 x i8> [ [[TMP66]], [[PRED_LOAD_CONTINUE18]] ], [ [[TMP70]], [[PRED_LOAD_IF19]] ]
1716; CHECK-INTERLEAVE1-NEXT:    [[TMP72:%.*]] = extractelement <16 x i1> [[TMP16]], i32 11
1717; CHECK-INTERLEAVE1-NEXT:    br i1 [[TMP72]], label [[PRED_LOAD_IF21:%.*]], label [[PRED_LOAD_CONTINUE22:%.*]]
1718; CHECK-INTERLEAVE1:       pred.load.if21:
1719; CHECK-INTERLEAVE1-NEXT:    [[TMP73:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP11]]
1720; CHECK-INTERLEAVE1-NEXT:    [[TMP74:%.*]] = load i8, ptr [[TMP73]], align 1
1721; CHECK-INTERLEAVE1-NEXT:    [[TMP75:%.*]] = insertelement <16 x i8> [[TMP71]], i8 [[TMP74]], i32 11
1722; CHECK-INTERLEAVE1-NEXT:    br label [[PRED_LOAD_CONTINUE22]]
1723; CHECK-INTERLEAVE1:       pred.load.continue22:
1724; CHECK-INTERLEAVE1-NEXT:    [[TMP76:%.*]] = phi <16 x i8> [ [[TMP71]], [[PRED_LOAD_CONTINUE20]] ], [ [[TMP75]], [[PRED_LOAD_IF21]] ]
1725; CHECK-INTERLEAVE1-NEXT:    [[TMP77:%.*]] = extractelement <16 x i1> [[TMP16]], i32 12
1726; CHECK-INTERLEAVE1-NEXT:    br i1 [[TMP77]], label [[PRED_LOAD_IF23:%.*]], label [[PRED_LOAD_CONTINUE24:%.*]]
1727; CHECK-INTERLEAVE1:       pred.load.if23:
1728; CHECK-INTERLEAVE1-NEXT:    [[TMP78:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
1729; CHECK-INTERLEAVE1-NEXT:    [[TMP79:%.*]] = load i8, ptr [[TMP78]], align 1
1730; CHECK-INTERLEAVE1-NEXT:    [[TMP80:%.*]] = insertelement <16 x i8> [[TMP76]], i8 [[TMP79]], i32 12
1731; CHECK-INTERLEAVE1-NEXT:    br label [[PRED_LOAD_CONTINUE24]]
1732; CHECK-INTERLEAVE1:       pred.load.continue24:
1733; CHECK-INTERLEAVE1-NEXT:    [[TMP81:%.*]] = phi <16 x i8> [ [[TMP76]], [[PRED_LOAD_CONTINUE22]] ], [ [[TMP80]], [[PRED_LOAD_IF23]] ]
1734; CHECK-INTERLEAVE1-NEXT:    [[TMP82:%.*]] = extractelement <16 x i1> [[TMP16]], i32 13
1735; CHECK-INTERLEAVE1-NEXT:    br i1 [[TMP82]], label [[PRED_LOAD_IF25:%.*]], label [[PRED_LOAD_CONTINUE26:%.*]]
1736; CHECK-INTERLEAVE1:       pred.load.if25:
1737; CHECK-INTERLEAVE1-NEXT:    [[TMP83:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP13]]
1738; CHECK-INTERLEAVE1-NEXT:    [[TMP84:%.*]] = load i8, ptr [[TMP83]], align 1
1739; CHECK-INTERLEAVE1-NEXT:    [[TMP85:%.*]] = insertelement <16 x i8> [[TMP81]], i8 [[TMP84]], i32 13
1740; CHECK-INTERLEAVE1-NEXT:    br label [[PRED_LOAD_CONTINUE26]]
1741; CHECK-INTERLEAVE1:       pred.load.continue26:
1742; CHECK-INTERLEAVE1-NEXT:    [[TMP86:%.*]] = phi <16 x i8> [ [[TMP81]], [[PRED_LOAD_CONTINUE24]] ], [ [[TMP85]], [[PRED_LOAD_IF25]] ]
1743; CHECK-INTERLEAVE1-NEXT:    [[TMP87:%.*]] = extractelement <16 x i1> [[TMP16]], i32 14
1744; CHECK-INTERLEAVE1-NEXT:    br i1 [[TMP87]], label [[PRED_LOAD_IF27:%.*]], label [[PRED_LOAD_CONTINUE28:%.*]]
1745; CHECK-INTERLEAVE1:       pred.load.if27:
1746; CHECK-INTERLEAVE1-NEXT:    [[TMP88:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP14]]
1747; CHECK-INTERLEAVE1-NEXT:    [[TMP89:%.*]] = load i8, ptr [[TMP88]], align 1
1748; CHECK-INTERLEAVE1-NEXT:    [[TMP90:%.*]] = insertelement <16 x i8> [[TMP86]], i8 [[TMP89]], i32 14
1749; CHECK-INTERLEAVE1-NEXT:    br label [[PRED_LOAD_CONTINUE28]]
1750; CHECK-INTERLEAVE1:       pred.load.continue28:
1751; CHECK-INTERLEAVE1-NEXT:    [[TMP91:%.*]] = phi <16 x i8> [ [[TMP86]], [[PRED_LOAD_CONTINUE26]] ], [ [[TMP90]], [[PRED_LOAD_IF27]] ]
1752; CHECK-INTERLEAVE1-NEXT:    [[TMP92:%.*]] = extractelement <16 x i1> [[TMP16]], i32 15
1753; CHECK-INTERLEAVE1-NEXT:    br i1 [[TMP92]], label [[PRED_LOAD_IF29:%.*]], label [[PRED_LOAD_CONTINUE30:%.*]]
1754; CHECK-INTERLEAVE1:       pred.load.if29:
1755; CHECK-INTERLEAVE1-NEXT:    [[TMP93:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP15]]
1756; CHECK-INTERLEAVE1-NEXT:    [[TMP94:%.*]] = load i8, ptr [[TMP93]], align 1
1757; CHECK-INTERLEAVE1-NEXT:    [[TMP95:%.*]] = insertelement <16 x i8> [[TMP91]], i8 [[TMP94]], i32 15
1758; CHECK-INTERLEAVE1-NEXT:    br label [[PRED_LOAD_CONTINUE30]]
1759; CHECK-INTERLEAVE1:       pred.load.continue30:
1760; CHECK-INTERLEAVE1-NEXT:    [[TMP96:%.*]] = phi <16 x i8> [ [[TMP91]], [[PRED_LOAD_CONTINUE28]] ], [ [[TMP95]], [[PRED_LOAD_IF29]] ]
1761; CHECK-INTERLEAVE1-NEXT:    [[TMP97:%.*]] = sext <16 x i8> [[TMP96]] to <16 x i32>
1762; CHECK-INTERLEAVE1-NEXT:    [[TMP98:%.*]] = extractelement <16 x i1> [[TMP16]], i32 0
1763; CHECK-INTERLEAVE1-NEXT:    br i1 [[TMP98]], label [[PRED_LOAD_IF31:%.*]], label [[PRED_LOAD_CONTINUE32:%.*]]
1764; CHECK-INTERLEAVE1:       pred.load.if31:
1765; CHECK-INTERLEAVE1-NEXT:    [[TMP99:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP0]]
1766; CHECK-INTERLEAVE1-NEXT:    [[TMP100:%.*]] = load i8, ptr [[TMP99]], align 1
1767; CHECK-INTERLEAVE1-NEXT:    [[TMP101:%.*]] = insertelement <16 x i8> poison, i8 [[TMP100]], i32 0
1768; CHECK-INTERLEAVE1-NEXT:    br label [[PRED_LOAD_CONTINUE32]]
1769; CHECK-INTERLEAVE1:       pred.load.continue32:
1770; CHECK-INTERLEAVE1-NEXT:    [[TMP102:%.*]] = phi <16 x i8> [ poison, [[PRED_LOAD_CONTINUE30]] ], [ [[TMP101]], [[PRED_LOAD_IF31]] ]
1771; CHECK-INTERLEAVE1-NEXT:    [[TMP103:%.*]] = extractelement <16 x i1> [[TMP16]], i32 1
1772; CHECK-INTERLEAVE1-NEXT:    br i1 [[TMP103]], label [[PRED_LOAD_IF33:%.*]], label [[PRED_LOAD_CONTINUE34:%.*]]
1773; CHECK-INTERLEAVE1:       pred.load.if33:
1774; CHECK-INTERLEAVE1-NEXT:    [[TMP104:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP1]]
1775; CHECK-INTERLEAVE1-NEXT:    [[TMP105:%.*]] = load i8, ptr [[TMP104]], align 1
1776; CHECK-INTERLEAVE1-NEXT:    [[TMP106:%.*]] = insertelement <16 x i8> [[TMP102]], i8 [[TMP105]], i32 1
1777; CHECK-INTERLEAVE1-NEXT:    br label [[PRED_LOAD_CONTINUE34]]
1778; CHECK-INTERLEAVE1:       pred.load.continue34:
1779; CHECK-INTERLEAVE1-NEXT:    [[TMP107:%.*]] = phi <16 x i8> [ [[TMP102]], [[PRED_LOAD_CONTINUE32]] ], [ [[TMP106]], [[PRED_LOAD_IF33]] ]
1780; CHECK-INTERLEAVE1-NEXT:    [[TMP108:%.*]] = extractelement <16 x i1> [[TMP16]], i32 2
1781; CHECK-INTERLEAVE1-NEXT:    br i1 [[TMP108]], label [[PRED_LOAD_IF35:%.*]], label [[PRED_LOAD_CONTINUE36:%.*]]
1782; CHECK-INTERLEAVE1:       pred.load.if35:
1783; CHECK-INTERLEAVE1-NEXT:    [[TMP109:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP2]]
1784; CHECK-INTERLEAVE1-NEXT:    [[TMP110:%.*]] = load i8, ptr [[TMP109]], align 1
1785; CHECK-INTERLEAVE1-NEXT:    [[TMP111:%.*]] = insertelement <16 x i8> [[TMP107]], i8 [[TMP110]], i32 2
1786; CHECK-INTERLEAVE1-NEXT:    br label [[PRED_LOAD_CONTINUE36]]
1787; CHECK-INTERLEAVE1:       pred.load.continue36:
1788; CHECK-INTERLEAVE1-NEXT:    [[TMP112:%.*]] = phi <16 x i8> [ [[TMP107]], [[PRED_LOAD_CONTINUE34]] ], [ [[TMP111]], [[PRED_LOAD_IF35]] ]
1789; CHECK-INTERLEAVE1-NEXT:    [[TMP113:%.*]] = extractelement <16 x i1> [[TMP16]], i32 3
1790; CHECK-INTERLEAVE1-NEXT:    br i1 [[TMP113]], label [[PRED_LOAD_IF37:%.*]], label [[PRED_LOAD_CONTINUE38:%.*]]
1791; CHECK-INTERLEAVE1:       pred.load.if37:
1792; CHECK-INTERLEAVE1-NEXT:    [[TMP114:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP3]]
1793; CHECK-INTERLEAVE1-NEXT:    [[TMP115:%.*]] = load i8, ptr [[TMP114]], align 1
1794; CHECK-INTERLEAVE1-NEXT:    [[TMP116:%.*]] = insertelement <16 x i8> [[TMP112]], i8 [[TMP115]], i32 3
1795; CHECK-INTERLEAVE1-NEXT:    br label [[PRED_LOAD_CONTINUE38]]
1796; CHECK-INTERLEAVE1:       pred.load.continue38:
1797; CHECK-INTERLEAVE1-NEXT:    [[TMP117:%.*]] = phi <16 x i8> [ [[TMP112]], [[PRED_LOAD_CONTINUE36]] ], [ [[TMP116]], [[PRED_LOAD_IF37]] ]
1798; CHECK-INTERLEAVE1-NEXT:    [[TMP118:%.*]] = extractelement <16 x i1> [[TMP16]], i32 4
1799; CHECK-INTERLEAVE1-NEXT:    br i1 [[TMP118]], label [[PRED_LOAD_IF39:%.*]], label [[PRED_LOAD_CONTINUE40:%.*]]
1800; CHECK-INTERLEAVE1:       pred.load.if39:
1801; CHECK-INTERLEAVE1-NEXT:    [[TMP119:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP4]]
1802; CHECK-INTERLEAVE1-NEXT:    [[TMP120:%.*]] = load i8, ptr [[TMP119]], align 1
1803; CHECK-INTERLEAVE1-NEXT:    [[TMP121:%.*]] = insertelement <16 x i8> [[TMP117]], i8 [[TMP120]], i32 4
1804; CHECK-INTERLEAVE1-NEXT:    br label [[PRED_LOAD_CONTINUE40]]
1805; CHECK-INTERLEAVE1:       pred.load.continue40:
1806; CHECK-INTERLEAVE1-NEXT:    [[TMP122:%.*]] = phi <16 x i8> [ [[TMP117]], [[PRED_LOAD_CONTINUE38]] ], [ [[TMP121]], [[PRED_LOAD_IF39]] ]
1807; CHECK-INTERLEAVE1-NEXT:    [[TMP123:%.*]] = extractelement <16 x i1> [[TMP16]], i32 5
1808; CHECK-INTERLEAVE1-NEXT:    br i1 [[TMP123]], label [[PRED_LOAD_IF41:%.*]], label [[PRED_LOAD_CONTINUE42:%.*]]
1809; CHECK-INTERLEAVE1:       pred.load.if41:
1810; CHECK-INTERLEAVE1-NEXT:    [[TMP124:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP5]]
1811; CHECK-INTERLEAVE1-NEXT:    [[TMP125:%.*]] = load i8, ptr [[TMP124]], align 1
1812; CHECK-INTERLEAVE1-NEXT:    [[TMP126:%.*]] = insertelement <16 x i8> [[TMP122]], i8 [[TMP125]], i32 5
1813; CHECK-INTERLEAVE1-NEXT:    br label [[PRED_LOAD_CONTINUE42]]
1814; CHECK-INTERLEAVE1:       pred.load.continue42:
1815; CHECK-INTERLEAVE1-NEXT:    [[TMP127:%.*]] = phi <16 x i8> [ [[TMP122]], [[PRED_LOAD_CONTINUE40]] ], [ [[TMP126]], [[PRED_LOAD_IF41]] ]
1816; CHECK-INTERLEAVE1-NEXT:    [[TMP128:%.*]] = extractelement <16 x i1> [[TMP16]], i32 6
1817; CHECK-INTERLEAVE1-NEXT:    br i1 [[TMP128]], label [[PRED_LOAD_IF43:%.*]], label [[PRED_LOAD_CONTINUE44:%.*]]
1818; CHECK-INTERLEAVE1:       pred.load.if43:
1819; CHECK-INTERLEAVE1-NEXT:    [[TMP129:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP6]]
1820; CHECK-INTERLEAVE1-NEXT:    [[TMP130:%.*]] = load i8, ptr [[TMP129]], align 1
1821; CHECK-INTERLEAVE1-NEXT:    [[TMP131:%.*]] = insertelement <16 x i8> [[TMP127]], i8 [[TMP130]], i32 6
1822; CHECK-INTERLEAVE1-NEXT:    br label [[PRED_LOAD_CONTINUE44]]
1823; CHECK-INTERLEAVE1:       pred.load.continue44:
1824; CHECK-INTERLEAVE1-NEXT:    [[TMP132:%.*]] = phi <16 x i8> [ [[TMP127]], [[PRED_LOAD_CONTINUE42]] ], [ [[TMP131]], [[PRED_LOAD_IF43]] ]
1825; CHECK-INTERLEAVE1-NEXT:    [[TMP133:%.*]] = extractelement <16 x i1> [[TMP16]], i32 7
1826; CHECK-INTERLEAVE1-NEXT:    br i1 [[TMP133]], label [[PRED_LOAD_IF45:%.*]], label [[PRED_LOAD_CONTINUE46:%.*]]
1827; CHECK-INTERLEAVE1:       pred.load.if45:
1828; CHECK-INTERLEAVE1-NEXT:    [[TMP134:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP7]]
1829; CHECK-INTERLEAVE1-NEXT:    [[TMP135:%.*]] = load i8, ptr [[TMP134]], align 1
1830; CHECK-INTERLEAVE1-NEXT:    [[TMP136:%.*]] = insertelement <16 x i8> [[TMP132]], i8 [[TMP135]], i32 7
1831; CHECK-INTERLEAVE1-NEXT:    br label [[PRED_LOAD_CONTINUE46]]
1832; CHECK-INTERLEAVE1:       pred.load.continue46:
1833; CHECK-INTERLEAVE1-NEXT:    [[TMP137:%.*]] = phi <16 x i8> [ [[TMP132]], [[PRED_LOAD_CONTINUE44]] ], [ [[TMP136]], [[PRED_LOAD_IF45]] ]
1834; CHECK-INTERLEAVE1-NEXT:    [[TMP138:%.*]] = extractelement <16 x i1> [[TMP16]], i32 8
1835; CHECK-INTERLEAVE1-NEXT:    br i1 [[TMP138]], label [[PRED_LOAD_IF47:%.*]], label [[PRED_LOAD_CONTINUE48:%.*]]
1836; CHECK-INTERLEAVE1:       pred.load.if47:
1837; CHECK-INTERLEAVE1-NEXT:    [[TMP139:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP8]]
1838; CHECK-INTERLEAVE1-NEXT:    [[TMP140:%.*]] = load i8, ptr [[TMP139]], align 1
1839; CHECK-INTERLEAVE1-NEXT:    [[TMP141:%.*]] = insertelement <16 x i8> [[TMP137]], i8 [[TMP140]], i32 8
1840; CHECK-INTERLEAVE1-NEXT:    br label [[PRED_LOAD_CONTINUE48]]
1841; CHECK-INTERLEAVE1:       pred.load.continue48:
1842; CHECK-INTERLEAVE1-NEXT:    [[TMP142:%.*]] = phi <16 x i8> [ [[TMP137]], [[PRED_LOAD_CONTINUE46]] ], [ [[TMP141]], [[PRED_LOAD_IF47]] ]
1843; CHECK-INTERLEAVE1-NEXT:    [[TMP143:%.*]] = extractelement <16 x i1> [[TMP16]], i32 9
1844; CHECK-INTERLEAVE1-NEXT:    br i1 [[TMP143]], label [[PRED_LOAD_IF49:%.*]], label [[PRED_LOAD_CONTINUE50:%.*]]
1845; CHECK-INTERLEAVE1:       pred.load.if49:
1846; CHECK-INTERLEAVE1-NEXT:    [[TMP144:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP9]]
1847; CHECK-INTERLEAVE1-NEXT:    [[TMP145:%.*]] = load i8, ptr [[TMP144]], align 1
1848; CHECK-INTERLEAVE1-NEXT:    [[TMP146:%.*]] = insertelement <16 x i8> [[TMP142]], i8 [[TMP145]], i32 9
1849; CHECK-INTERLEAVE1-NEXT:    br label [[PRED_LOAD_CONTINUE50]]
1850; CHECK-INTERLEAVE1:       pred.load.continue50:
1851; CHECK-INTERLEAVE1-NEXT:    [[TMP147:%.*]] = phi <16 x i8> [ [[TMP142]], [[PRED_LOAD_CONTINUE48]] ], [ [[TMP146]], [[PRED_LOAD_IF49]] ]
1852; CHECK-INTERLEAVE1-NEXT:    [[TMP148:%.*]] = extractelement <16 x i1> [[TMP16]], i32 10
1853; CHECK-INTERLEAVE1-NEXT:    br i1 [[TMP148]], label [[PRED_LOAD_IF51:%.*]], label [[PRED_LOAD_CONTINUE52:%.*]]
1854; CHECK-INTERLEAVE1:       pred.load.if51:
1855; CHECK-INTERLEAVE1-NEXT:    [[TMP149:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP10]]
1856; CHECK-INTERLEAVE1-NEXT:    [[TMP150:%.*]] = load i8, ptr [[TMP149]], align 1
1857; CHECK-INTERLEAVE1-NEXT:    [[TMP151:%.*]] = insertelement <16 x i8> [[TMP147]], i8 [[TMP150]], i32 10
1858; CHECK-INTERLEAVE1-NEXT:    br label [[PRED_LOAD_CONTINUE52]]
1859; CHECK-INTERLEAVE1:       pred.load.continue52:
1860; CHECK-INTERLEAVE1-NEXT:    [[TMP152:%.*]] = phi <16 x i8> [ [[TMP147]], [[PRED_LOAD_CONTINUE50]] ], [ [[TMP151]], [[PRED_LOAD_IF51]] ]
1861; CHECK-INTERLEAVE1-NEXT:    [[TMP153:%.*]] = extractelement <16 x i1> [[TMP16]], i32 11
1862; CHECK-INTERLEAVE1-NEXT:    br i1 [[TMP153]], label [[PRED_LOAD_IF53:%.*]], label [[PRED_LOAD_CONTINUE54:%.*]]
1863; CHECK-INTERLEAVE1:       pred.load.if53:
1864; CHECK-INTERLEAVE1-NEXT:    [[TMP154:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP11]]
1865; CHECK-INTERLEAVE1-NEXT:    [[TMP155:%.*]] = load i8, ptr [[TMP154]], align 1
1866; CHECK-INTERLEAVE1-NEXT:    [[TMP156:%.*]] = insertelement <16 x i8> [[TMP152]], i8 [[TMP155]], i32 11
1867; CHECK-INTERLEAVE1-NEXT:    br label [[PRED_LOAD_CONTINUE54]]
1868; CHECK-INTERLEAVE1:       pred.load.continue54:
1869; CHECK-INTERLEAVE1-NEXT:    [[TMP157:%.*]] = phi <16 x i8> [ [[TMP152]], [[PRED_LOAD_CONTINUE52]] ], [ [[TMP156]], [[PRED_LOAD_IF53]] ]
1870; CHECK-INTERLEAVE1-NEXT:    [[TMP158:%.*]] = extractelement <16 x i1> [[TMP16]], i32 12
1871; CHECK-INTERLEAVE1-NEXT:    br i1 [[TMP158]], label [[PRED_LOAD_IF55:%.*]], label [[PRED_LOAD_CONTINUE56:%.*]]
1872; CHECK-INTERLEAVE1:       pred.load.if55:
1873; CHECK-INTERLEAVE1-NEXT:    [[TMP159:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]]
1874; CHECK-INTERLEAVE1-NEXT:    [[TMP160:%.*]] = load i8, ptr [[TMP159]], align 1
1875; CHECK-INTERLEAVE1-NEXT:    [[TMP161:%.*]] = insertelement <16 x i8> [[TMP157]], i8 [[TMP160]], i32 12
1876; CHECK-INTERLEAVE1-NEXT:    br label [[PRED_LOAD_CONTINUE56]]
1877; CHECK-INTERLEAVE1:       pred.load.continue56:
1878; CHECK-INTERLEAVE1-NEXT:    [[TMP162:%.*]] = phi <16 x i8> [ [[TMP157]], [[PRED_LOAD_CONTINUE54]] ], [ [[TMP161]], [[PRED_LOAD_IF55]] ]
1879; CHECK-INTERLEAVE1-NEXT:    [[TMP163:%.*]] = extractelement <16 x i1> [[TMP16]], i32 13
1880; CHECK-INTERLEAVE1-NEXT:    br i1 [[TMP163]], label [[PRED_LOAD_IF57:%.*]], label [[PRED_LOAD_CONTINUE58:%.*]]
1881; CHECK-INTERLEAVE1:       pred.load.if57:
1882; CHECK-INTERLEAVE1-NEXT:    [[TMP164:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP13]]
1883; CHECK-INTERLEAVE1-NEXT:    [[TMP165:%.*]] = load i8, ptr [[TMP164]], align 1
1884; CHECK-INTERLEAVE1-NEXT:    [[TMP166:%.*]] = insertelement <16 x i8> [[TMP162]], i8 [[TMP165]], i32 13
1885; CHECK-INTERLEAVE1-NEXT:    br label [[PRED_LOAD_CONTINUE58]]
1886; CHECK-INTERLEAVE1:       pred.load.continue58:
1887; CHECK-INTERLEAVE1-NEXT:    [[TMP167:%.*]] = phi <16 x i8> [ [[TMP162]], [[PRED_LOAD_CONTINUE56]] ], [ [[TMP166]], [[PRED_LOAD_IF57]] ]
1888; CHECK-INTERLEAVE1-NEXT:    [[TMP168:%.*]] = extractelement <16 x i1> [[TMP16]], i32 14
1889; CHECK-INTERLEAVE1-NEXT:    br i1 [[TMP168]], label [[PRED_LOAD_IF59:%.*]], label [[PRED_LOAD_CONTINUE60:%.*]]
1890; CHECK-INTERLEAVE1:       pred.load.if59:
1891; CHECK-INTERLEAVE1-NEXT:    [[TMP169:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP14]]
1892; CHECK-INTERLEAVE1-NEXT:    [[TMP170:%.*]] = load i8, ptr [[TMP169]], align 1
1893; CHECK-INTERLEAVE1-NEXT:    [[TMP171:%.*]] = insertelement <16 x i8> [[TMP167]], i8 [[TMP170]], i32 14
1894; CHECK-INTERLEAVE1-NEXT:    br label [[PRED_LOAD_CONTINUE60]]
1895; CHECK-INTERLEAVE1:       pred.load.continue60:
1896; CHECK-INTERLEAVE1-NEXT:    [[TMP172:%.*]] = phi <16 x i8> [ [[TMP167]], [[PRED_LOAD_CONTINUE58]] ], [ [[TMP171]], [[PRED_LOAD_IF59]] ]
1897; CHECK-INTERLEAVE1-NEXT:    [[TMP173:%.*]] = extractelement <16 x i1> [[TMP16]], i32 15
1898; CHECK-INTERLEAVE1-NEXT:    br i1 [[TMP173]], label [[PRED_LOAD_IF61:%.*]], label [[PRED_LOAD_CONTINUE62]]
1899; CHECK-INTERLEAVE1:       pred.load.if61:
1900; CHECK-INTERLEAVE1-NEXT:    [[TMP174:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP15]]
1901; CHECK-INTERLEAVE1-NEXT:    [[TMP175:%.*]] = load i8, ptr [[TMP174]], align 1
1902; CHECK-INTERLEAVE1-NEXT:    [[TMP176:%.*]] = insertelement <16 x i8> [[TMP172]], i8 [[TMP175]], i32 15
1903; CHECK-INTERLEAVE1-NEXT:    br label [[PRED_LOAD_CONTINUE62]]
1904; CHECK-INTERLEAVE1:       pred.load.continue62:
1905; CHECK-INTERLEAVE1-NEXT:    [[TMP177:%.*]] = phi <16 x i8> [ [[TMP172]], [[PRED_LOAD_CONTINUE60]] ], [ [[TMP176]], [[PRED_LOAD_IF61]] ]
1906; CHECK-INTERLEAVE1-NEXT:    [[TMP178:%.*]] = sext <16 x i8> [[TMP177]] to <16 x i32>
1907; CHECK-INTERLEAVE1-NEXT:    [[TMP179:%.*]] = mul nsw <16 x i32> [[TMP178]], [[TMP97]]
1908; CHECK-INTERLEAVE1-NEXT:    [[TMP180]] = add <16 x i32> [[TMP179]], [[VEC_PHI]]
1909; CHECK-INTERLEAVE1-NEXT:    [[TMP181:%.*]] = select <16 x i1> [[TMP16]], <16 x i32> [[TMP180]], <16 x i32> [[VEC_PHI]]
1910; CHECK-INTERLEAVE1-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 16
1911; CHECK-INTERLEAVE1-NEXT:    [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 16)
1912; CHECK-INTERLEAVE1-NEXT:    [[TMP182:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1913; CHECK-INTERLEAVE1-NEXT:    br i1 [[TMP182]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
1914; CHECK-INTERLEAVE1:       middle.block:
1915; CHECK-INTERLEAVE1-NEXT:    [[TMP183:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP181]])
1916; CHECK-INTERLEAVE1-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
1917; CHECK-INTERLEAVE1:       scalar.ph:
1918; CHECK-INTERLEAVE1-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1919; CHECK-INTERLEAVE1-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP183]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
1920; CHECK-INTERLEAVE1-NEXT:    br label [[FOR_BODY:%.*]]
1921; CHECK-INTERLEAVE1:       for.body:
1922; CHECK-INTERLEAVE1-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
1923; CHECK-INTERLEAVE1-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
1924; CHECK-INTERLEAVE1-NEXT:    [[GEP_A:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IV]]
1925; CHECK-INTERLEAVE1-NEXT:    [[LOAD_A:%.*]] = load i8, ptr [[GEP_A]], align 1
1926; CHECK-INTERLEAVE1-NEXT:    [[EXT_A:%.*]] = sext i8 [[LOAD_A]] to i32
1927; CHECK-INTERLEAVE1-NEXT:    [[GEP_A2:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
1928; CHECK-INTERLEAVE1-NEXT:    [[LOAD_B:%.*]] = load i8, ptr [[GEP_A2]], align 1
1929; CHECK-INTERLEAVE1-NEXT:    [[EXT_B:%.*]] = sext i8 [[LOAD_B]] to i32
1930; CHECK-INTERLEAVE1-NEXT:    [[MUL:%.*]] = mul nsw i32 [[EXT_B]], [[EXT_A]]
1931; CHECK-INTERLEAVE1-NEXT:    [[ADD]] = add nsw i32 [[MUL]], [[ACCUM]]
1932; CHECK-INTERLEAVE1-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1933; CHECK-INTERLEAVE1-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
1934; CHECK-INTERLEAVE1-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
1935; CHECK-INTERLEAVE1:       exit:
1936; CHECK-INTERLEAVE1-NEXT:    [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP183]], [[MIDDLE_BLOCK]] ]
1937; CHECK-INTERLEAVE1-NEXT:    ret i32 [[ADD_LCSSA]]
1938;
1939; CHECK-INTERLEAVED-LABEL: define i32 @not_dotp_predicated_pragma(
1940; CHECK-INTERLEAVED-SAME: i64 [[N:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
1941; CHECK-INTERLEAVED-NEXT:  entry:
1942; CHECK-INTERLEAVED-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1943; CHECK-INTERLEAVED:       vector.ph:
1944; CHECK-INTERLEAVED-NEXT:    [[N_RND_UP:%.*]] = add i64 [[N]], 15
1945; CHECK-INTERLEAVED-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 16
1946; CHECK-INTERLEAVED-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
1947; CHECK-INTERLEAVED-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[N]], 1
1948; CHECK-INTERLEAVED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
1949; CHECK-INTERLEAVED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT]], <16 x i64> poison, <16 x i32> zeroinitializer
1950; CHECK-INTERLEAVED-NEXT:    br label [[VECTOR_BODY:%.*]]
1951; CHECK-INTERLEAVED:       vector.body:
1952; CHECK-INTERLEAVED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE62:%.*]] ]
1953; CHECK-INTERLEAVED-NEXT:    [[VEC_IND:%.*]] = phi <16 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE62]] ]
1954; CHECK-INTERLEAVED-NEXT:    [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP180:%.*]], [[PRED_LOAD_CONTINUE62]] ]
1955; CHECK-INTERLEAVED-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
1956; CHECK-INTERLEAVED-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
1957; CHECK-INTERLEAVED-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2
1958; CHECK-INTERLEAVED-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
1959; CHECK-INTERLEAVED-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 4
1960; CHECK-INTERLEAVED-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 5
1961; CHECK-INTERLEAVED-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 6
1962; CHECK-INTERLEAVED-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 7
1963; CHECK-INTERLEAVED-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 8
1964; CHECK-INTERLEAVED-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 9
1965; CHECK-INTERLEAVED-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX]], 10
1966; CHECK-INTERLEAVED-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 11
1967; CHECK-INTERLEAVED-NEXT:    [[TMP12:%.*]] = add i64 [[INDEX]], 12
1968; CHECK-INTERLEAVED-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 13
1969; CHECK-INTERLEAVED-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 14
1970; CHECK-INTERLEAVED-NEXT:    [[TMP15:%.*]] = add i64 [[INDEX]], 15
1971; CHECK-INTERLEAVED-NEXT:    [[TMP16:%.*]] = icmp ule <16 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
1972; CHECK-INTERLEAVED-NEXT:    [[TMP17:%.*]] = extractelement <16 x i1> [[TMP16]], i32 0
1973; CHECK-INTERLEAVED-NEXT:    br i1 [[TMP17]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
1974; CHECK-INTERLEAVED:       pred.load.if:
1975; CHECK-INTERLEAVED-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP0]]
1976; CHECK-INTERLEAVED-NEXT:    [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1
1977; CHECK-INTERLEAVED-NEXT:    [[TMP20:%.*]] = insertelement <16 x i8> poison, i8 [[TMP19]], i32 0
1978; CHECK-INTERLEAVED-NEXT:    br label [[PRED_LOAD_CONTINUE]]
1979; CHECK-INTERLEAVED:       pred.load.continue:
1980; CHECK-INTERLEAVED-NEXT:    [[TMP21:%.*]] = phi <16 x i8> [ poison, [[VECTOR_BODY]] ], [ [[TMP20]], [[PRED_LOAD_IF]] ]
1981; CHECK-INTERLEAVED-NEXT:    [[TMP22:%.*]] = extractelement <16 x i1> [[TMP16]], i32 1
1982; CHECK-INTERLEAVED-NEXT:    br i1 [[TMP22]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
1983; CHECK-INTERLEAVED:       pred.load.if1:
1984; CHECK-INTERLEAVED-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP1]]
1985; CHECK-INTERLEAVED-NEXT:    [[TMP24:%.*]] = load i8, ptr [[TMP23]], align 1
1986; CHECK-INTERLEAVED-NEXT:    [[TMP25:%.*]] = insertelement <16 x i8> [[TMP21]], i8 [[TMP24]], i32 1
1987; CHECK-INTERLEAVED-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
1988; CHECK-INTERLEAVED:       pred.load.continue2:
1989; CHECK-INTERLEAVED-NEXT:    [[TMP26:%.*]] = phi <16 x i8> [ [[TMP21]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP25]], [[PRED_LOAD_IF1]] ]
1990; CHECK-INTERLEAVED-NEXT:    [[TMP27:%.*]] = extractelement <16 x i1> [[TMP16]], i32 2
1991; CHECK-INTERLEAVED-NEXT:    br i1 [[TMP27]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
1992; CHECK-INTERLEAVED:       pred.load.if3:
1993; CHECK-INTERLEAVED-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP2]]
1994; CHECK-INTERLEAVED-NEXT:    [[TMP29:%.*]] = load i8, ptr [[TMP28]], align 1
1995; CHECK-INTERLEAVED-NEXT:    [[TMP30:%.*]] = insertelement <16 x i8> [[TMP26]], i8 [[TMP29]], i32 2
1996; CHECK-INTERLEAVED-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
1997; CHECK-INTERLEAVED:       pred.load.continue4:
1998; CHECK-INTERLEAVED-NEXT:    [[TMP31:%.*]] = phi <16 x i8> [ [[TMP26]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP30]], [[PRED_LOAD_IF3]] ]
1999; CHECK-INTERLEAVED-NEXT:    [[TMP32:%.*]] = extractelement <16 x i1> [[TMP16]], i32 3
2000; CHECK-INTERLEAVED-NEXT:    br i1 [[TMP32]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
2001; CHECK-INTERLEAVED:       pred.load.if5:
2002; CHECK-INTERLEAVED-NEXT:    [[TMP33:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP3]]
2003; CHECK-INTERLEAVED-NEXT:    [[TMP34:%.*]] = load i8, ptr [[TMP33]], align 1
2004; CHECK-INTERLEAVED-NEXT:    [[TMP35:%.*]] = insertelement <16 x i8> [[TMP31]], i8 [[TMP34]], i32 3
2005; CHECK-INTERLEAVED-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
2006; CHECK-INTERLEAVED:       pred.load.continue6:
2007; CHECK-INTERLEAVED-NEXT:    [[TMP36:%.*]] = phi <16 x i8> [ [[TMP31]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP35]], [[PRED_LOAD_IF5]] ]
2008; CHECK-INTERLEAVED-NEXT:    [[TMP37:%.*]] = extractelement <16 x i1> [[TMP16]], i32 4
2009; CHECK-INTERLEAVED-NEXT:    br i1 [[TMP37]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]]
2010; CHECK-INTERLEAVED:       pred.load.if7:
2011; CHECK-INTERLEAVED-NEXT:    [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP4]]
2012; CHECK-INTERLEAVED-NEXT:    [[TMP39:%.*]] = load i8, ptr [[TMP38]], align 1
2013; CHECK-INTERLEAVED-NEXT:    [[TMP40:%.*]] = insertelement <16 x i8> [[TMP36]], i8 [[TMP39]], i32 4
2014; CHECK-INTERLEAVED-NEXT:    br label [[PRED_LOAD_CONTINUE8]]
2015; CHECK-INTERLEAVED:       pred.load.continue8:
2016; CHECK-INTERLEAVED-NEXT:    [[TMP41:%.*]] = phi <16 x i8> [ [[TMP36]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP40]], [[PRED_LOAD_IF7]] ]
2017; CHECK-INTERLEAVED-NEXT:    [[TMP42:%.*]] = extractelement <16 x i1> [[TMP16]], i32 5
2018; CHECK-INTERLEAVED-NEXT:    br i1 [[TMP42]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
2019; CHECK-INTERLEAVED:       pred.load.if9:
2020; CHECK-INTERLEAVED-NEXT:    [[TMP43:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP5]]
2021; CHECK-INTERLEAVED-NEXT:    [[TMP44:%.*]] = load i8, ptr [[TMP43]], align 1
2022; CHECK-INTERLEAVED-NEXT:    [[TMP45:%.*]] = insertelement <16 x i8> [[TMP41]], i8 [[TMP44]], i32 5
2023; CHECK-INTERLEAVED-NEXT:    br label [[PRED_LOAD_CONTINUE10]]
2024; CHECK-INTERLEAVED:       pred.load.continue10:
2025; CHECK-INTERLEAVED-NEXT:    [[TMP46:%.*]] = phi <16 x i8> [ [[TMP41]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP45]], [[PRED_LOAD_IF9]] ]
2026; CHECK-INTERLEAVED-NEXT:    [[TMP47:%.*]] = extractelement <16 x i1> [[TMP16]], i32 6
2027; CHECK-INTERLEAVED-NEXT:    br i1 [[TMP47]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]]
2028; CHECK-INTERLEAVED:       pred.load.if11:
2029; CHECK-INTERLEAVED-NEXT:    [[TMP48:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP6]]
2030; CHECK-INTERLEAVED-NEXT:    [[TMP49:%.*]] = load i8, ptr [[TMP48]], align 1
2031; CHECK-INTERLEAVED-NEXT:    [[TMP50:%.*]] = insertelement <16 x i8> [[TMP46]], i8 [[TMP49]], i32 6
2032; CHECK-INTERLEAVED-NEXT:    br label [[PRED_LOAD_CONTINUE12]]
2033; CHECK-INTERLEAVED:       pred.load.continue12:
2034; CHECK-INTERLEAVED-NEXT:    [[TMP51:%.*]] = phi <16 x i8> [ [[TMP46]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP50]], [[PRED_LOAD_IF11]] ]
2035; CHECK-INTERLEAVED-NEXT:    [[TMP52:%.*]] = extractelement <16 x i1> [[TMP16]], i32 7
2036; CHECK-INTERLEAVED-NEXT:    br i1 [[TMP52]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14:%.*]]
2037; CHECK-INTERLEAVED:       pred.load.if13:
2038; CHECK-INTERLEAVED-NEXT:    [[TMP53:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP7]]
2039; CHECK-INTERLEAVED-NEXT:    [[TMP54:%.*]] = load i8, ptr [[TMP53]], align 1
2040; CHECK-INTERLEAVED-NEXT:    [[TMP55:%.*]] = insertelement <16 x i8> [[TMP51]], i8 [[TMP54]], i32 7
2041; CHECK-INTERLEAVED-NEXT:    br label [[PRED_LOAD_CONTINUE14]]
2042; CHECK-INTERLEAVED:       pred.load.continue14:
2043; CHECK-INTERLEAVED-NEXT:    [[TMP56:%.*]] = phi <16 x i8> [ [[TMP51]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP55]], [[PRED_LOAD_IF13]] ]
2044; CHECK-INTERLEAVED-NEXT:    [[TMP57:%.*]] = extractelement <16 x i1> [[TMP16]], i32 8
2045; CHECK-INTERLEAVED-NEXT:    br i1 [[TMP57]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16:%.*]]
2046; CHECK-INTERLEAVED:       pred.load.if15:
2047; CHECK-INTERLEAVED-NEXT:    [[TMP58:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP8]]
2048; CHECK-INTERLEAVED-NEXT:    [[TMP59:%.*]] = load i8, ptr [[TMP58]], align 1
2049; CHECK-INTERLEAVED-NEXT:    [[TMP60:%.*]] = insertelement <16 x i8> [[TMP56]], i8 [[TMP59]], i32 8
2050; CHECK-INTERLEAVED-NEXT:    br label [[PRED_LOAD_CONTINUE16]]
2051; CHECK-INTERLEAVED:       pred.load.continue16:
2052; CHECK-INTERLEAVED-NEXT:    [[TMP61:%.*]] = phi <16 x i8> [ [[TMP56]], [[PRED_LOAD_CONTINUE14]] ], [ [[TMP60]], [[PRED_LOAD_IF15]] ]
2053; CHECK-INTERLEAVED-NEXT:    [[TMP62:%.*]] = extractelement <16 x i1> [[TMP16]], i32 9
2054; CHECK-INTERLEAVED-NEXT:    br i1 [[TMP62]], label [[PRED_LOAD_IF17:%.*]], label [[PRED_LOAD_CONTINUE18:%.*]]
2055; CHECK-INTERLEAVED:       pred.load.if17:
2056; CHECK-INTERLEAVED-NEXT:    [[TMP63:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP9]]
2057; CHECK-INTERLEAVED-NEXT:    [[TMP64:%.*]] = load i8, ptr [[TMP63]], align 1
2058; CHECK-INTERLEAVED-NEXT:    [[TMP65:%.*]] = insertelement <16 x i8> [[TMP61]], i8 [[TMP64]], i32 9
2059; CHECK-INTERLEAVED-NEXT:    br label [[PRED_LOAD_CONTINUE18]]
2060; CHECK-INTERLEAVED:       pred.load.continue18:
2061; CHECK-INTERLEAVED-NEXT:    [[TMP66:%.*]] = phi <16 x i8> [ [[TMP61]], [[PRED_LOAD_CONTINUE16]] ], [ [[TMP65]], [[PRED_LOAD_IF17]] ]
2062; CHECK-INTERLEAVED-NEXT:    [[TMP67:%.*]] = extractelement <16 x i1> [[TMP16]], i32 10
2063; CHECK-INTERLEAVED-NEXT:    br i1 [[TMP67]], label [[PRED_LOAD_IF19:%.*]], label [[PRED_LOAD_CONTINUE20:%.*]]
2064; CHECK-INTERLEAVED:       pred.load.if19:
2065; CHECK-INTERLEAVED-NEXT:    [[TMP68:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP10]]
2066; CHECK-INTERLEAVED-NEXT:    [[TMP69:%.*]] = load i8, ptr [[TMP68]], align 1
2067; CHECK-INTERLEAVED-NEXT:    [[TMP70:%.*]] = insertelement <16 x i8> [[TMP66]], i8 [[TMP69]], i32 10
2068; CHECK-INTERLEAVED-NEXT:    br label [[PRED_LOAD_CONTINUE20]]
2069; CHECK-INTERLEAVED:       pred.load.continue20:
2070; CHECK-INTERLEAVED-NEXT:    [[TMP71:%.*]] = phi <16 x i8> [ [[TMP66]], [[PRED_LOAD_CONTINUE18]] ], [ [[TMP70]], [[PRED_LOAD_IF19]] ]
2071; CHECK-INTERLEAVED-NEXT:    [[TMP72:%.*]] = extractelement <16 x i1> [[TMP16]], i32 11
2072; CHECK-INTERLEAVED-NEXT:    br i1 [[TMP72]], label [[PRED_LOAD_IF21:%.*]], label [[PRED_LOAD_CONTINUE22:%.*]]
2073; CHECK-INTERLEAVED:       pred.load.if21:
2074; CHECK-INTERLEAVED-NEXT:    [[TMP73:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP11]]
2075; CHECK-INTERLEAVED-NEXT:    [[TMP74:%.*]] = load i8, ptr [[TMP73]], align 1
2076; CHECK-INTERLEAVED-NEXT:    [[TMP75:%.*]] = insertelement <16 x i8> [[TMP71]], i8 [[TMP74]], i32 11
2077; CHECK-INTERLEAVED-NEXT:    br label [[PRED_LOAD_CONTINUE22]]
2078; CHECK-INTERLEAVED:       pred.load.continue22:
2079; CHECK-INTERLEAVED-NEXT:    [[TMP76:%.*]] = phi <16 x i8> [ [[TMP71]], [[PRED_LOAD_CONTINUE20]] ], [ [[TMP75]], [[PRED_LOAD_IF21]] ]
2080; CHECK-INTERLEAVED-NEXT:    [[TMP77:%.*]] = extractelement <16 x i1> [[TMP16]], i32 12
2081; CHECK-INTERLEAVED-NEXT:    br i1 [[TMP77]], label [[PRED_LOAD_IF23:%.*]], label [[PRED_LOAD_CONTINUE24:%.*]]
2082; CHECK-INTERLEAVED:       pred.load.if23:
2083; CHECK-INTERLEAVED-NEXT:    [[TMP78:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
2084; CHECK-INTERLEAVED-NEXT:    [[TMP79:%.*]] = load i8, ptr [[TMP78]], align 1
2085; CHECK-INTERLEAVED-NEXT:    [[TMP80:%.*]] = insertelement <16 x i8> [[TMP76]], i8 [[TMP79]], i32 12
2086; CHECK-INTERLEAVED-NEXT:    br label [[PRED_LOAD_CONTINUE24]]
2087; CHECK-INTERLEAVED:       pred.load.continue24:
2088; CHECK-INTERLEAVED-NEXT:    [[TMP81:%.*]] = phi <16 x i8> [ [[TMP76]], [[PRED_LOAD_CONTINUE22]] ], [ [[TMP80]], [[PRED_LOAD_IF23]] ]
2089; CHECK-INTERLEAVED-NEXT:    [[TMP82:%.*]] = extractelement <16 x i1> [[TMP16]], i32 13
2090; CHECK-INTERLEAVED-NEXT:    br i1 [[TMP82]], label [[PRED_LOAD_IF25:%.*]], label [[PRED_LOAD_CONTINUE26:%.*]]
2091; CHECK-INTERLEAVED:       pred.load.if25:
2092; CHECK-INTERLEAVED-NEXT:    [[TMP83:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP13]]
2093; CHECK-INTERLEAVED-NEXT:    [[TMP84:%.*]] = load i8, ptr [[TMP83]], align 1
2094; CHECK-INTERLEAVED-NEXT:    [[TMP85:%.*]] = insertelement <16 x i8> [[TMP81]], i8 [[TMP84]], i32 13
2095; CHECK-INTERLEAVED-NEXT:    br label [[PRED_LOAD_CONTINUE26]]
2096; CHECK-INTERLEAVED:       pred.load.continue26:
2097; CHECK-INTERLEAVED-NEXT:    [[TMP86:%.*]] = phi <16 x i8> [ [[TMP81]], [[PRED_LOAD_CONTINUE24]] ], [ [[TMP85]], [[PRED_LOAD_IF25]] ]
2098; CHECK-INTERLEAVED-NEXT:    [[TMP87:%.*]] = extractelement <16 x i1> [[TMP16]], i32 14
2099; CHECK-INTERLEAVED-NEXT:    br i1 [[TMP87]], label [[PRED_LOAD_IF27:%.*]], label [[PRED_LOAD_CONTINUE28:%.*]]
2100; CHECK-INTERLEAVED:       pred.load.if27:
2101; CHECK-INTERLEAVED-NEXT:    [[TMP88:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP14]]
2102; CHECK-INTERLEAVED-NEXT:    [[TMP89:%.*]] = load i8, ptr [[TMP88]], align 1
2103; CHECK-INTERLEAVED-NEXT:    [[TMP90:%.*]] = insertelement <16 x i8> [[TMP86]], i8 [[TMP89]], i32 14
2104; CHECK-INTERLEAVED-NEXT:    br label [[PRED_LOAD_CONTINUE28]]
2105; CHECK-INTERLEAVED:       pred.load.continue28:
2106; CHECK-INTERLEAVED-NEXT:    [[TMP91:%.*]] = phi <16 x i8> [ [[TMP86]], [[PRED_LOAD_CONTINUE26]] ], [ [[TMP90]], [[PRED_LOAD_IF27]] ]
2107; CHECK-INTERLEAVED-NEXT:    [[TMP92:%.*]] = extractelement <16 x i1> [[TMP16]], i32 15
2108; CHECK-INTERLEAVED-NEXT:    br i1 [[TMP92]], label [[PRED_LOAD_IF29:%.*]], label [[PRED_LOAD_CONTINUE30:%.*]]
2109; CHECK-INTERLEAVED:       pred.load.if29:
2110; CHECK-INTERLEAVED-NEXT:    [[TMP93:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP15]]
2111; CHECK-INTERLEAVED-NEXT:    [[TMP94:%.*]] = load i8, ptr [[TMP93]], align 1
2112; CHECK-INTERLEAVED-NEXT:    [[TMP95:%.*]] = insertelement <16 x i8> [[TMP91]], i8 [[TMP94]], i32 15
2113; CHECK-INTERLEAVED-NEXT:    br label [[PRED_LOAD_CONTINUE30]]
2114; CHECK-INTERLEAVED:       pred.load.continue30:
2115; CHECK-INTERLEAVED-NEXT:    [[TMP96:%.*]] = phi <16 x i8> [ [[TMP91]], [[PRED_LOAD_CONTINUE28]] ], [ [[TMP95]], [[PRED_LOAD_IF29]] ]
2116; CHECK-INTERLEAVED-NEXT:    [[TMP97:%.*]] = sext <16 x i8> [[TMP96]] to <16 x i32>
2117; CHECK-INTERLEAVED-NEXT:    [[TMP98:%.*]] = extractelement <16 x i1> [[TMP16]], i32 0
2118; CHECK-INTERLEAVED-NEXT:    br i1 [[TMP98]], label [[PRED_LOAD_IF31:%.*]], label [[PRED_LOAD_CONTINUE32:%.*]]
2119; CHECK-INTERLEAVED:       pred.load.if31:
2120; CHECK-INTERLEAVED-NEXT:    [[TMP99:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP0]]
2121; CHECK-INTERLEAVED-NEXT:    [[TMP100:%.*]] = load i8, ptr [[TMP99]], align 1
2122; CHECK-INTERLEAVED-NEXT:    [[TMP101:%.*]] = insertelement <16 x i8> poison, i8 [[TMP100]], i32 0
2123; CHECK-INTERLEAVED-NEXT:    br label [[PRED_LOAD_CONTINUE32]]
2124; CHECK-INTERLEAVED:       pred.load.continue32:
2125; CHECK-INTERLEAVED-NEXT:    [[TMP102:%.*]] = phi <16 x i8> [ poison, [[PRED_LOAD_CONTINUE30]] ], [ [[TMP101]], [[PRED_LOAD_IF31]] ]
2126; CHECK-INTERLEAVED-NEXT:    [[TMP103:%.*]] = extractelement <16 x i1> [[TMP16]], i32 1
2127; CHECK-INTERLEAVED-NEXT:    br i1 [[TMP103]], label [[PRED_LOAD_IF33:%.*]], label [[PRED_LOAD_CONTINUE34:%.*]]
2128; CHECK-INTERLEAVED:       pred.load.if33:
2129; CHECK-INTERLEAVED-NEXT:    [[TMP104:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP1]]
2130; CHECK-INTERLEAVED-NEXT:    [[TMP105:%.*]] = load i8, ptr [[TMP104]], align 1
2131; CHECK-INTERLEAVED-NEXT:    [[TMP106:%.*]] = insertelement <16 x i8> [[TMP102]], i8 [[TMP105]], i32 1
2132; CHECK-INTERLEAVED-NEXT:    br label [[PRED_LOAD_CONTINUE34]]
2133; CHECK-INTERLEAVED:       pred.load.continue34:
2134; CHECK-INTERLEAVED-NEXT:    [[TMP107:%.*]] = phi <16 x i8> [ [[TMP102]], [[PRED_LOAD_CONTINUE32]] ], [ [[TMP106]], [[PRED_LOAD_IF33]] ]
2135; CHECK-INTERLEAVED-NEXT:    [[TMP108:%.*]] = extractelement <16 x i1> [[TMP16]], i32 2
2136; CHECK-INTERLEAVED-NEXT:    br i1 [[TMP108]], label [[PRED_LOAD_IF35:%.*]], label [[PRED_LOAD_CONTINUE36:%.*]]
2137; CHECK-INTERLEAVED:       pred.load.if35:
2138; CHECK-INTERLEAVED-NEXT:    [[TMP109:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP2]]
2139; CHECK-INTERLEAVED-NEXT:    [[TMP110:%.*]] = load i8, ptr [[TMP109]], align 1
2140; CHECK-INTERLEAVED-NEXT:    [[TMP111:%.*]] = insertelement <16 x i8> [[TMP107]], i8 [[TMP110]], i32 2
2141; CHECK-INTERLEAVED-NEXT:    br label [[PRED_LOAD_CONTINUE36]]
2142; CHECK-INTERLEAVED:       pred.load.continue36:
2143; CHECK-INTERLEAVED-NEXT:    [[TMP112:%.*]] = phi <16 x i8> [ [[TMP107]], [[PRED_LOAD_CONTINUE34]] ], [ [[TMP111]], [[PRED_LOAD_IF35]] ]
2144; CHECK-INTERLEAVED-NEXT:    [[TMP113:%.*]] = extractelement <16 x i1> [[TMP16]], i32 3
2145; CHECK-INTERLEAVED-NEXT:    br i1 [[TMP113]], label [[PRED_LOAD_IF37:%.*]], label [[PRED_LOAD_CONTINUE38:%.*]]
2146; CHECK-INTERLEAVED:       pred.load.if37:
2147; CHECK-INTERLEAVED-NEXT:    [[TMP114:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP3]]
2148; CHECK-INTERLEAVED-NEXT:    [[TMP115:%.*]] = load i8, ptr [[TMP114]], align 1
2149; CHECK-INTERLEAVED-NEXT:    [[TMP116:%.*]] = insertelement <16 x i8> [[TMP112]], i8 [[TMP115]], i32 3
2150; CHECK-INTERLEAVED-NEXT:    br label [[PRED_LOAD_CONTINUE38]]
2151; CHECK-INTERLEAVED:       pred.load.continue38:
2152; CHECK-INTERLEAVED-NEXT:    [[TMP117:%.*]] = phi <16 x i8> [ [[TMP112]], [[PRED_LOAD_CONTINUE36]] ], [ [[TMP116]], [[PRED_LOAD_IF37]] ]
2153; CHECK-INTERLEAVED-NEXT:    [[TMP118:%.*]] = extractelement <16 x i1> [[TMP16]], i32 4
2154; CHECK-INTERLEAVED-NEXT:    br i1 [[TMP118]], label [[PRED_LOAD_IF39:%.*]], label [[PRED_LOAD_CONTINUE40:%.*]]
2155; CHECK-INTERLEAVED:       pred.load.if39:
2156; CHECK-INTERLEAVED-NEXT:    [[TMP119:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP4]]
2157; CHECK-INTERLEAVED-NEXT:    [[TMP120:%.*]] = load i8, ptr [[TMP119]], align 1
2158; CHECK-INTERLEAVED-NEXT:    [[TMP121:%.*]] = insertelement <16 x i8> [[TMP117]], i8 [[TMP120]], i32 4
2159; CHECK-INTERLEAVED-NEXT:    br label [[PRED_LOAD_CONTINUE40]]
2160; CHECK-INTERLEAVED:       pred.load.continue40:
2161; CHECK-INTERLEAVED-NEXT:    [[TMP122:%.*]] = phi <16 x i8> [ [[TMP117]], [[PRED_LOAD_CONTINUE38]] ], [ [[TMP121]], [[PRED_LOAD_IF39]] ]
2162; CHECK-INTERLEAVED-NEXT:    [[TMP123:%.*]] = extractelement <16 x i1> [[TMP16]], i32 5
2163; CHECK-INTERLEAVED-NEXT:    br i1 [[TMP123]], label [[PRED_LOAD_IF41:%.*]], label [[PRED_LOAD_CONTINUE42:%.*]]
2164; CHECK-INTERLEAVED:       pred.load.if41:
2165; CHECK-INTERLEAVED-NEXT:    [[TMP124:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP5]]
2166; CHECK-INTERLEAVED-NEXT:    [[TMP125:%.*]] = load i8, ptr [[TMP124]], align 1
2167; CHECK-INTERLEAVED-NEXT:    [[TMP126:%.*]] = insertelement <16 x i8> [[TMP122]], i8 [[TMP125]], i32 5
2168; CHECK-INTERLEAVED-NEXT:    br label [[PRED_LOAD_CONTINUE42]]
2169; CHECK-INTERLEAVED:       pred.load.continue42:
2170; CHECK-INTERLEAVED-NEXT:    [[TMP127:%.*]] = phi <16 x i8> [ [[TMP122]], [[PRED_LOAD_CONTINUE40]] ], [ [[TMP126]], [[PRED_LOAD_IF41]] ]
2171; CHECK-INTERLEAVED-NEXT:    [[TMP128:%.*]] = extractelement <16 x i1> [[TMP16]], i32 6
2172; CHECK-INTERLEAVED-NEXT:    br i1 [[TMP128]], label [[PRED_LOAD_IF43:%.*]], label [[PRED_LOAD_CONTINUE44:%.*]]
2173; CHECK-INTERLEAVED:       pred.load.if43:
2174; CHECK-INTERLEAVED-NEXT:    [[TMP129:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP6]]
2175; CHECK-INTERLEAVED-NEXT:    [[TMP130:%.*]] = load i8, ptr [[TMP129]], align 1
2176; CHECK-INTERLEAVED-NEXT:    [[TMP131:%.*]] = insertelement <16 x i8> [[TMP127]], i8 [[TMP130]], i32 6
2177; CHECK-INTERLEAVED-NEXT:    br label [[PRED_LOAD_CONTINUE44]]
2178; CHECK-INTERLEAVED:       pred.load.continue44:
2179; CHECK-INTERLEAVED-NEXT:    [[TMP132:%.*]] = phi <16 x i8> [ [[TMP127]], [[PRED_LOAD_CONTINUE42]] ], [ [[TMP131]], [[PRED_LOAD_IF43]] ]
2180; CHECK-INTERLEAVED-NEXT:    [[TMP133:%.*]] = extractelement <16 x i1> [[TMP16]], i32 7
2181; CHECK-INTERLEAVED-NEXT:    br i1 [[TMP133]], label [[PRED_LOAD_IF45:%.*]], label [[PRED_LOAD_CONTINUE46:%.*]]
2182; CHECK-INTERLEAVED:       pred.load.if45:
2183; CHECK-INTERLEAVED-NEXT:    [[TMP134:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP7]]
2184; CHECK-INTERLEAVED-NEXT:    [[TMP135:%.*]] = load i8, ptr [[TMP134]], align 1
2185; CHECK-INTERLEAVED-NEXT:    [[TMP136:%.*]] = insertelement <16 x i8> [[TMP132]], i8 [[TMP135]], i32 7
2186; CHECK-INTERLEAVED-NEXT:    br label [[PRED_LOAD_CONTINUE46]]
2187; CHECK-INTERLEAVED:       pred.load.continue46:
2188; CHECK-INTERLEAVED-NEXT:    [[TMP137:%.*]] = phi <16 x i8> [ [[TMP132]], [[PRED_LOAD_CONTINUE44]] ], [ [[TMP136]], [[PRED_LOAD_IF45]] ]
2189; CHECK-INTERLEAVED-NEXT:    [[TMP138:%.*]] = extractelement <16 x i1> [[TMP16]], i32 8
2190; CHECK-INTERLEAVED-NEXT:    br i1 [[TMP138]], label [[PRED_LOAD_IF47:%.*]], label [[PRED_LOAD_CONTINUE48:%.*]]
2191; CHECK-INTERLEAVED:       pred.load.if47:
2192; CHECK-INTERLEAVED-NEXT:    [[TMP139:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP8]]
2193; CHECK-INTERLEAVED-NEXT:    [[TMP140:%.*]] = load i8, ptr [[TMP139]], align 1
2194; CHECK-INTERLEAVED-NEXT:    [[TMP141:%.*]] = insertelement <16 x i8> [[TMP137]], i8 [[TMP140]], i32 8
2195; CHECK-INTERLEAVED-NEXT:    br label [[PRED_LOAD_CONTINUE48]]
2196; CHECK-INTERLEAVED:       pred.load.continue48:
2197; CHECK-INTERLEAVED-NEXT:    [[TMP142:%.*]] = phi <16 x i8> [ [[TMP137]], [[PRED_LOAD_CONTINUE46]] ], [ [[TMP141]], [[PRED_LOAD_IF47]] ]
2198; CHECK-INTERLEAVED-NEXT:    [[TMP143:%.*]] = extractelement <16 x i1> [[TMP16]], i32 9
2199; CHECK-INTERLEAVED-NEXT:    br i1 [[TMP143]], label [[PRED_LOAD_IF49:%.*]], label [[PRED_LOAD_CONTINUE50:%.*]]
2200; CHECK-INTERLEAVED:       pred.load.if49:
2201; CHECK-INTERLEAVED-NEXT:    [[TMP144:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP9]]
2202; CHECK-INTERLEAVED-NEXT:    [[TMP145:%.*]] = load i8, ptr [[TMP144]], align 1
2203; CHECK-INTERLEAVED-NEXT:    [[TMP146:%.*]] = insertelement <16 x i8> [[TMP142]], i8 [[TMP145]], i32 9
2204; CHECK-INTERLEAVED-NEXT:    br label [[PRED_LOAD_CONTINUE50]]
2205; CHECK-INTERLEAVED:       pred.load.continue50:
2206; CHECK-INTERLEAVED-NEXT:    [[TMP147:%.*]] = phi <16 x i8> [ [[TMP142]], [[PRED_LOAD_CONTINUE48]] ], [ [[TMP146]], [[PRED_LOAD_IF49]] ]
2207; CHECK-INTERLEAVED-NEXT:    [[TMP148:%.*]] = extractelement <16 x i1> [[TMP16]], i32 10
2208; CHECK-INTERLEAVED-NEXT:    br i1 [[TMP148]], label [[PRED_LOAD_IF51:%.*]], label [[PRED_LOAD_CONTINUE52:%.*]]
2209; CHECK-INTERLEAVED:       pred.load.if51:
2210; CHECK-INTERLEAVED-NEXT:    [[TMP149:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP10]]
2211; CHECK-INTERLEAVED-NEXT:    [[TMP150:%.*]] = load i8, ptr [[TMP149]], align 1
2212; CHECK-INTERLEAVED-NEXT:    [[TMP151:%.*]] = insertelement <16 x i8> [[TMP147]], i8 [[TMP150]], i32 10
2213; CHECK-INTERLEAVED-NEXT:    br label [[PRED_LOAD_CONTINUE52]]
2214; CHECK-INTERLEAVED:       pred.load.continue52:
2215; CHECK-INTERLEAVED-NEXT:    [[TMP152:%.*]] = phi <16 x i8> [ [[TMP147]], [[PRED_LOAD_CONTINUE50]] ], [ [[TMP151]], [[PRED_LOAD_IF51]] ]
2216; CHECK-INTERLEAVED-NEXT:    [[TMP153:%.*]] = extractelement <16 x i1> [[TMP16]], i32 11
2217; CHECK-INTERLEAVED-NEXT:    br i1 [[TMP153]], label [[PRED_LOAD_IF53:%.*]], label [[PRED_LOAD_CONTINUE54:%.*]]
2218; CHECK-INTERLEAVED:       pred.load.if53:
2219; CHECK-INTERLEAVED-NEXT:    [[TMP154:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP11]]
2220; CHECK-INTERLEAVED-NEXT:    [[TMP155:%.*]] = load i8, ptr [[TMP154]], align 1
2221; CHECK-INTERLEAVED-NEXT:    [[TMP156:%.*]] = insertelement <16 x i8> [[TMP152]], i8 [[TMP155]], i32 11
2222; CHECK-INTERLEAVED-NEXT:    br label [[PRED_LOAD_CONTINUE54]]
2223; CHECK-INTERLEAVED:       pred.load.continue54:
2224; CHECK-INTERLEAVED-NEXT:    [[TMP157:%.*]] = phi <16 x i8> [ [[TMP152]], [[PRED_LOAD_CONTINUE52]] ], [ [[TMP156]], [[PRED_LOAD_IF53]] ]
2225; CHECK-INTERLEAVED-NEXT:    [[TMP158:%.*]] = extractelement <16 x i1> [[TMP16]], i32 12
2226; CHECK-INTERLEAVED-NEXT:    br i1 [[TMP158]], label [[PRED_LOAD_IF55:%.*]], label [[PRED_LOAD_CONTINUE56:%.*]]
2227; CHECK-INTERLEAVED:       pred.load.if55:
2228; CHECK-INTERLEAVED-NEXT:    [[TMP159:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]]
2229; CHECK-INTERLEAVED-NEXT:    [[TMP160:%.*]] = load i8, ptr [[TMP159]], align 1
2230; CHECK-INTERLEAVED-NEXT:    [[TMP161:%.*]] = insertelement <16 x i8> [[TMP157]], i8 [[TMP160]], i32 12
2231; CHECK-INTERLEAVED-NEXT:    br label [[PRED_LOAD_CONTINUE56]]
2232; CHECK-INTERLEAVED:       pred.load.continue56:
2233; CHECK-INTERLEAVED-NEXT:    [[TMP162:%.*]] = phi <16 x i8> [ [[TMP157]], [[PRED_LOAD_CONTINUE54]] ], [ [[TMP161]], [[PRED_LOAD_IF55]] ]
2234; CHECK-INTERLEAVED-NEXT:    [[TMP163:%.*]] = extractelement <16 x i1> [[TMP16]], i32 13
2235; CHECK-INTERLEAVED-NEXT:    br i1 [[TMP163]], label [[PRED_LOAD_IF57:%.*]], label [[PRED_LOAD_CONTINUE58:%.*]]
2236; CHECK-INTERLEAVED:       pred.load.if57:
2237; CHECK-INTERLEAVED-NEXT:    [[TMP164:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP13]]
2238; CHECK-INTERLEAVED-NEXT:    [[TMP165:%.*]] = load i8, ptr [[TMP164]], align 1
2239; CHECK-INTERLEAVED-NEXT:    [[TMP166:%.*]] = insertelement <16 x i8> [[TMP162]], i8 [[TMP165]], i32 13
2240; CHECK-INTERLEAVED-NEXT:    br label [[PRED_LOAD_CONTINUE58]]
2241; CHECK-INTERLEAVED:       pred.load.continue58:
2242; CHECK-INTERLEAVED-NEXT:    [[TMP167:%.*]] = phi <16 x i8> [ [[TMP162]], [[PRED_LOAD_CONTINUE56]] ], [ [[TMP166]], [[PRED_LOAD_IF57]] ]
2243; CHECK-INTERLEAVED-NEXT:    [[TMP168:%.*]] = extractelement <16 x i1> [[TMP16]], i32 14
2244; CHECK-INTERLEAVED-NEXT:    br i1 [[TMP168]], label [[PRED_LOAD_IF59:%.*]], label [[PRED_LOAD_CONTINUE60:%.*]]
2245; CHECK-INTERLEAVED:       pred.load.if59:
2246; CHECK-INTERLEAVED-NEXT:    [[TMP169:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP14]]
2247; CHECK-INTERLEAVED-NEXT:    [[TMP170:%.*]] = load i8, ptr [[TMP169]], align 1
2248; CHECK-INTERLEAVED-NEXT:    [[TMP171:%.*]] = insertelement <16 x i8> [[TMP167]], i8 [[TMP170]], i32 14
2249; CHECK-INTERLEAVED-NEXT:    br label [[PRED_LOAD_CONTINUE60]]
2250; CHECK-INTERLEAVED:       pred.load.continue60:
2251; CHECK-INTERLEAVED-NEXT:    [[TMP172:%.*]] = phi <16 x i8> [ [[TMP167]], [[PRED_LOAD_CONTINUE58]] ], [ [[TMP171]], [[PRED_LOAD_IF59]] ]
2252; CHECK-INTERLEAVED-NEXT:    [[TMP173:%.*]] = extractelement <16 x i1> [[TMP16]], i32 15
2253; CHECK-INTERLEAVED-NEXT:    br i1 [[TMP173]], label [[PRED_LOAD_IF61:%.*]], label [[PRED_LOAD_CONTINUE62]]
2254; CHECK-INTERLEAVED:       pred.load.if61:
2255; CHECK-INTERLEAVED-NEXT:    [[TMP174:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP15]]
2256; CHECK-INTERLEAVED-NEXT:    [[TMP175:%.*]] = load i8, ptr [[TMP174]], align 1
2257; CHECK-INTERLEAVED-NEXT:    [[TMP176:%.*]] = insertelement <16 x i8> [[TMP172]], i8 [[TMP175]], i32 15
2258; CHECK-INTERLEAVED-NEXT:    br label [[PRED_LOAD_CONTINUE62]]
2259; CHECK-INTERLEAVED:       pred.load.continue62:
2260; CHECK-INTERLEAVED-NEXT:    [[TMP177:%.*]] = phi <16 x i8> [ [[TMP172]], [[PRED_LOAD_CONTINUE60]] ], [ [[TMP176]], [[PRED_LOAD_IF61]] ]
2261; CHECK-INTERLEAVED-NEXT:    [[TMP178:%.*]] = sext <16 x i8> [[TMP177]] to <16 x i32>
2262; CHECK-INTERLEAVED-NEXT:    [[TMP179:%.*]] = mul nsw <16 x i32> [[TMP178]], [[TMP97]]
2263; CHECK-INTERLEAVED-NEXT:    [[TMP180]] = add <16 x i32> [[TMP179]], [[VEC_PHI]]
2264; CHECK-INTERLEAVED-NEXT:    [[TMP181:%.*]] = select <16 x i1> [[TMP16]], <16 x i32> [[TMP180]], <16 x i32> [[VEC_PHI]]
2265; CHECK-INTERLEAVED-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 16
2266; CHECK-INTERLEAVED-NEXT:    [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 16)
2267; CHECK-INTERLEAVED-NEXT:    [[TMP182:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
2268; CHECK-INTERLEAVED-NEXT:    br i1 [[TMP182]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
2269; CHECK-INTERLEAVED:       middle.block:
2270; CHECK-INTERLEAVED-NEXT:    [[TMP183:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP181]])
2271; CHECK-INTERLEAVED-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
2272; CHECK-INTERLEAVED:       scalar.ph:
2273; CHECK-INTERLEAVED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
2274; CHECK-INTERLEAVED-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP183]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
2275; CHECK-INTERLEAVED-NEXT:    br label [[FOR_BODY:%.*]]
2276; CHECK-INTERLEAVED:       for.body:
2277; CHECK-INTERLEAVED-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
2278; CHECK-INTERLEAVED-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
2279; CHECK-INTERLEAVED-NEXT:    [[GEP_A:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IV]]
2280; CHECK-INTERLEAVED-NEXT:    [[LOAD_A:%.*]] = load i8, ptr [[GEP_A]], align 1
2281; CHECK-INTERLEAVED-NEXT:    [[EXT_A:%.*]] = sext i8 [[LOAD_A]] to i32
2282; CHECK-INTERLEAVED-NEXT:    [[GEP_A2:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
2283; CHECK-INTERLEAVED-NEXT:    [[LOAD_B:%.*]] = load i8, ptr [[GEP_A2]], align 1
2284; CHECK-INTERLEAVED-NEXT:    [[EXT_B:%.*]] = sext i8 [[LOAD_B]] to i32
2285; CHECK-INTERLEAVED-NEXT:    [[MUL:%.*]] = mul nsw i32 [[EXT_B]], [[EXT_A]]
2286; CHECK-INTERLEAVED-NEXT:    [[ADD]] = add nsw i32 [[MUL]], [[ACCUM]]
2287; CHECK-INTERLEAVED-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
2288; CHECK-INTERLEAVED-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
2289; CHECK-INTERLEAVED-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
2290; CHECK-INTERLEAVED:       exit:
2291; CHECK-INTERLEAVED-NEXT:    [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP183]], [[MIDDLE_BLOCK]] ]
2292; CHECK-INTERLEAVED-NEXT:    ret i32 [[ADD_LCSSA]]
2293;
2294; CHECK-MAXBW-LABEL: define i32 @not_dotp_predicated_pragma(
2295; CHECK-MAXBW-SAME: i64 [[N:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
2296; CHECK-MAXBW-NEXT:  entry:
2297; CHECK-MAXBW-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2298; CHECK-MAXBW:       vector.ph:
2299; CHECK-MAXBW-NEXT:    [[N_RND_UP:%.*]] = add i64 [[N]], 15
2300; CHECK-MAXBW-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 16
2301; CHECK-MAXBW-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
2302; CHECK-MAXBW-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[N]], 1
2303; CHECK-MAXBW-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
2304; CHECK-MAXBW-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT]], <16 x i64> poison, <16 x i32> zeroinitializer
2305; CHECK-MAXBW-NEXT:    br label [[VECTOR_BODY:%.*]]
2306; CHECK-MAXBW:       vector.body:
2307; CHECK-MAXBW-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE62:%.*]] ]
2308; CHECK-MAXBW-NEXT:    [[VEC_IND:%.*]] = phi <16 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE62]] ]
2309; CHECK-MAXBW-NEXT:    [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP180:%.*]], [[PRED_LOAD_CONTINUE62]] ]
2310; CHECK-MAXBW-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
2311; CHECK-MAXBW-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
2312; CHECK-MAXBW-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2
2313; CHECK-MAXBW-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
2314; CHECK-MAXBW-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 4
2315; CHECK-MAXBW-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 5
2316; CHECK-MAXBW-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 6
2317; CHECK-MAXBW-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 7
2318; CHECK-MAXBW-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 8
2319; CHECK-MAXBW-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 9
2320; CHECK-MAXBW-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX]], 10
2321; CHECK-MAXBW-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 11
2322; CHECK-MAXBW-NEXT:    [[TMP12:%.*]] = add i64 [[INDEX]], 12
2323; CHECK-MAXBW-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 13
2324; CHECK-MAXBW-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 14
2325; CHECK-MAXBW-NEXT:    [[TMP15:%.*]] = add i64 [[INDEX]], 15
2326; CHECK-MAXBW-NEXT:    [[TMP16:%.*]] = icmp ule <16 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
2327; CHECK-MAXBW-NEXT:    [[TMP17:%.*]] = extractelement <16 x i1> [[TMP16]], i32 0
2328; CHECK-MAXBW-NEXT:    br i1 [[TMP17]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
2329; CHECK-MAXBW:       pred.load.if:
2330; CHECK-MAXBW-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP0]]
2331; CHECK-MAXBW-NEXT:    [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1
2332; CHECK-MAXBW-NEXT:    [[TMP20:%.*]] = insertelement <16 x i8> poison, i8 [[TMP19]], i32 0
2333; CHECK-MAXBW-NEXT:    br label [[PRED_LOAD_CONTINUE]]
2334; CHECK-MAXBW:       pred.load.continue:
2335; CHECK-MAXBW-NEXT:    [[TMP21:%.*]] = phi <16 x i8> [ poison, [[VECTOR_BODY]] ], [ [[TMP20]], [[PRED_LOAD_IF]] ]
2336; CHECK-MAXBW-NEXT:    [[TMP22:%.*]] = extractelement <16 x i1> [[TMP16]], i32 1
2337; CHECK-MAXBW-NEXT:    br i1 [[TMP22]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
2338; CHECK-MAXBW:       pred.load.if1:
2339; CHECK-MAXBW-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP1]]
2340; CHECK-MAXBW-NEXT:    [[TMP24:%.*]] = load i8, ptr [[TMP23]], align 1
2341; CHECK-MAXBW-NEXT:    [[TMP25:%.*]] = insertelement <16 x i8> [[TMP21]], i8 [[TMP24]], i32 1
2342; CHECK-MAXBW-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
2343; CHECK-MAXBW:       pred.load.continue2:
2344; CHECK-MAXBW-NEXT:    [[TMP26:%.*]] = phi <16 x i8> [ [[TMP21]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP25]], [[PRED_LOAD_IF1]] ]
2345; CHECK-MAXBW-NEXT:    [[TMP27:%.*]] = extractelement <16 x i1> [[TMP16]], i32 2
2346; CHECK-MAXBW-NEXT:    br i1 [[TMP27]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
2347; CHECK-MAXBW:       pred.load.if3:
2348; CHECK-MAXBW-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP2]]
2349; CHECK-MAXBW-NEXT:    [[TMP29:%.*]] = load i8, ptr [[TMP28]], align 1
2350; CHECK-MAXBW-NEXT:    [[TMP30:%.*]] = insertelement <16 x i8> [[TMP26]], i8 [[TMP29]], i32 2
2351; CHECK-MAXBW-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
2352; CHECK-MAXBW:       pred.load.continue4:
2353; CHECK-MAXBW-NEXT:    [[TMP31:%.*]] = phi <16 x i8> [ [[TMP26]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP30]], [[PRED_LOAD_IF3]] ]
2354; CHECK-MAXBW-NEXT:    [[TMP32:%.*]] = extractelement <16 x i1> [[TMP16]], i32 3
2355; CHECK-MAXBW-NEXT:    br i1 [[TMP32]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
2356; CHECK-MAXBW:       pred.load.if5:
2357; CHECK-MAXBW-NEXT:    [[TMP33:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP3]]
2358; CHECK-MAXBW-NEXT:    [[TMP34:%.*]] = load i8, ptr [[TMP33]], align 1
2359; CHECK-MAXBW-NEXT:    [[TMP35:%.*]] = insertelement <16 x i8> [[TMP31]], i8 [[TMP34]], i32 3
2360; CHECK-MAXBW-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
2361; CHECK-MAXBW:       pred.load.continue6:
2362; CHECK-MAXBW-NEXT:    [[TMP36:%.*]] = phi <16 x i8> [ [[TMP31]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP35]], [[PRED_LOAD_IF5]] ]
2363; CHECK-MAXBW-NEXT:    [[TMP37:%.*]] = extractelement <16 x i1> [[TMP16]], i32 4
2364; CHECK-MAXBW-NEXT:    br i1 [[TMP37]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]]
2365; CHECK-MAXBW:       pred.load.if7:
2366; CHECK-MAXBW-NEXT:    [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP4]]
2367; CHECK-MAXBW-NEXT:    [[TMP39:%.*]] = load i8, ptr [[TMP38]], align 1
2368; CHECK-MAXBW-NEXT:    [[TMP40:%.*]] = insertelement <16 x i8> [[TMP36]], i8 [[TMP39]], i32 4
2369; CHECK-MAXBW-NEXT:    br label [[PRED_LOAD_CONTINUE8]]
2370; CHECK-MAXBW:       pred.load.continue8:
2371; CHECK-MAXBW-NEXT:    [[TMP41:%.*]] = phi <16 x i8> [ [[TMP36]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP40]], [[PRED_LOAD_IF7]] ]
2372; CHECK-MAXBW-NEXT:    [[TMP42:%.*]] = extractelement <16 x i1> [[TMP16]], i32 5
2373; CHECK-MAXBW-NEXT:    br i1 [[TMP42]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
2374; CHECK-MAXBW:       pred.load.if9:
2375; CHECK-MAXBW-NEXT:    [[TMP43:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP5]]
2376; CHECK-MAXBW-NEXT:    [[TMP44:%.*]] = load i8, ptr [[TMP43]], align 1
2377; CHECK-MAXBW-NEXT:    [[TMP45:%.*]] = insertelement <16 x i8> [[TMP41]], i8 [[TMP44]], i32 5
2378; CHECK-MAXBW-NEXT:    br label [[PRED_LOAD_CONTINUE10]]
2379; CHECK-MAXBW:       pred.load.continue10:
2380; CHECK-MAXBW-NEXT:    [[TMP46:%.*]] = phi <16 x i8> [ [[TMP41]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP45]], [[PRED_LOAD_IF9]] ]
2381; CHECK-MAXBW-NEXT:    [[TMP47:%.*]] = extractelement <16 x i1> [[TMP16]], i32 6
2382; CHECK-MAXBW-NEXT:    br i1 [[TMP47]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]]
2383; CHECK-MAXBW:       pred.load.if11:
2384; CHECK-MAXBW-NEXT:    [[TMP48:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP6]]
2385; CHECK-MAXBW-NEXT:    [[TMP49:%.*]] = load i8, ptr [[TMP48]], align 1
2386; CHECK-MAXBW-NEXT:    [[TMP50:%.*]] = insertelement <16 x i8> [[TMP46]], i8 [[TMP49]], i32 6
2387; CHECK-MAXBW-NEXT:    br label [[PRED_LOAD_CONTINUE12]]
2388; CHECK-MAXBW:       pred.load.continue12:
2389; CHECK-MAXBW-NEXT:    [[TMP51:%.*]] = phi <16 x i8> [ [[TMP46]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP50]], [[PRED_LOAD_IF11]] ]
2390; CHECK-MAXBW-NEXT:    [[TMP52:%.*]] = extractelement <16 x i1> [[TMP16]], i32 7
2391; CHECK-MAXBW-NEXT:    br i1 [[TMP52]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14:%.*]]
2392; CHECK-MAXBW:       pred.load.if13:
2393; CHECK-MAXBW-NEXT:    [[TMP53:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP7]]
2394; CHECK-MAXBW-NEXT:    [[TMP54:%.*]] = load i8, ptr [[TMP53]], align 1
2395; CHECK-MAXBW-NEXT:    [[TMP55:%.*]] = insertelement <16 x i8> [[TMP51]], i8 [[TMP54]], i32 7
2396; CHECK-MAXBW-NEXT:    br label [[PRED_LOAD_CONTINUE14]]
2397; CHECK-MAXBW:       pred.load.continue14:
2398; CHECK-MAXBW-NEXT:    [[TMP56:%.*]] = phi <16 x i8> [ [[TMP51]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP55]], [[PRED_LOAD_IF13]] ]
2399; CHECK-MAXBW-NEXT:    [[TMP57:%.*]] = extractelement <16 x i1> [[TMP16]], i32 8
2400; CHECK-MAXBW-NEXT:    br i1 [[TMP57]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16:%.*]]
2401; CHECK-MAXBW:       pred.load.if15:
2402; CHECK-MAXBW-NEXT:    [[TMP58:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP8]]
2403; CHECK-MAXBW-NEXT:    [[TMP59:%.*]] = load i8, ptr [[TMP58]], align 1
2404; CHECK-MAXBW-NEXT:    [[TMP60:%.*]] = insertelement <16 x i8> [[TMP56]], i8 [[TMP59]], i32 8
2405; CHECK-MAXBW-NEXT:    br label [[PRED_LOAD_CONTINUE16]]
2406; CHECK-MAXBW:       pred.load.continue16:
2407; CHECK-MAXBW-NEXT:    [[TMP61:%.*]] = phi <16 x i8> [ [[TMP56]], [[PRED_LOAD_CONTINUE14]] ], [ [[TMP60]], [[PRED_LOAD_IF15]] ]
2408; CHECK-MAXBW-NEXT:    [[TMP62:%.*]] = extractelement <16 x i1> [[TMP16]], i32 9
2409; CHECK-MAXBW-NEXT:    br i1 [[TMP62]], label [[PRED_LOAD_IF17:%.*]], label [[PRED_LOAD_CONTINUE18:%.*]]
2410; CHECK-MAXBW:       pred.load.if17:
2411; CHECK-MAXBW-NEXT:    [[TMP63:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP9]]
2412; CHECK-MAXBW-NEXT:    [[TMP64:%.*]] = load i8, ptr [[TMP63]], align 1
2413; CHECK-MAXBW-NEXT:    [[TMP65:%.*]] = insertelement <16 x i8> [[TMP61]], i8 [[TMP64]], i32 9
2414; CHECK-MAXBW-NEXT:    br label [[PRED_LOAD_CONTINUE18]]
2415; CHECK-MAXBW:       pred.load.continue18:
2416; CHECK-MAXBW-NEXT:    [[TMP66:%.*]] = phi <16 x i8> [ [[TMP61]], [[PRED_LOAD_CONTINUE16]] ], [ [[TMP65]], [[PRED_LOAD_IF17]] ]
2417; CHECK-MAXBW-NEXT:    [[TMP67:%.*]] = extractelement <16 x i1> [[TMP16]], i32 10
2418; CHECK-MAXBW-NEXT:    br i1 [[TMP67]], label [[PRED_LOAD_IF19:%.*]], label [[PRED_LOAD_CONTINUE20:%.*]]
2419; CHECK-MAXBW:       pred.load.if19:
2420; CHECK-MAXBW-NEXT:    [[TMP68:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP10]]
2421; CHECK-MAXBW-NEXT:    [[TMP69:%.*]] = load i8, ptr [[TMP68]], align 1
2422; CHECK-MAXBW-NEXT:    [[TMP70:%.*]] = insertelement <16 x i8> [[TMP66]], i8 [[TMP69]], i32 10
2423; CHECK-MAXBW-NEXT:    br label [[PRED_LOAD_CONTINUE20]]
2424; CHECK-MAXBW:       pred.load.continue20:
2425; CHECK-MAXBW-NEXT:    [[TMP71:%.*]] = phi <16 x i8> [ [[TMP66]], [[PRED_LOAD_CONTINUE18]] ], [ [[TMP70]], [[PRED_LOAD_IF19]] ]
2426; CHECK-MAXBW-NEXT:    [[TMP72:%.*]] = extractelement <16 x i1> [[TMP16]], i32 11
2427; CHECK-MAXBW-NEXT:    br i1 [[TMP72]], label [[PRED_LOAD_IF21:%.*]], label [[PRED_LOAD_CONTINUE22:%.*]]
2428; CHECK-MAXBW:       pred.load.if21:
2429; CHECK-MAXBW-NEXT:    [[TMP73:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP11]]
2430; CHECK-MAXBW-NEXT:    [[TMP74:%.*]] = load i8, ptr [[TMP73]], align 1
2431; CHECK-MAXBW-NEXT:    [[TMP75:%.*]] = insertelement <16 x i8> [[TMP71]], i8 [[TMP74]], i32 11
2432; CHECK-MAXBW-NEXT:    br label [[PRED_LOAD_CONTINUE22]]
2433; CHECK-MAXBW:       pred.load.continue22:
2434; CHECK-MAXBW-NEXT:    [[TMP76:%.*]] = phi <16 x i8> [ [[TMP71]], [[PRED_LOAD_CONTINUE20]] ], [ [[TMP75]], [[PRED_LOAD_IF21]] ]
2435; CHECK-MAXBW-NEXT:    [[TMP77:%.*]] = extractelement <16 x i1> [[TMP16]], i32 12
2436; CHECK-MAXBW-NEXT:    br i1 [[TMP77]], label [[PRED_LOAD_IF23:%.*]], label [[PRED_LOAD_CONTINUE24:%.*]]
2437; CHECK-MAXBW:       pred.load.if23:
2438; CHECK-MAXBW-NEXT:    [[TMP78:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
2439; CHECK-MAXBW-NEXT:    [[TMP79:%.*]] = load i8, ptr [[TMP78]], align 1
2440; CHECK-MAXBW-NEXT:    [[TMP80:%.*]] = insertelement <16 x i8> [[TMP76]], i8 [[TMP79]], i32 12
2441; CHECK-MAXBW-NEXT:    br label [[PRED_LOAD_CONTINUE24]]
2442; CHECK-MAXBW:       pred.load.continue24:
2443; CHECK-MAXBW-NEXT:    [[TMP81:%.*]] = phi <16 x i8> [ [[TMP76]], [[PRED_LOAD_CONTINUE22]] ], [ [[TMP80]], [[PRED_LOAD_IF23]] ]
2444; CHECK-MAXBW-NEXT:    [[TMP82:%.*]] = extractelement <16 x i1> [[TMP16]], i32 13
2445; CHECK-MAXBW-NEXT:    br i1 [[TMP82]], label [[PRED_LOAD_IF25:%.*]], label [[PRED_LOAD_CONTINUE26:%.*]]
2446; CHECK-MAXBW:       pred.load.if25:
2447; CHECK-MAXBW-NEXT:    [[TMP83:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP13]]
2448; CHECK-MAXBW-NEXT:    [[TMP84:%.*]] = load i8, ptr [[TMP83]], align 1
2449; CHECK-MAXBW-NEXT:    [[TMP85:%.*]] = insertelement <16 x i8> [[TMP81]], i8 [[TMP84]], i32 13
2450; CHECK-MAXBW-NEXT:    br label [[PRED_LOAD_CONTINUE26]]
2451; CHECK-MAXBW:       pred.load.continue26:
2452; CHECK-MAXBW-NEXT:    [[TMP86:%.*]] = phi <16 x i8> [ [[TMP81]], [[PRED_LOAD_CONTINUE24]] ], [ [[TMP85]], [[PRED_LOAD_IF25]] ]
2453; CHECK-MAXBW-NEXT:    [[TMP87:%.*]] = extractelement <16 x i1> [[TMP16]], i32 14
2454; CHECK-MAXBW-NEXT:    br i1 [[TMP87]], label [[PRED_LOAD_IF27:%.*]], label [[PRED_LOAD_CONTINUE28:%.*]]
2455; CHECK-MAXBW:       pred.load.if27:
2456; CHECK-MAXBW-NEXT:    [[TMP88:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP14]]
2457; CHECK-MAXBW-NEXT:    [[TMP89:%.*]] = load i8, ptr [[TMP88]], align 1
2458; CHECK-MAXBW-NEXT:    [[TMP90:%.*]] = insertelement <16 x i8> [[TMP86]], i8 [[TMP89]], i32 14
2459; CHECK-MAXBW-NEXT:    br label [[PRED_LOAD_CONTINUE28]]
2460; CHECK-MAXBW:       pred.load.continue28:
2461; CHECK-MAXBW-NEXT:    [[TMP91:%.*]] = phi <16 x i8> [ [[TMP86]], [[PRED_LOAD_CONTINUE26]] ], [ [[TMP90]], [[PRED_LOAD_IF27]] ]
2462; CHECK-MAXBW-NEXT:    [[TMP92:%.*]] = extractelement <16 x i1> [[TMP16]], i32 15
2463; CHECK-MAXBW-NEXT:    br i1 [[TMP92]], label [[PRED_LOAD_IF29:%.*]], label [[PRED_LOAD_CONTINUE30:%.*]]
2464; CHECK-MAXBW:       pred.load.if29:
2465; CHECK-MAXBW-NEXT:    [[TMP93:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP15]]
2466; CHECK-MAXBW-NEXT:    [[TMP94:%.*]] = load i8, ptr [[TMP93]], align 1
2467; CHECK-MAXBW-NEXT:    [[TMP95:%.*]] = insertelement <16 x i8> [[TMP91]], i8 [[TMP94]], i32 15
2468; CHECK-MAXBW-NEXT:    br label [[PRED_LOAD_CONTINUE30]]
2469; CHECK-MAXBW:       pred.load.continue30:
2470; CHECK-MAXBW-NEXT:    [[TMP96:%.*]] = phi <16 x i8> [ [[TMP91]], [[PRED_LOAD_CONTINUE28]] ], [ [[TMP95]], [[PRED_LOAD_IF29]] ]
2471; CHECK-MAXBW-NEXT:    [[TMP97:%.*]] = sext <16 x i8> [[TMP96]] to <16 x i32>
2472; CHECK-MAXBW-NEXT:    [[TMP98:%.*]] = extractelement <16 x i1> [[TMP16]], i32 0
2473; CHECK-MAXBW-NEXT:    br i1 [[TMP98]], label [[PRED_LOAD_IF31:%.*]], label [[PRED_LOAD_CONTINUE32:%.*]]
2474; CHECK-MAXBW:       pred.load.if31:
2475; CHECK-MAXBW-NEXT:    [[TMP99:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP0]]
2476; CHECK-MAXBW-NEXT:    [[TMP100:%.*]] = load i8, ptr [[TMP99]], align 1
2477; CHECK-MAXBW-NEXT:    [[TMP101:%.*]] = insertelement <16 x i8> poison, i8 [[TMP100]], i32 0
2478; CHECK-MAXBW-NEXT:    br label [[PRED_LOAD_CONTINUE32]]
2479; CHECK-MAXBW:       pred.load.continue32:
2480; CHECK-MAXBW-NEXT:    [[TMP102:%.*]] = phi <16 x i8> [ poison, [[PRED_LOAD_CONTINUE30]] ], [ [[TMP101]], [[PRED_LOAD_IF31]] ]
2481; CHECK-MAXBW-NEXT:    [[TMP103:%.*]] = extractelement <16 x i1> [[TMP16]], i32 1
2482; CHECK-MAXBW-NEXT:    br i1 [[TMP103]], label [[PRED_LOAD_IF33:%.*]], label [[PRED_LOAD_CONTINUE34:%.*]]
2483; CHECK-MAXBW:       pred.load.if33:
2484; CHECK-MAXBW-NEXT:    [[TMP104:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP1]]
2485; CHECK-MAXBW-NEXT:    [[TMP105:%.*]] = load i8, ptr [[TMP104]], align 1
2486; CHECK-MAXBW-NEXT:    [[TMP106:%.*]] = insertelement <16 x i8> [[TMP102]], i8 [[TMP105]], i32 1
2487; CHECK-MAXBW-NEXT:    br label [[PRED_LOAD_CONTINUE34]]
2488; CHECK-MAXBW:       pred.load.continue34:
2489; CHECK-MAXBW-NEXT:    [[TMP107:%.*]] = phi <16 x i8> [ [[TMP102]], [[PRED_LOAD_CONTINUE32]] ], [ [[TMP106]], [[PRED_LOAD_IF33]] ]
2490; CHECK-MAXBW-NEXT:    [[TMP108:%.*]] = extractelement <16 x i1> [[TMP16]], i32 2
2491; CHECK-MAXBW-NEXT:    br i1 [[TMP108]], label [[PRED_LOAD_IF35:%.*]], label [[PRED_LOAD_CONTINUE36:%.*]]
2492; CHECK-MAXBW:       pred.load.if35:
2493; CHECK-MAXBW-NEXT:    [[TMP109:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP2]]
2494; CHECK-MAXBW-NEXT:    [[TMP110:%.*]] = load i8, ptr [[TMP109]], align 1
2495; CHECK-MAXBW-NEXT:    [[TMP111:%.*]] = insertelement <16 x i8> [[TMP107]], i8 [[TMP110]], i32 2
2496; CHECK-MAXBW-NEXT:    br label [[PRED_LOAD_CONTINUE36]]
2497; CHECK-MAXBW:       pred.load.continue36:
2498; CHECK-MAXBW-NEXT:    [[TMP112:%.*]] = phi <16 x i8> [ [[TMP107]], [[PRED_LOAD_CONTINUE34]] ], [ [[TMP111]], [[PRED_LOAD_IF35]] ]
2499; CHECK-MAXBW-NEXT:    [[TMP113:%.*]] = extractelement <16 x i1> [[TMP16]], i32 3
2500; CHECK-MAXBW-NEXT:    br i1 [[TMP113]], label [[PRED_LOAD_IF37:%.*]], label [[PRED_LOAD_CONTINUE38:%.*]]
2501; CHECK-MAXBW:       pred.load.if37:
2502; CHECK-MAXBW-NEXT:    [[TMP114:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP3]]
2503; CHECK-MAXBW-NEXT:    [[TMP115:%.*]] = load i8, ptr [[TMP114]], align 1
2504; CHECK-MAXBW-NEXT:    [[TMP116:%.*]] = insertelement <16 x i8> [[TMP112]], i8 [[TMP115]], i32 3
2505; CHECK-MAXBW-NEXT:    br label [[PRED_LOAD_CONTINUE38]]
2506; CHECK-MAXBW:       pred.load.continue38:
2507; CHECK-MAXBW-NEXT:    [[TMP117:%.*]] = phi <16 x i8> [ [[TMP112]], [[PRED_LOAD_CONTINUE36]] ], [ [[TMP116]], [[PRED_LOAD_IF37]] ]
2508; CHECK-MAXBW-NEXT:    [[TMP118:%.*]] = extractelement <16 x i1> [[TMP16]], i32 4
2509; CHECK-MAXBW-NEXT:    br i1 [[TMP118]], label [[PRED_LOAD_IF39:%.*]], label [[PRED_LOAD_CONTINUE40:%.*]]
2510; CHECK-MAXBW:       pred.load.if39:
2511; CHECK-MAXBW-NEXT:    [[TMP119:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP4]]
2512; CHECK-MAXBW-NEXT:    [[TMP120:%.*]] = load i8, ptr [[TMP119]], align 1
2513; CHECK-MAXBW-NEXT:    [[TMP121:%.*]] = insertelement <16 x i8> [[TMP117]], i8 [[TMP120]], i32 4
2514; CHECK-MAXBW-NEXT:    br label [[PRED_LOAD_CONTINUE40]]
2515; CHECK-MAXBW:       pred.load.continue40:
2516; CHECK-MAXBW-NEXT:    [[TMP122:%.*]] = phi <16 x i8> [ [[TMP117]], [[PRED_LOAD_CONTINUE38]] ], [ [[TMP121]], [[PRED_LOAD_IF39]] ]
2517; CHECK-MAXBW-NEXT:    [[TMP123:%.*]] = extractelement <16 x i1> [[TMP16]], i32 5
2518; CHECK-MAXBW-NEXT:    br i1 [[TMP123]], label [[PRED_LOAD_IF41:%.*]], label [[PRED_LOAD_CONTINUE42:%.*]]
2519; CHECK-MAXBW:       pred.load.if41:
2520; CHECK-MAXBW-NEXT:    [[TMP124:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP5]]
2521; CHECK-MAXBW-NEXT:    [[TMP125:%.*]] = load i8, ptr [[TMP124]], align 1
2522; CHECK-MAXBW-NEXT:    [[TMP126:%.*]] = insertelement <16 x i8> [[TMP122]], i8 [[TMP125]], i32 5
2523; CHECK-MAXBW-NEXT:    br label [[PRED_LOAD_CONTINUE42]]
2524; CHECK-MAXBW:       pred.load.continue42:
2525; CHECK-MAXBW-NEXT:    [[TMP127:%.*]] = phi <16 x i8> [ [[TMP122]], [[PRED_LOAD_CONTINUE40]] ], [ [[TMP126]], [[PRED_LOAD_IF41]] ]
2526; CHECK-MAXBW-NEXT:    [[TMP128:%.*]] = extractelement <16 x i1> [[TMP16]], i32 6
2527; CHECK-MAXBW-NEXT:    br i1 [[TMP128]], label [[PRED_LOAD_IF43:%.*]], label [[PRED_LOAD_CONTINUE44:%.*]]
2528; CHECK-MAXBW:       pred.load.if43:
2529; CHECK-MAXBW-NEXT:    [[TMP129:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP6]]
2530; CHECK-MAXBW-NEXT:    [[TMP130:%.*]] = load i8, ptr [[TMP129]], align 1
2531; CHECK-MAXBW-NEXT:    [[TMP131:%.*]] = insertelement <16 x i8> [[TMP127]], i8 [[TMP130]], i32 6
2532; CHECK-MAXBW-NEXT:    br label [[PRED_LOAD_CONTINUE44]]
2533; CHECK-MAXBW:       pred.load.continue44:
2534; CHECK-MAXBW-NEXT:    [[TMP132:%.*]] = phi <16 x i8> [ [[TMP127]], [[PRED_LOAD_CONTINUE42]] ], [ [[TMP131]], [[PRED_LOAD_IF43]] ]
2535; CHECK-MAXBW-NEXT:    [[TMP133:%.*]] = extractelement <16 x i1> [[TMP16]], i32 7
2536; CHECK-MAXBW-NEXT:    br i1 [[TMP133]], label [[PRED_LOAD_IF45:%.*]], label [[PRED_LOAD_CONTINUE46:%.*]]
2537; CHECK-MAXBW:       pred.load.if45:
2538; CHECK-MAXBW-NEXT:    [[TMP134:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP7]]
2539; CHECK-MAXBW-NEXT:    [[TMP135:%.*]] = load i8, ptr [[TMP134]], align 1
2540; CHECK-MAXBW-NEXT:    [[TMP136:%.*]] = insertelement <16 x i8> [[TMP132]], i8 [[TMP135]], i32 7
2541; CHECK-MAXBW-NEXT:    br label [[PRED_LOAD_CONTINUE46]]
2542; CHECK-MAXBW:       pred.load.continue46:
2543; CHECK-MAXBW-NEXT:    [[TMP137:%.*]] = phi <16 x i8> [ [[TMP132]], [[PRED_LOAD_CONTINUE44]] ], [ [[TMP136]], [[PRED_LOAD_IF45]] ]
2544; CHECK-MAXBW-NEXT:    [[TMP138:%.*]] = extractelement <16 x i1> [[TMP16]], i32 8
2545; CHECK-MAXBW-NEXT:    br i1 [[TMP138]], label [[PRED_LOAD_IF47:%.*]], label [[PRED_LOAD_CONTINUE48:%.*]]
2546; CHECK-MAXBW:       pred.load.if47:
2547; CHECK-MAXBW-NEXT:    [[TMP139:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP8]]
2548; CHECK-MAXBW-NEXT:    [[TMP140:%.*]] = load i8, ptr [[TMP139]], align 1
2549; CHECK-MAXBW-NEXT:    [[TMP141:%.*]] = insertelement <16 x i8> [[TMP137]], i8 [[TMP140]], i32 8
2550; CHECK-MAXBW-NEXT:    br label [[PRED_LOAD_CONTINUE48]]
2551; CHECK-MAXBW:       pred.load.continue48:
2552; CHECK-MAXBW-NEXT:    [[TMP142:%.*]] = phi <16 x i8> [ [[TMP137]], [[PRED_LOAD_CONTINUE46]] ], [ [[TMP141]], [[PRED_LOAD_IF47]] ]
2553; CHECK-MAXBW-NEXT:    [[TMP143:%.*]] = extractelement <16 x i1> [[TMP16]], i32 9
2554; CHECK-MAXBW-NEXT:    br i1 [[TMP143]], label [[PRED_LOAD_IF49:%.*]], label [[PRED_LOAD_CONTINUE50:%.*]]
2555; CHECK-MAXBW:       pred.load.if49:
2556; CHECK-MAXBW-NEXT:    [[TMP144:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP9]]
2557; CHECK-MAXBW-NEXT:    [[TMP145:%.*]] = load i8, ptr [[TMP144]], align 1
2558; CHECK-MAXBW-NEXT:    [[TMP146:%.*]] = insertelement <16 x i8> [[TMP142]], i8 [[TMP145]], i32 9
2559; CHECK-MAXBW-NEXT:    br label [[PRED_LOAD_CONTINUE50]]
2560; CHECK-MAXBW:       pred.load.continue50:
2561; CHECK-MAXBW-NEXT:    [[TMP147:%.*]] = phi <16 x i8> [ [[TMP142]], [[PRED_LOAD_CONTINUE48]] ], [ [[TMP146]], [[PRED_LOAD_IF49]] ]
2562; CHECK-MAXBW-NEXT:    [[TMP148:%.*]] = extractelement <16 x i1> [[TMP16]], i32 10
2563; CHECK-MAXBW-NEXT:    br i1 [[TMP148]], label [[PRED_LOAD_IF51:%.*]], label [[PRED_LOAD_CONTINUE52:%.*]]
2564; CHECK-MAXBW:       pred.load.if51:
2565; CHECK-MAXBW-NEXT:    [[TMP149:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP10]]
2566; CHECK-MAXBW-NEXT:    [[TMP150:%.*]] = load i8, ptr [[TMP149]], align 1
2567; CHECK-MAXBW-NEXT:    [[TMP151:%.*]] = insertelement <16 x i8> [[TMP147]], i8 [[TMP150]], i32 10
2568; CHECK-MAXBW-NEXT:    br label [[PRED_LOAD_CONTINUE52]]
2569; CHECK-MAXBW:       pred.load.continue52:
2570; CHECK-MAXBW-NEXT:    [[TMP152:%.*]] = phi <16 x i8> [ [[TMP147]], [[PRED_LOAD_CONTINUE50]] ], [ [[TMP151]], [[PRED_LOAD_IF51]] ]
2571; CHECK-MAXBW-NEXT:    [[TMP153:%.*]] = extractelement <16 x i1> [[TMP16]], i32 11
2572; CHECK-MAXBW-NEXT:    br i1 [[TMP153]], label [[PRED_LOAD_IF53:%.*]], label [[PRED_LOAD_CONTINUE54:%.*]]
2573; CHECK-MAXBW:       pred.load.if53:
2574; CHECK-MAXBW-NEXT:    [[TMP154:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP11]]
2575; CHECK-MAXBW-NEXT:    [[TMP155:%.*]] = load i8, ptr [[TMP154]], align 1
2576; CHECK-MAXBW-NEXT:    [[TMP156:%.*]] = insertelement <16 x i8> [[TMP152]], i8 [[TMP155]], i32 11
2577; CHECK-MAXBW-NEXT:    br label [[PRED_LOAD_CONTINUE54]]
2578; CHECK-MAXBW:       pred.load.continue54:
2579; CHECK-MAXBW-NEXT:    [[TMP157:%.*]] = phi <16 x i8> [ [[TMP152]], [[PRED_LOAD_CONTINUE52]] ], [ [[TMP156]], [[PRED_LOAD_IF53]] ]
2580; CHECK-MAXBW-NEXT:    [[TMP158:%.*]] = extractelement <16 x i1> [[TMP16]], i32 12
2581; CHECK-MAXBW-NEXT:    br i1 [[TMP158]], label [[PRED_LOAD_IF55:%.*]], label [[PRED_LOAD_CONTINUE56:%.*]]
2582; CHECK-MAXBW:       pred.load.if55:
2583; CHECK-MAXBW-NEXT:    [[TMP159:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]]
2584; CHECK-MAXBW-NEXT:    [[TMP160:%.*]] = load i8, ptr [[TMP159]], align 1
2585; CHECK-MAXBW-NEXT:    [[TMP161:%.*]] = insertelement <16 x i8> [[TMP157]], i8 [[TMP160]], i32 12
2586; CHECK-MAXBW-NEXT:    br label [[PRED_LOAD_CONTINUE56]]
2587; CHECK-MAXBW:       pred.load.continue56:
2588; CHECK-MAXBW-NEXT:    [[TMP162:%.*]] = phi <16 x i8> [ [[TMP157]], [[PRED_LOAD_CONTINUE54]] ], [ [[TMP161]], [[PRED_LOAD_IF55]] ]
2589; CHECK-MAXBW-NEXT:    [[TMP163:%.*]] = extractelement <16 x i1> [[TMP16]], i32 13
2590; CHECK-MAXBW-NEXT:    br i1 [[TMP163]], label [[PRED_LOAD_IF57:%.*]], label [[PRED_LOAD_CONTINUE58:%.*]]
2591; CHECK-MAXBW:       pred.load.if57:
2592; CHECK-MAXBW-NEXT:    [[TMP164:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP13]]
2593; CHECK-MAXBW-NEXT:    [[TMP165:%.*]] = load i8, ptr [[TMP164]], align 1
2594; CHECK-MAXBW-NEXT:    [[TMP166:%.*]] = insertelement <16 x i8> [[TMP162]], i8 [[TMP165]], i32 13
2595; CHECK-MAXBW-NEXT:    br label [[PRED_LOAD_CONTINUE58]]
2596; CHECK-MAXBW:       pred.load.continue58:
2597; CHECK-MAXBW-NEXT:    [[TMP167:%.*]] = phi <16 x i8> [ [[TMP162]], [[PRED_LOAD_CONTINUE56]] ], [ [[TMP166]], [[PRED_LOAD_IF57]] ]
2598; CHECK-MAXBW-NEXT:    [[TMP168:%.*]] = extractelement <16 x i1> [[TMP16]], i32 14
2599; CHECK-MAXBW-NEXT:    br i1 [[TMP168]], label [[PRED_LOAD_IF59:%.*]], label [[PRED_LOAD_CONTINUE60:%.*]]
2600; CHECK-MAXBW:       pred.load.if59:
2601; CHECK-MAXBW-NEXT:    [[TMP169:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP14]]
2602; CHECK-MAXBW-NEXT:    [[TMP170:%.*]] = load i8, ptr [[TMP169]], align 1
2603; CHECK-MAXBW-NEXT:    [[TMP171:%.*]] = insertelement <16 x i8> [[TMP167]], i8 [[TMP170]], i32 14
2604; CHECK-MAXBW-NEXT:    br label [[PRED_LOAD_CONTINUE60]]
2605; CHECK-MAXBW:       pred.load.continue60:
2606; CHECK-MAXBW-NEXT:    [[TMP172:%.*]] = phi <16 x i8> [ [[TMP167]], [[PRED_LOAD_CONTINUE58]] ], [ [[TMP171]], [[PRED_LOAD_IF59]] ]
2607; CHECK-MAXBW-NEXT:    [[TMP173:%.*]] = extractelement <16 x i1> [[TMP16]], i32 15
2608; CHECK-MAXBW-NEXT:    br i1 [[TMP173]], label [[PRED_LOAD_IF61:%.*]], label [[PRED_LOAD_CONTINUE62]]
2609; CHECK-MAXBW:       pred.load.if61:
2610; CHECK-MAXBW-NEXT:    [[TMP174:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP15]]
2611; CHECK-MAXBW-NEXT:    [[TMP175:%.*]] = load i8, ptr [[TMP174]], align 1
2612; CHECK-MAXBW-NEXT:    [[TMP176:%.*]] = insertelement <16 x i8> [[TMP172]], i8 [[TMP175]], i32 15
2613; CHECK-MAXBW-NEXT:    br label [[PRED_LOAD_CONTINUE62]]
2614; CHECK-MAXBW:       pred.load.continue62:
2615; CHECK-MAXBW-NEXT:    [[TMP177:%.*]] = phi <16 x i8> [ [[TMP172]], [[PRED_LOAD_CONTINUE60]] ], [ [[TMP176]], [[PRED_LOAD_IF61]] ]
2616; CHECK-MAXBW-NEXT:    [[TMP178:%.*]] = sext <16 x i8> [[TMP177]] to <16 x i32>
2617; CHECK-MAXBW-NEXT:    [[TMP179:%.*]] = mul nsw <16 x i32> [[TMP178]], [[TMP97]]
2618; CHECK-MAXBW-NEXT:    [[TMP180]] = add <16 x i32> [[TMP179]], [[VEC_PHI]]
2619; CHECK-MAXBW-NEXT:    [[TMP181:%.*]] = select <16 x i1> [[TMP16]], <16 x i32> [[TMP180]], <16 x i32> [[VEC_PHI]]
2620; CHECK-MAXBW-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 16
2621; CHECK-MAXBW-NEXT:    [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 16)
2622; CHECK-MAXBW-NEXT:    [[TMP182:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
2623; CHECK-MAXBW-NEXT:    br i1 [[TMP182]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
2624; CHECK-MAXBW:       middle.block:
2625; CHECK-MAXBW-NEXT:    [[TMP183:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP181]])
2626; CHECK-MAXBW-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
2627; CHECK-MAXBW:       scalar.ph:
2628; CHECK-MAXBW-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
2629; CHECK-MAXBW-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP183]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
2630; CHECK-MAXBW-NEXT:    br label [[FOR_BODY:%.*]]
2631; CHECK-MAXBW:       for.body:
2632; CHECK-MAXBW-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
2633; CHECK-MAXBW-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
2634; CHECK-MAXBW-NEXT:    [[GEP_A:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IV]]
2635; CHECK-MAXBW-NEXT:    [[LOAD_A:%.*]] = load i8, ptr [[GEP_A]], align 1
2636; CHECK-MAXBW-NEXT:    [[EXT_A:%.*]] = sext i8 [[LOAD_A]] to i32
2637; CHECK-MAXBW-NEXT:    [[GEP_A2:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
2638; CHECK-MAXBW-NEXT:    [[LOAD_B:%.*]] = load i8, ptr [[GEP_A2]], align 1
2639; CHECK-MAXBW-NEXT:    [[EXT_B:%.*]] = sext i8 [[LOAD_B]] to i32
2640; CHECK-MAXBW-NEXT:    [[MUL:%.*]] = mul nsw i32 [[EXT_B]], [[EXT_A]]
2641; CHECK-MAXBW-NEXT:    [[ADD]] = add nsw i32 [[MUL]], [[ACCUM]]
2642; CHECK-MAXBW-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
2643; CHECK-MAXBW-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
2644; CHECK-MAXBW-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
2645; CHECK-MAXBW:       exit:
2646; CHECK-MAXBW-NEXT:    [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP183]], [[MIDDLE_BLOCK]] ]
2647; CHECK-MAXBW-NEXT:    ret i32 [[ADD_LCSSA]]
2648;
2649entry:
2650  br label %for.body
2651
2652for.body:                                         ; preds = %entry, %for.body
2653  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
2654  %accum = phi i32 [ 0, %entry ], [ %add, %for.body ]
2655  %gep.a = getelementptr inbounds i8, ptr %b, i64 %iv
2656  %load.a = load i8, ptr %gep.a, align 1
2657  %ext.a = sext i8 %load.a to i32
2658  %gep.a2 = getelementptr inbounds i8, ptr %a, i64 %iv
2659  %load.b = load i8, ptr %gep.a2, align 1
2660  %ext.b = sext i8 %load.b to i32
2661  %mul = mul nsw i32 %ext.b, %ext.a
2662  %add = add nsw i32 %mul, %accum
2663  %iv.next = add nuw nsw i64 %iv, 1
2664  %exitcond.not = icmp eq i64 %iv.next, %N
2665  br i1 %exitcond.not, label %exit, label %for.body, !llvm.loop !7
2666
2667exit:                        ; preds = %for.body
2668  ret i32 %add
2669}
2670
2671define i32 @not_dotp_extend_user(ptr %a, ptr %b) {
2672; CHECK-INTERLEAVE1-LABEL: define i32 @not_dotp_extend_user(
2673; CHECK-INTERLEAVE1-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
2674; CHECK-INTERLEAVE1-NEXT:  entry:
2675; CHECK-INTERLEAVE1-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2676; CHECK-INTERLEAVE1:       vector.ph:
2677; CHECK-INTERLEAVE1-NEXT:    br label [[VECTOR_BODY:%.*]]
2678; CHECK-INTERLEAVE1:       vector.body:
2679; CHECK-INTERLEAVE1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2680; CHECK-INTERLEAVE1-NEXT:    [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
2681; CHECK-INTERLEAVE1-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
2682; CHECK-INTERLEAVE1-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]]
2683; CHECK-INTERLEAVE1-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 0
2684; CHECK-INTERLEAVE1-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
2685; CHECK-INTERLEAVE1-NEXT:    [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
2686; CHECK-INTERLEAVE1-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
2687; CHECK-INTERLEAVE1-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i32 0
2688; CHECK-INTERLEAVE1-NEXT:    [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1
2689; CHECK-INTERLEAVE1-NEXT:    [[TMP6:%.*]] = zext <16 x i8> [[WIDE_LOAD1]] to <16 x i32>
2690; CHECK-INTERLEAVE1-NEXT:    [[TMP7:%.*]] = mul <16 x i32> [[TMP6]], [[TMP3]]
2691; CHECK-INTERLEAVE1-NEXT:    [[TMP8]] = add <16 x i32> [[TMP7]], [[VEC_PHI]]
2692; CHECK-INTERLEAVE1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
2693; CHECK-INTERLEAVE1-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
2694; CHECK-INTERLEAVE1-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
2695; CHECK-INTERLEAVE1:       middle.block:
2696; CHECK-INTERLEAVE1-NEXT:    [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP8]])
2697; CHECK-INTERLEAVE1-NEXT:    [[TMP11:%.*]] = extractelement <16 x i32> [[TMP6]], i32 15
2698; CHECK-INTERLEAVE1-NEXT:    br i1 true, label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
2699; CHECK-INTERLEAVE1:       scalar.ph:
2700; CHECK-INTERLEAVE1-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
2701; CHECK-INTERLEAVE1-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
2702; CHECK-INTERLEAVE1-NEXT:    br label [[FOR_BODY:%.*]]
2703; CHECK-INTERLEAVE1:       for.body:
2704; CHECK-INTERLEAVE1-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
2705; CHECK-INTERLEAVE1-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
2706; CHECK-INTERLEAVE1-NEXT:    [[GEP_A:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV]]
2707; CHECK-INTERLEAVE1-NEXT:    [[LOAD_A:%.*]] = load i8, ptr [[GEP_A]], align 1
2708; CHECK-INTERLEAVE1-NEXT:    [[EXT_A:%.*]] = zext i8 [[LOAD_A]] to i32
2709; CHECK-INTERLEAVE1-NEXT:    [[GEP_B:%.*]] = getelementptr i8, ptr [[B]], i64 [[IV]]
2710; CHECK-INTERLEAVE1-NEXT:    [[LOAD_B:%.*]] = load i8, ptr [[GEP_B]], align 1
2711; CHECK-INTERLEAVE1-NEXT:    [[EXT_B:%.*]] = zext i8 [[LOAD_B]] to i32
2712; CHECK-INTERLEAVE1-NEXT:    [[MUL:%.*]] = mul i32 [[EXT_B]], [[EXT_A]]
2713; CHECK-INTERLEAVE1-NEXT:    [[ADD]] = add i32 [[MUL]], [[ACCUM]]
2714; CHECK-INTERLEAVE1-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
2715; CHECK-INTERLEAVE1-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
2716; CHECK-INTERLEAVE1-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
2717; CHECK-INTERLEAVE1:       for.exit:
2718; CHECK-INTERLEAVE1-NEXT:    [[EXT_B_LCSSA:%.*]] = phi i32 [ [[EXT_B]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
2719; CHECK-INTERLEAVE1-NEXT:    [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ]
2720; CHECK-INTERLEAVE1-NEXT:    [[RESULT:%.*]] = add i32 [[ADD_LCSSA]], [[EXT_B_LCSSA]]
2721; CHECK-INTERLEAVE1-NEXT:    ret i32 [[RESULT]]
2722;
2723; CHECK-INTERLEAVED-LABEL: define i32 @not_dotp_extend_user(
2724; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
2725; CHECK-INTERLEAVED-NEXT:  entry:
2726; CHECK-INTERLEAVED-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2727; CHECK-INTERLEAVED:       vector.ph:
2728; CHECK-INTERLEAVED-NEXT:    br label [[VECTOR_BODY:%.*]]
2729; CHECK-INTERLEAVED:       vector.body:
2730; CHECK-INTERLEAVED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2731; CHECK-INTERLEAVED-NEXT:    [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ]
2732; CHECK-INTERLEAVED-NEXT:    [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ]
2733; CHECK-INTERLEAVED-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
2734; CHECK-INTERLEAVED-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]]
2735; CHECK-INTERLEAVED-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 0
2736; CHECK-INTERLEAVED-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i32 16
2737; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
2738; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1
2739; CHECK-INTERLEAVED-NEXT:    [[TMP4:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
2740; CHECK-INTERLEAVED-NEXT:    [[TMP5:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i32>
2741; CHECK-INTERLEAVED-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
2742; CHECK-INTERLEAVED-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP6]], i32 0
2743; CHECK-INTERLEAVED-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[TMP6]], i32 16
2744; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP7]], align 1
2745; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP8]], align 1
2746; CHECK-INTERLEAVED-NEXT:    [[TMP9:%.*]] = zext <16 x i8> [[WIDE_LOAD3]] to <16 x i32>
2747; CHECK-INTERLEAVED-NEXT:    [[TMP10:%.*]] = zext <16 x i8> [[WIDE_LOAD4]] to <16 x i32>
2748; CHECK-INTERLEAVED-NEXT:    [[TMP11:%.*]] = mul <16 x i32> [[TMP9]], [[TMP4]]
2749; CHECK-INTERLEAVED-NEXT:    [[TMP12:%.*]] = mul <16 x i32> [[TMP10]], [[TMP5]]
2750; CHECK-INTERLEAVED-NEXT:    [[TMP13]] = add <16 x i32> [[TMP11]], [[VEC_PHI]]
2751; CHECK-INTERLEAVED-NEXT:    [[TMP14]] = add <16 x i32> [[TMP12]], [[VEC_PHI1]]
2752; CHECK-INTERLEAVED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
2753; CHECK-INTERLEAVED-NEXT:    [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
2754; CHECK-INTERLEAVED-NEXT:    br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
2755; CHECK-INTERLEAVED:       middle.block:
2756; CHECK-INTERLEAVED-NEXT:    [[BIN_RDX:%.*]] = add <16 x i32> [[TMP14]], [[TMP13]]
2757; CHECK-INTERLEAVED-NEXT:    [[TMP16:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[BIN_RDX]])
2758; CHECK-INTERLEAVED-NEXT:    [[TMP17:%.*]] = extractelement <16 x i32> [[TMP10]], i32 15
2759; CHECK-INTERLEAVED-NEXT:    br i1 true, label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
2760; CHECK-INTERLEAVED:       scalar.ph:
2761; CHECK-INTERLEAVED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
2762; CHECK-INTERLEAVED-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP16]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
2763; CHECK-INTERLEAVED-NEXT:    br label [[FOR_BODY:%.*]]
2764; CHECK-INTERLEAVED:       for.body:
2765; CHECK-INTERLEAVED-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
2766; CHECK-INTERLEAVED-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
2767; CHECK-INTERLEAVED-NEXT:    [[GEP_A:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV]]
2768; CHECK-INTERLEAVED-NEXT:    [[LOAD_A:%.*]] = load i8, ptr [[GEP_A]], align 1
2769; CHECK-INTERLEAVED-NEXT:    [[EXT_A:%.*]] = zext i8 [[LOAD_A]] to i32
2770; CHECK-INTERLEAVED-NEXT:    [[GEP_B:%.*]] = getelementptr i8, ptr [[B]], i64 [[IV]]
2771; CHECK-INTERLEAVED-NEXT:    [[LOAD_B:%.*]] = load i8, ptr [[GEP_B]], align 1
2772; CHECK-INTERLEAVED-NEXT:    [[EXT_B:%.*]] = zext i8 [[LOAD_B]] to i32
2773; CHECK-INTERLEAVED-NEXT:    [[MUL:%.*]] = mul i32 [[EXT_B]], [[EXT_A]]
2774; CHECK-INTERLEAVED-NEXT:    [[ADD]] = add i32 [[MUL]], [[ACCUM]]
2775; CHECK-INTERLEAVED-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
2776; CHECK-INTERLEAVED-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
2777; CHECK-INTERLEAVED-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
2778; CHECK-INTERLEAVED:       for.exit:
2779; CHECK-INTERLEAVED-NEXT:    [[EXT_B_LCSSA:%.*]] = phi i32 [ [[EXT_B]], [[FOR_BODY]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ]
2780; CHECK-INTERLEAVED-NEXT:    [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP16]], [[MIDDLE_BLOCK]] ]
2781; CHECK-INTERLEAVED-NEXT:    [[RESULT:%.*]] = add i32 [[ADD_LCSSA]], [[EXT_B_LCSSA]]
2782; CHECK-INTERLEAVED-NEXT:    ret i32 [[RESULT]]
2783;
2784; CHECK-MAXBW-LABEL: define i32 @not_dotp_extend_user(
2785; CHECK-MAXBW-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
2786; CHECK-MAXBW-NEXT:  entry:
2787; CHECK-MAXBW-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2788; CHECK-MAXBW:       vector.ph:
2789; CHECK-MAXBW-NEXT:    br label [[VECTOR_BODY:%.*]]
2790; CHECK-MAXBW:       vector.body:
2791; CHECK-MAXBW-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2792; CHECK-MAXBW-NEXT:    [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
2793; CHECK-MAXBW-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
2794; CHECK-MAXBW-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]]
2795; CHECK-MAXBW-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 0
2796; CHECK-MAXBW-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
2797; CHECK-MAXBW-NEXT:    [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
2798; CHECK-MAXBW-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
2799; CHECK-MAXBW-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i32 0
2800; CHECK-MAXBW-NEXT:    [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1
2801; CHECK-MAXBW-NEXT:    [[TMP6:%.*]] = zext <16 x i8> [[WIDE_LOAD1]] to <16 x i32>
2802; CHECK-MAXBW-NEXT:    [[TMP7:%.*]] = mul <16 x i32> [[TMP6]], [[TMP3]]
2803; CHECK-MAXBW-NEXT:    [[TMP8]] = add <16 x i32> [[TMP7]], [[VEC_PHI]]
2804; CHECK-MAXBW-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
2805; CHECK-MAXBW-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
2806; CHECK-MAXBW-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
2807; CHECK-MAXBW:       middle.block:
2808; CHECK-MAXBW-NEXT:    [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP8]])
2809; CHECK-MAXBW-NEXT:    [[TMP11:%.*]] = extractelement <16 x i32> [[TMP6]], i32 15
2810; CHECK-MAXBW-NEXT:    br i1 true, label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
2811; CHECK-MAXBW:       scalar.ph:
2812; CHECK-MAXBW-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
2813; CHECK-MAXBW-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
2814; CHECK-MAXBW-NEXT:    br label [[FOR_BODY:%.*]]
2815; CHECK-MAXBW:       for.body:
2816; CHECK-MAXBW-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
2817; CHECK-MAXBW-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
2818; CHECK-MAXBW-NEXT:    [[GEP_A:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV]]
2819; CHECK-MAXBW-NEXT:    [[LOAD_A:%.*]] = load i8, ptr [[GEP_A]], align 1
2820; CHECK-MAXBW-NEXT:    [[EXT_A:%.*]] = zext i8 [[LOAD_A]] to i32
2821; CHECK-MAXBW-NEXT:    [[GEP_B:%.*]] = getelementptr i8, ptr [[B]], i64 [[IV]]
2822; CHECK-MAXBW-NEXT:    [[LOAD_B:%.*]] = load i8, ptr [[GEP_B]], align 1
2823; CHECK-MAXBW-NEXT:    [[EXT_B:%.*]] = zext i8 [[LOAD_B]] to i32
2824; CHECK-MAXBW-NEXT:    [[MUL:%.*]] = mul i32 [[EXT_B]], [[EXT_A]]
2825; CHECK-MAXBW-NEXT:    [[ADD]] = add i32 [[MUL]], [[ACCUM]]
2826; CHECK-MAXBW-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
2827; CHECK-MAXBW-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
2828; CHECK-MAXBW-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
2829; CHECK-MAXBW:       for.exit:
2830; CHECK-MAXBW-NEXT:    [[EXT_B_LCSSA:%.*]] = phi i32 [ [[EXT_B]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
2831; CHECK-MAXBW-NEXT:    [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ]
2832; CHECK-MAXBW-NEXT:    [[RESULT:%.*]] = add i32 [[ADD_LCSSA]], [[EXT_B_LCSSA]]
2833; CHECK-MAXBW-NEXT:    ret i32 [[RESULT]]
2834;
2835entry:
2836  br label %for.body
2837
2838for.body:                                         ; preds = %for.body, %entry
2839  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
2840  %accum = phi i32 [ 0, %entry ], [ %add, %for.body ]
2841  %gep.a = getelementptr i8, ptr %a, i64 %iv
2842  %load.a = load i8, ptr %gep.a, align 1
2843  %ext.a = zext i8 %load.a to i32
2844  %gep.b = getelementptr i8, ptr %b, i64 %iv
2845  %load.b = load i8, ptr %gep.b, align 1
2846  %ext.b = zext i8 %load.b to i32
2847  %mul = mul i32 %ext.b, %ext.a
2848  %add = add i32 %mul, %accum
2849  %iv.next = add i64 %iv, 1
2850  %exitcond.not = icmp eq i64 %iv.next, 1024
2851  br i1 %exitcond.not, label %for.exit, label %for.body
2852
2853for.exit:                        ; preds = %for.body
2854  %result = add i32 %add, %ext.b
2855  ret i32 %result
2856}
2857
2858!7 = distinct !{!7, !8, !9, !10}
2859!8 = !{!"llvm.loop.mustprogress"}
2860!9 = !{!"llvm.loop.vectorize.predicate.enable", i1 true}
2861!10 = !{!"llvm.loop.vectorize.enable", i1 true}
2862;.
2863; CHECK-INTERLEAVE1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
2864; CHECK-INTERLEAVE1: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
2865; CHECK-INTERLEAVE1: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
2866; CHECK-INTERLEAVE1: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
2867; CHECK-INTERLEAVE1: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
2868; CHECK-INTERLEAVE1: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
2869; CHECK-INTERLEAVE1: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
2870; CHECK-INTERLEAVE1: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
2871; CHECK-INTERLEAVE1: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
2872; CHECK-INTERLEAVE1: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
2873; CHECK-INTERLEAVE1: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]}
2874; CHECK-INTERLEAVE1: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]}
2875; CHECK-INTERLEAVE1: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]}
2876; CHECK-INTERLEAVE1: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]}
2877; CHECK-INTERLEAVE1: [[LOOP14]] = distinct !{[[LOOP14]], [[META15:![0-9]+]], [[META1]], [[META2]]}
2878; CHECK-INTERLEAVE1: [[META15]] = !{!"llvm.loop.mustprogress"}
2879; CHECK-INTERLEAVE1: [[LOOP16]] = distinct !{[[LOOP16]], [[META15]], [[META2]], [[META1]]}
2880; CHECK-INTERLEAVE1: [[LOOP17]] = distinct !{[[LOOP17]], [[META1]], [[META2]]}
2881; CHECK-INTERLEAVE1: [[LOOP18]] = distinct !{[[LOOP18]], [[META2]], [[META1]]}
2882;.
2883; CHECK-INTERLEAVED: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
2884; CHECK-INTERLEAVED: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
2885; CHECK-INTERLEAVED: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
2886; CHECK-INTERLEAVED: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
2887; CHECK-INTERLEAVED: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
2888; CHECK-INTERLEAVED: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
2889; CHECK-INTERLEAVED: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
2890; CHECK-INTERLEAVED: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
2891; CHECK-INTERLEAVED: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
2892; CHECK-INTERLEAVED: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
2893; CHECK-INTERLEAVED: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]}
2894; CHECK-INTERLEAVED: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]}
2895; CHECK-INTERLEAVED: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]}
2896; CHECK-INTERLEAVED: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]}
2897; CHECK-INTERLEAVED: [[LOOP14]] = distinct !{[[LOOP14]], [[META15:![0-9]+]], [[META1]], [[META2]]}
2898; CHECK-INTERLEAVED: [[META15]] = !{!"llvm.loop.mustprogress"}
2899; CHECK-INTERLEAVED: [[LOOP16]] = distinct !{[[LOOP16]], [[META15]], [[META2]], [[META1]]}
2900; CHECK-INTERLEAVED: [[LOOP17]] = distinct !{[[LOOP17]], [[META1]], [[META2]]}
2901; CHECK-INTERLEAVED: [[LOOP18]] = distinct !{[[LOOP18]], [[META2]], [[META1]]}
2902;.
2903; CHECK-MAXBW: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
2904; CHECK-MAXBW: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
2905; CHECK-MAXBW: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
2906; CHECK-MAXBW: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
2907; CHECK-MAXBW: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
2908; CHECK-MAXBW: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
2909; CHECK-MAXBW: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
2910; CHECK-MAXBW: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
2911; CHECK-MAXBW: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
2912; CHECK-MAXBW: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
2913; CHECK-MAXBW: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]}
2914; CHECK-MAXBW: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]}
2915; CHECK-MAXBW: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]}
2916; CHECK-MAXBW: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]}
2917; CHECK-MAXBW: [[LOOP14]] = distinct !{[[LOOP14]], [[META15:![0-9]+]], [[META1]], [[META2]]}
2918; CHECK-MAXBW: [[META15]] = !{!"llvm.loop.mustprogress"}
2919; CHECK-MAXBW: [[LOOP16]] = distinct !{[[LOOP16]], [[META15]], [[META2]], [[META1]]}
2920; CHECK-MAXBW: [[LOOP17]] = distinct !{[[LOOP17]], [[META1]], [[META2]]}
2921; CHECK-MAXBW: [[LOOP18]] = distinct !{[[LOOP18]], [[META2]], [[META1]]}
2922;.
2923