xref: /llvm-project/llvm/test/Transforms/LoopIdiom/RISCV/byte-compare-index.ll (revision 56c091ea7106507b36015297ee9005c9d5fab0bf)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
2; RUN: opt -passes=loop-idiom-vectorize -mtriple=riscv64-unknown-linux-gnu -loop-idiom-vectorize-style=predicated -mattr=+v -S < %s | FileCheck %s
3; RUN: opt -passes=loop-idiom-vectorize -mtriple=riscv64-unknown-linux-gnu -loop-idiom-vectorize-style=predicated -loop-idiom-vectorize-bytecmp-vf=64 -mattr=+v -S < %s | FileCheck %s --check-prefix=LMUL8
4; RUN: opt -passes='loop(loop-idiom-vectorize),simplifycfg' -mtriple=riscv64-unknown-linux-gnu -loop-idiom-vectorize-style=predicated -mattr=+v -S < %s | FileCheck %s --check-prefix=LOOP-DEL
5; RUN: opt -passes=loop-idiom-vectorize -mtriple=riscv64-unknown-linux-gnu -loop-idiom-vectorize-style=masked -mattr=+v -S < %s | FileCheck %s --check-prefix=MASKED
6
; Basic byte-compare idiom. Starting at index %len + 1, the loop walks %a and
; %b in lock-step, one byte at a time, and stops either when the index equals
; %n or when the two bytes differ. The index at which it stopped is returned.
;
; The assertion groups that follow the signature are autogenerated by
; utils/update_test_checks.py (see the NOTE at the top of the file), one group
; per FileCheck prefix named on the RUN lines.
7define i32 @compare_bytes_simple(ptr %a, ptr %b, i32 %len, i32 %n) {
8; CHECK-LABEL: define i32 @compare_bytes_simple(
9; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
10; CHECK-NEXT:  entry:
11; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[LEN]], 1
12; CHECK-NEXT:    br label [[MISMATCH_MIN_IT_CHECK:%.*]]
13; CHECK:       mismatch_min_it_check:
14; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
15; CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[N]] to i64
16; CHECK-NEXT:    [[TMP3:%.*]] = icmp ule i32 [[TMP0]], [[N]]
17; CHECK-NEXT:    br i1 [[TMP3]], label [[MISMATCH_MEM_CHECK:%.*]], label [[MISMATCH_LOOP_PRE:%.*]], !prof [[PROF0:![0-9]+]]
18; CHECK:       mismatch_mem_check:
19; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]]
20; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
21; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP5]] to i64
22; CHECK-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[TMP4]] to i64
23; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]]
24; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
25; CHECK-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
26; CHECK-NEXT:    [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64
27; CHECK-NEXT:    [[TMP6:%.*]] = lshr i64 [[TMP7]], 12
28; CHECK-NEXT:    [[TMP9:%.*]] = lshr i64 [[TMP11]], 12
29; CHECK-NEXT:    [[TMP12:%.*]] = lshr i64 [[TMP8]], 12
30; CHECK-NEXT:    [[TMP15:%.*]] = lshr i64 [[TMP14]], 12
31; CHECK-NEXT:    [[TMP16:%.*]] = icmp ne i64 [[TMP6]], [[TMP9]]
32; CHECK-NEXT:    [[TMP17:%.*]] = icmp ne i64 [[TMP12]], [[TMP15]]
33; CHECK-NEXT:    [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]]
34; CHECK-NEXT:    br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_VECTOR_LOOP_PREHEADER:%.*]], !prof [[PROF1:![0-9]+]]
35; CHECK:       mismatch_vec_loop_preheader:
36; CHECK-NEXT:    br label [[MISMATCH_VECTOR_LOOP:%.*]]
37; CHECK:       mismatch_vec_loop:
38; CHECK-NEXT:    [[MISMATCH_VECTOR_INDEX:%.*]] = phi i64 [ [[TMP1]], [[MISMATCH_VECTOR_LOOP_PREHEADER]] ], [ [[TMP24:%.*]], [[MISMATCH_VECTOR_LOOP_INC:%.*]] ]
39; CHECK-NEXT:    [[AVL:%.*]] = sub nuw nsw i64 [[TMP2]], [[MISMATCH_VECTOR_INDEX]]
40; CHECK-NEXT:    [[TMP19:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 16, i1 true)
41; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VECTOR_INDEX]]
42; CHECK-NEXT:    [[LHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP20]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]])
43; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VECTOR_INDEX]]
44; CHECK-NEXT:    [[RHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP21]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]])
45; CHECK-NEXT:    [[MISMATCH_CMP:%.*]] = call <vscale x 16 x i1> @llvm.vp.icmp.nxv16i8(<vscale x 16 x i8> [[LHS_LOAD]], <vscale x 16 x i8> [[RHS_LOAD]], metadata !"ne", <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]])
46; CHECK-NEXT:    [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> [[MISMATCH_CMP]], i1 false, <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]])
47; CHECK-NEXT:    [[TMP22:%.*]] = icmp ne i32 [[FIRST]], [[TMP19]]
48; CHECK-NEXT:    br i1 [[TMP22]], label [[MISMATCH_VECTOR_LOOP_FOUND:%.*]], label [[MISMATCH_VECTOR_LOOP_INC]]
49; CHECK:       mismatch_vec_loop_inc:
50; CHECK-NEXT:    [[TMP23:%.*]] = zext i32 [[TMP19]] to i64
51; CHECK-NEXT:    [[TMP24]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX]], [[TMP23]]
52; CHECK-NEXT:    [[TMP25:%.*]] = icmp ne i64 [[TMP24]], [[TMP2]]
53; CHECK-NEXT:    br i1 [[TMP25]], label [[MISMATCH_VECTOR_LOOP]], label [[MISMATCH_END:%.*]]
54; CHECK:       mismatch_vec_loop_found:
55; CHECK-NEXT:    [[FIRST1:%.*]] = phi i32 [ [[FIRST]], [[MISMATCH_VECTOR_LOOP]] ]
56; CHECK-NEXT:    [[MISMATCH_VECTOR_INDEX2:%.*]] = phi i64 [ [[MISMATCH_VECTOR_INDEX]], [[MISMATCH_VECTOR_LOOP]] ]
57; CHECK-NEXT:    [[TMP26:%.*]] = zext i32 [[FIRST1]] to i64
58; CHECK-NEXT:    [[TMP27:%.*]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX2]], [[TMP26]]
59; CHECK-NEXT:    [[TMP28:%.*]] = trunc i64 [[TMP27]] to i32
60; CHECK-NEXT:    br label [[MISMATCH_END]]
61; CHECK:       mismatch_loop_pre:
62; CHECK-NEXT:    br label [[MISMATCH_LOOP:%.*]]
63; CHECK:       mismatch_loop:
64; CHECK-NEXT:    [[MISMATCH_INDEX:%.*]] = phi i32 [ [[TMP0]], [[MISMATCH_LOOP_PRE]] ], [ [[TMP35:%.*]], [[MISMATCH_LOOP_INC:%.*]] ]
65; CHECK-NEXT:    [[TMP29:%.*]] = zext i32 [[MISMATCH_INDEX]] to i64
66; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP29]]
67; CHECK-NEXT:    [[TMP31:%.*]] = load i8, ptr [[TMP30]], align 1
68; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP29]]
69; CHECK-NEXT:    [[TMP33:%.*]] = load i8, ptr [[TMP32]], align 1
70; CHECK-NEXT:    [[TMP34:%.*]] = icmp eq i8 [[TMP31]], [[TMP33]]
71; CHECK-NEXT:    br i1 [[TMP34]], label [[MISMATCH_LOOP_INC]], label [[MISMATCH_END]]
72; CHECK:       mismatch_loop_inc:
73; CHECK-NEXT:    [[TMP35]] = add i32 [[MISMATCH_INDEX]], 1
74; CHECK-NEXT:    [[TMP36:%.*]] = icmp eq i32 [[TMP35]], [[N]]
75; CHECK-NEXT:    br i1 [[TMP36]], label [[MISMATCH_END]], label [[MISMATCH_LOOP]]
76; CHECK:       mismatch_end:
77; CHECK-NEXT:    [[MISMATCH_RESULT:%.*]] = phi i32 [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_VECTOR_LOOP_INC]] ], [ [[TMP28]], [[MISMATCH_VECTOR_LOOP_FOUND]] ]
78; CHECK-NEXT:    br i1 true, label [[BYTE_COMPARE:%.*]], label [[WHILE_COND:%.*]]
79; CHECK:       while.cond:
80; CHECK-NEXT:    [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[MISMATCH_END]] ], [ [[MISMATCH_RESULT]], [[WHILE_BODY:%.*]] ]
81; CHECK-NEXT:    [[INC:%.*]] = add i32 [[LEN_ADDR]], 1
82; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], [[N]]
83; CHECK-NEXT:    br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
84; CHECK:       while.body:
85; CHECK-NEXT:    [[IDXPROM:%.*]] = zext i32 [[MISMATCH_RESULT]] to i64
86; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
87; CHECK-NEXT:    [[TMP37:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
88; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
89; CHECK-NEXT:    [[TMP38:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
90; CHECK-NEXT:    [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP37]], [[TMP38]]
91; CHECK-NEXT:    br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
92; CHECK:       byte.compare:
93; CHECK-NEXT:    br label [[WHILE_END]]
94; CHECK:       while.end:
95; CHECK-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_BODY]] ], [ [[MISMATCH_RESULT]], [[WHILE_COND]] ], [ [[MISMATCH_RESULT]], [[BYTE_COMPARE]] ]
96; CHECK-NEXT:    ret i32 [[INC_LCSSA]]
97;
98; LMUL8-LABEL: define i32 @compare_bytes_simple(
99; LMUL8-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
100; LMUL8-NEXT:  entry:
101; LMUL8-NEXT:    [[TMP0:%.*]] = add i32 [[LEN]], 1
102; LMUL8-NEXT:    br label [[MISMATCH_MIN_IT_CHECK:%.*]]
103; LMUL8:       mismatch_min_it_check:
104; LMUL8-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
105; LMUL8-NEXT:    [[TMP2:%.*]] = zext i32 [[N]] to i64
106; LMUL8-NEXT:    [[TMP3:%.*]] = icmp ule i32 [[TMP0]], [[N]]
107; LMUL8-NEXT:    br i1 [[TMP3]], label [[MISMATCH_MEM_CHECK:%.*]], label [[MISMATCH_LOOP_PRE:%.*]], !prof [[PROF0:![0-9]+]]
108; LMUL8:       mismatch_mem_check:
109; LMUL8-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]]
110; LMUL8-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
111; LMUL8-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP5]] to i64
112; LMUL8-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[TMP4]] to i64
113; LMUL8-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]]
114; LMUL8-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
115; LMUL8-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
116; LMUL8-NEXT:    [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64
117; LMUL8-NEXT:    [[TMP6:%.*]] = lshr i64 [[TMP7]], 12
118; LMUL8-NEXT:    [[TMP9:%.*]] = lshr i64 [[TMP11]], 12
119; LMUL8-NEXT:    [[TMP12:%.*]] = lshr i64 [[TMP8]], 12
120; LMUL8-NEXT:    [[TMP15:%.*]] = lshr i64 [[TMP14]], 12
121; LMUL8-NEXT:    [[TMP16:%.*]] = icmp ne i64 [[TMP6]], [[TMP9]]
122; LMUL8-NEXT:    [[TMP17:%.*]] = icmp ne i64 [[TMP12]], [[TMP15]]
123; LMUL8-NEXT:    [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]]
124; LMUL8-NEXT:    br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_VECTOR_LOOP_PREHEADER:%.*]], !prof [[PROF1:![0-9]+]]
125; LMUL8:       mismatch_vec_loop_preheader:
126; LMUL8-NEXT:    br label [[MISMATCH_VECTOR_LOOP:%.*]]
127; LMUL8:       mismatch_vec_loop:
128; LMUL8-NEXT:    [[MISMATCH_VECTOR_INDEX:%.*]] = phi i64 [ [[TMP1]], [[MISMATCH_VECTOR_LOOP_PREHEADER]] ], [ [[TMP24:%.*]], [[MISMATCH_VECTOR_LOOP_INC:%.*]] ]
129; LMUL8-NEXT:    [[AVL:%.*]] = sub nuw nsw i64 [[TMP2]], [[MISMATCH_VECTOR_INDEX]]
130; LMUL8-NEXT:    [[TMP19:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 64, i1 true)
131; LMUL8-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VECTOR_INDEX]]
132; LMUL8-NEXT:    [[LHS_LOAD:%.*]] = call <vscale x 64 x i8> @llvm.vp.load.nxv64i8.p0(ptr [[TMP20]], <vscale x 64 x i1> splat (i1 true), i32 [[TMP19]])
133; LMUL8-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VECTOR_INDEX]]
134; LMUL8-NEXT:    [[RHS_LOAD:%.*]] = call <vscale x 64 x i8> @llvm.vp.load.nxv64i8.p0(ptr [[TMP21]], <vscale x 64 x i1> splat (i1 true), i32 [[TMP19]])
135; LMUL8-NEXT:    [[MISMATCH_CMP:%.*]] = call <vscale x 64 x i1> @llvm.vp.icmp.nxv64i8(<vscale x 64 x i8> [[LHS_LOAD]], <vscale x 64 x i8> [[RHS_LOAD]], metadata !"ne", <vscale x 64 x i1> splat (i1 true), i32 [[TMP19]])
136; LMUL8-NEXT:    [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv64i1(<vscale x 64 x i1> [[MISMATCH_CMP]], i1 false, <vscale x 64 x i1> splat (i1 true), i32 [[TMP19]])
137; LMUL8-NEXT:    [[TMP22:%.*]] = icmp ne i32 [[FIRST]], [[TMP19]]
138; LMUL8-NEXT:    br i1 [[TMP22]], label [[MISMATCH_VECTOR_LOOP_FOUND:%.*]], label [[MISMATCH_VECTOR_LOOP_INC]]
139; LMUL8:       mismatch_vec_loop_inc:
140; LMUL8-NEXT:    [[TMP23:%.*]] = zext i32 [[TMP19]] to i64
141; LMUL8-NEXT:    [[TMP24]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX]], [[TMP23]]
142; LMUL8-NEXT:    [[TMP25:%.*]] = icmp ne i64 [[TMP24]], [[TMP2]]
143; LMUL8-NEXT:    br i1 [[TMP25]], label [[MISMATCH_VECTOR_LOOP]], label [[MISMATCH_END:%.*]]
144; LMUL8:       mismatch_vec_loop_found:
145; LMUL8-NEXT:    [[FIRST1:%.*]] = phi i32 [ [[FIRST]], [[MISMATCH_VECTOR_LOOP]] ]
146; LMUL8-NEXT:    [[MISMATCH_VECTOR_INDEX2:%.*]] = phi i64 [ [[MISMATCH_VECTOR_INDEX]], [[MISMATCH_VECTOR_LOOP]] ]
147; LMUL8-NEXT:    [[TMP26:%.*]] = zext i32 [[FIRST1]] to i64
148; LMUL8-NEXT:    [[TMP27:%.*]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX2]], [[TMP26]]
149; LMUL8-NEXT:    [[TMP28:%.*]] = trunc i64 [[TMP27]] to i32
150; LMUL8-NEXT:    br label [[MISMATCH_END]]
151; LMUL8:       mismatch_loop_pre:
152; LMUL8-NEXT:    br label [[MISMATCH_LOOP:%.*]]
153; LMUL8:       mismatch_loop:
154; LMUL8-NEXT:    [[MISMATCH_INDEX:%.*]] = phi i32 [ [[TMP0]], [[MISMATCH_LOOP_PRE]] ], [ [[TMP35:%.*]], [[MISMATCH_LOOP_INC:%.*]] ]
155; LMUL8-NEXT:    [[TMP29:%.*]] = zext i32 [[MISMATCH_INDEX]] to i64
156; LMUL8-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP29]]
157; LMUL8-NEXT:    [[TMP31:%.*]] = load i8, ptr [[TMP30]], align 1
158; LMUL8-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP29]]
159; LMUL8-NEXT:    [[TMP33:%.*]] = load i8, ptr [[TMP32]], align 1
160; LMUL8-NEXT:    [[TMP34:%.*]] = icmp eq i8 [[TMP31]], [[TMP33]]
161; LMUL8-NEXT:    br i1 [[TMP34]], label [[MISMATCH_LOOP_INC]], label [[MISMATCH_END]]
162; LMUL8:       mismatch_loop_inc:
163; LMUL8-NEXT:    [[TMP35]] = add i32 [[MISMATCH_INDEX]], 1
164; LMUL8-NEXT:    [[TMP36:%.*]] = icmp eq i32 [[TMP35]], [[N]]
165; LMUL8-NEXT:    br i1 [[TMP36]], label [[MISMATCH_END]], label [[MISMATCH_LOOP]]
166; LMUL8:       mismatch_end:
167; LMUL8-NEXT:    [[MISMATCH_RESULT:%.*]] = phi i32 [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_VECTOR_LOOP_INC]] ], [ [[TMP28]], [[MISMATCH_VECTOR_LOOP_FOUND]] ]
168; LMUL8-NEXT:    br i1 true, label [[BYTE_COMPARE:%.*]], label [[WHILE_COND:%.*]]
169; LMUL8:       while.cond:
170; LMUL8-NEXT:    [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[MISMATCH_END]] ], [ [[MISMATCH_RESULT]], [[WHILE_BODY:%.*]] ]
171; LMUL8-NEXT:    [[INC:%.*]] = add i32 [[LEN_ADDR]], 1
172; LMUL8-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], [[N]]
173; LMUL8-NEXT:    br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
174; LMUL8:       while.body:
175; LMUL8-NEXT:    [[IDXPROM:%.*]] = zext i32 [[MISMATCH_RESULT]] to i64
176; LMUL8-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
177; LMUL8-NEXT:    [[TMP37:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
178; LMUL8-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
179; LMUL8-NEXT:    [[TMP38:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
180; LMUL8-NEXT:    [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP37]], [[TMP38]]
181; LMUL8-NEXT:    br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
182; LMUL8:       byte.compare:
183; LMUL8-NEXT:    br label [[WHILE_END]]
184; LMUL8:       while.end:
185; LMUL8-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_BODY]] ], [ [[MISMATCH_RESULT]], [[WHILE_COND]] ], [ [[MISMATCH_RESULT]], [[BYTE_COMPARE]] ]
186; LMUL8-NEXT:    ret i32 [[INC_LCSSA]]
187;
188; LOOP-DEL-LABEL: define i32 @compare_bytes_simple(
189; LOOP-DEL-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
190; LOOP-DEL-NEXT:  entry:
191; LOOP-DEL-NEXT:    [[TMP0:%.*]] = add i32 [[LEN]], 1
192; LOOP-DEL-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
193; LOOP-DEL-NEXT:    [[TMP2:%.*]] = zext i32 [[N]] to i64
194; LOOP-DEL-NEXT:    [[TMP3:%.*]] = icmp ule i32 [[TMP0]], [[N]]
195; LOOP-DEL-NEXT:    br i1 [[TMP3]], label [[MISMATCH_MEM_CHECK:%.*]], label [[MISMATCH_LOOP_PRE:%.*]], !prof [[PROF0:![0-9]+]]
196; LOOP-DEL:       mismatch_mem_check:
197; LOOP-DEL-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]]
198; LOOP-DEL-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
199; LOOP-DEL-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP5]] to i64
200; LOOP-DEL-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[TMP4]] to i64
201; LOOP-DEL-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]]
202; LOOP-DEL-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
203; LOOP-DEL-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
204; LOOP-DEL-NEXT:    [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64
205; LOOP-DEL-NEXT:    [[TMP6:%.*]] = lshr i64 [[TMP7]], 12
206; LOOP-DEL-NEXT:    [[TMP9:%.*]] = lshr i64 [[TMP11]], 12
207; LOOP-DEL-NEXT:    [[TMP12:%.*]] = lshr i64 [[TMP8]], 12
208; LOOP-DEL-NEXT:    [[TMP15:%.*]] = lshr i64 [[TMP14]], 12
209; LOOP-DEL-NEXT:    [[TMP16:%.*]] = icmp ne i64 [[TMP6]], [[TMP9]]
210; LOOP-DEL-NEXT:    [[TMP17:%.*]] = icmp ne i64 [[TMP12]], [[TMP15]]
211; LOOP-DEL-NEXT:    [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]]
212; LOOP-DEL-NEXT:    br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_VECTOR_LOOP:%.*]], !prof [[PROF1:![0-9]+]]
213; LOOP-DEL:       mismatch_vec_loop:
214; LOOP-DEL-NEXT:    [[MISMATCH_VECTOR_INDEX:%.*]] = phi i64 [ [[TMP24:%.*]], [[MISMATCH_VECTOR_LOOP_INC:%.*]] ], [ [[TMP1]], [[MISMATCH_MEM_CHECK]] ]
215; LOOP-DEL-NEXT:    [[AVL:%.*]] = sub nuw nsw i64 [[TMP2]], [[MISMATCH_VECTOR_INDEX]]
216; LOOP-DEL-NEXT:    [[TMP19:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 16, i1 true)
217; LOOP-DEL-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VECTOR_INDEX]]
218; LOOP-DEL-NEXT:    [[LHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP20]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]])
219; LOOP-DEL-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VECTOR_INDEX]]
220; LOOP-DEL-NEXT:    [[RHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP21]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]])
221; LOOP-DEL-NEXT:    [[MISMATCH_CMP:%.*]] = call <vscale x 16 x i1> @llvm.vp.icmp.nxv16i8(<vscale x 16 x i8> [[LHS_LOAD]], <vscale x 16 x i8> [[RHS_LOAD]], metadata !"ne", <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]])
222; LOOP-DEL-NEXT:    [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> [[MISMATCH_CMP]], i1 false, <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]])
223; LOOP-DEL-NEXT:    [[TMP22:%.*]] = icmp ne i32 [[FIRST]], [[TMP19]]
224; LOOP-DEL-NEXT:    br i1 [[TMP22]], label [[MISMATCH_VECTOR_LOOP_FOUND:%.*]], label [[MISMATCH_VECTOR_LOOP_INC]]
225; LOOP-DEL:       mismatch_vec_loop_inc:
226; LOOP-DEL-NEXT:    [[TMP23:%.*]] = zext i32 [[TMP19]] to i64
227; LOOP-DEL-NEXT:    [[TMP24]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX]], [[TMP23]]
228; LOOP-DEL-NEXT:    [[TMP25:%.*]] = icmp ne i64 [[TMP24]], [[TMP2]]
229; LOOP-DEL-NEXT:    br i1 [[TMP25]], label [[MISMATCH_VECTOR_LOOP]], label [[WHILE_END:%.*]]
230; LOOP-DEL:       mismatch_vec_loop_found:
231; LOOP-DEL-NEXT:    [[FIRST1:%.*]] = phi i32 [ [[FIRST]], [[MISMATCH_VECTOR_LOOP]] ]
232; LOOP-DEL-NEXT:    [[MISMATCH_VECTOR_INDEX2:%.*]] = phi i64 [ [[MISMATCH_VECTOR_INDEX]], [[MISMATCH_VECTOR_LOOP]] ]
233; LOOP-DEL-NEXT:    [[TMP26:%.*]] = zext i32 [[FIRST1]] to i64
234; LOOP-DEL-NEXT:    [[TMP27:%.*]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX2]], [[TMP26]]
235; LOOP-DEL-NEXT:    [[TMP28:%.*]] = trunc i64 [[TMP27]] to i32
236; LOOP-DEL-NEXT:    br label [[WHILE_END]]
237; LOOP-DEL:       mismatch_loop_pre:
238; LOOP-DEL-NEXT:    br label [[MISMATCH_LOOP:%.*]]
239; LOOP-DEL:       mismatch_loop:
240; LOOP-DEL-NEXT:    [[MISMATCH_INDEX:%.*]] = phi i32 [ [[TMP0]], [[MISMATCH_LOOP_PRE]] ], [ [[TMP35:%.*]], [[MISMATCH_LOOP_INC:%.*]] ]
241; LOOP-DEL-NEXT:    [[TMP29:%.*]] = zext i32 [[MISMATCH_INDEX]] to i64
242; LOOP-DEL-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP29]]
243; LOOP-DEL-NEXT:    [[TMP31:%.*]] = load i8, ptr [[TMP30]], align 1
244; LOOP-DEL-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP29]]
245; LOOP-DEL-NEXT:    [[TMP33:%.*]] = load i8, ptr [[TMP32]], align 1
246; LOOP-DEL-NEXT:    [[TMP34:%.*]] = icmp eq i8 [[TMP31]], [[TMP33]]
247; LOOP-DEL-NEXT:    br i1 [[TMP34]], label [[MISMATCH_LOOP_INC]], label [[WHILE_END]]
248; LOOP-DEL:       mismatch_loop_inc:
249; LOOP-DEL-NEXT:    [[TMP35]] = add i32 [[MISMATCH_INDEX]], 1
250; LOOP-DEL-NEXT:    [[TMP36:%.*]] = icmp eq i32 [[TMP35]], [[N]]
251; LOOP-DEL-NEXT:    br i1 [[TMP36]], label [[WHILE_END]], label [[MISMATCH_LOOP]]
252; LOOP-DEL:       while.end:
253; LOOP-DEL-NEXT:    [[MISMATCH_RESULT:%.*]] = phi i32 [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_VECTOR_LOOP_INC]] ], [ [[TMP28]], [[MISMATCH_VECTOR_LOOP_FOUND]] ]
254; LOOP-DEL-NEXT:    ret i32 [[MISMATCH_RESULT]]
255;
256; MASKED-LABEL: define i32 @compare_bytes_simple(
257; MASKED-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
258; MASKED-NEXT:  entry:
259; MASKED-NEXT:    [[TMP0:%.*]] = add i32 [[LEN]], 1
260; MASKED-NEXT:    br label [[MISMATCH_MIN_IT_CHECK:%.*]]
261; MASKED:       mismatch_min_it_check:
262; MASKED-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
263; MASKED-NEXT:    [[TMP2:%.*]] = zext i32 [[N]] to i64
264; MASKED-NEXT:    [[TMP3:%.*]] = icmp ule i32 [[TMP0]], [[N]]
265; MASKED-NEXT:    br i1 [[TMP3]], label [[MISMATCH_MEM_CHECK:%.*]], label [[MISMATCH_LOOP_PRE:%.*]], !prof [[PROF0:![0-9]+]]
266; MASKED:       mismatch_mem_check:
267; MASKED-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]]
268; MASKED-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
269; MASKED-NEXT:    [[TMP6:%.*]] = ptrtoint ptr [[TMP5]] to i64
270; MASKED-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[TMP4]] to i64
271; MASKED-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]]
272; MASKED-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
273; MASKED-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP8]] to i64
274; MASKED-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[TMP9]] to i64
275; MASKED-NEXT:    [[TMP12:%.*]] = lshr i64 [[TMP7]], 12
276; MASKED-NEXT:    [[TMP13:%.*]] = lshr i64 [[TMP10]], 12
277; MASKED-NEXT:    [[TMP14:%.*]] = lshr i64 [[TMP6]], 12
278; MASKED-NEXT:    [[TMP15:%.*]] = lshr i64 [[TMP11]], 12
279; MASKED-NEXT:    [[TMP16:%.*]] = icmp ne i64 [[TMP12]], [[TMP13]]
280; MASKED-NEXT:    [[TMP17:%.*]] = icmp ne i64 [[TMP14]], [[TMP15]]
281; MASKED-NEXT:    [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]]
282; MASKED-NEXT:    br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_VEC_LOOP_PREHEADER:%.*]], !prof [[PROF1:![0-9]+]]
283; MASKED:       mismatch_vec_loop_preheader:
284; MASKED-NEXT:    [[TMP19:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP1]], i64 [[TMP2]])
285; MASKED-NEXT:    [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
286; MASKED-NEXT:    [[TMP21:%.*]] = mul nuw nsw i64 [[TMP20]], 16
287; MASKED-NEXT:    br label [[MISMATCH_VEC_LOOP:%.*]]
288; MASKED:       mismatch_vec_loop:
289; MASKED-NEXT:    [[MISMATCH_VEC_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP19]], [[MISMATCH_VEC_LOOP_PREHEADER]] ], [ [[TMP30:%.*]], [[MISMATCH_VEC_LOOP_INC:%.*]] ]
290; MASKED-NEXT:    [[MISMATCH_VEC_INDEX:%.*]] = phi i64 [ [[TMP1]], [[MISMATCH_VEC_LOOP_PREHEADER]] ], [ [[TMP29:%.*]], [[MISMATCH_VEC_LOOP_INC]] ]
291; MASKED-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VEC_INDEX]]
292; MASKED-NEXT:    [[TMP23:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP22]], i32 1, <vscale x 16 x i1> [[MISMATCH_VEC_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
293; MASKED-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VEC_INDEX]]
294; MASKED-NEXT:    [[TMP25:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP24]], i32 1, <vscale x 16 x i1> [[MISMATCH_VEC_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
295; MASKED-NEXT:    [[TMP26:%.*]] = icmp ne <vscale x 16 x i8> [[TMP23]], [[TMP25]]
296; MASKED-NEXT:    [[TMP27:%.*]] = select <vscale x 16 x i1> [[MISMATCH_VEC_LOOP_PRED]], <vscale x 16 x i1> [[TMP26]], <vscale x 16 x i1> zeroinitializer
297; MASKED-NEXT:    [[TMP28:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[TMP27]])
298; MASKED-NEXT:    br i1 [[TMP28]], label [[MISMATCH_VEC_LOOP_FOUND:%.*]], label [[MISMATCH_VEC_LOOP_INC]]
299; MASKED:       mismatch_vec_loop_inc:
300; MASKED-NEXT:    [[TMP29]] = add nuw nsw i64 [[MISMATCH_VEC_INDEX]], [[TMP21]]
301; MASKED-NEXT:    [[TMP30]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP29]], i64 [[TMP2]])
302; MASKED-NEXT:    [[TMP31:%.*]] = extractelement <vscale x 16 x i1> [[TMP30]], i64 0
303; MASKED-NEXT:    br i1 [[TMP31]], label [[MISMATCH_VEC_LOOP]], label [[MISMATCH_END:%.*]]
304; MASKED:       mismatch_vec_loop_found:
305; MASKED-NEXT:    [[MISMATCH_VEC_FOUND_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP27]], [[MISMATCH_VEC_LOOP]] ]
306; MASKED-NEXT:    [[MISMATCH_VEC_LAST_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[MISMATCH_VEC_LOOP_PRED]], [[MISMATCH_VEC_LOOP]] ]
307; MASKED-NEXT:    [[MISMATCH_VEC_FOUND_INDEX:%.*]] = phi i64 [ [[MISMATCH_VEC_INDEX]], [[MISMATCH_VEC_LOOP]] ]
308; MASKED-NEXT:    [[TMP32:%.*]] = and <vscale x 16 x i1> [[MISMATCH_VEC_LAST_LOOP_PRED]], [[MISMATCH_VEC_FOUND_PRED]]
309; MASKED-NEXT:    [[TMP33:%.*]] = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> [[TMP32]], i1 true)
310; MASKED-NEXT:    [[TMP34:%.*]] = zext i32 [[TMP33]] to i64
311; MASKED-NEXT:    [[TMP35:%.*]] = add nuw nsw i64 [[MISMATCH_VEC_FOUND_INDEX]], [[TMP34]]
312; MASKED-NEXT:    [[TMP36:%.*]] = trunc i64 [[TMP35]] to i32
313; MASKED-NEXT:    br label [[MISMATCH_END]]
314; MASKED:       mismatch_loop_pre:
315; MASKED-NEXT:    br label [[MISMATCH_LOOP:%.*]]
316; MASKED:       mismatch_loop:
317; MASKED-NEXT:    [[MISMATCH_INDEX:%.*]] = phi i32 [ [[TMP0]], [[MISMATCH_LOOP_PRE]] ], [ [[TMP43:%.*]], [[MISMATCH_LOOP_INC:%.*]] ]
318; MASKED-NEXT:    [[TMP37:%.*]] = zext i32 [[MISMATCH_INDEX]] to i64
319; MASKED-NEXT:    [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP37]]
320; MASKED-NEXT:    [[TMP39:%.*]] = load i8, ptr [[TMP38]], align 1
321; MASKED-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP37]]
322; MASKED-NEXT:    [[TMP41:%.*]] = load i8, ptr [[TMP40]], align 1
323; MASKED-NEXT:    [[TMP42:%.*]] = icmp eq i8 [[TMP39]], [[TMP41]]
324; MASKED-NEXT:    br i1 [[TMP42]], label [[MISMATCH_LOOP_INC]], label [[MISMATCH_END]]
325; MASKED:       mismatch_loop_inc:
326; MASKED-NEXT:    [[TMP43]] = add i32 [[MISMATCH_INDEX]], 1
327; MASKED-NEXT:    [[TMP44:%.*]] = icmp eq i32 [[TMP43]], [[N]]
328; MASKED-NEXT:    br i1 [[TMP44]], label [[MISMATCH_END]], label [[MISMATCH_LOOP]]
329; MASKED:       mismatch_end:
330; MASKED-NEXT:    [[MISMATCH_RESULT:%.*]] = phi i32 [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_VEC_LOOP_INC]] ], [ [[TMP36]], [[MISMATCH_VEC_LOOP_FOUND]] ]
331; MASKED-NEXT:    br i1 true, label [[BYTE_COMPARE:%.*]], label [[WHILE_COND:%.*]]
332; MASKED:       while.cond:
333; MASKED-NEXT:    [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[MISMATCH_END]] ], [ [[MISMATCH_RESULT]], [[WHILE_BODY:%.*]] ]
334; MASKED-NEXT:    [[INC:%.*]] = add i32 [[LEN_ADDR]], 1
335; MASKED-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], [[N]]
336; MASKED-NEXT:    br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
337; MASKED:       while.body:
338; MASKED-NEXT:    [[IDXPROM:%.*]] = zext i32 [[MISMATCH_RESULT]] to i64
339; MASKED-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
340; MASKED-NEXT:    [[TMP45:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
341; MASKED-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
342; MASKED-NEXT:    [[TMP46:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
343; MASKED-NEXT:    [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP45]], [[TMP46]]
344; MASKED-NEXT:    br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
345; MASKED:       byte.compare:
346; MASKED-NEXT:    br label [[WHILE_END]]
347; MASKED:       while.end:
348; MASKED-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_BODY]] ], [ [[MISMATCH_RESULT]], [[WHILE_COND]] ], [ [[MISMATCH_RESULT]], [[BYTE_COMPARE]] ]
349; MASKED-NEXT:    ret i32 [[INC_LCSSA]]
350;
; Input IR follows: the scalar loop that the assertions above expect to be
; rewritten into the mismatch_* blocks.
351entry:
352  br label %while.cond
353
; Loop header: %inc is the index examined on this iteration (previous index
; plus one, seeded from %len). The loop exits once %inc equals %n.
354while.cond:
355  %len.addr = phi i32 [ %len, %entry ], [ %inc, %while.body ]
356  %inc = add i32 %len.addr, 1
357  %cmp.not = icmp eq i32 %inc, %n
358  br i1 %cmp.not, label %while.end, label %while.body
359
; Loop body: load one byte from each buffer at offset %inc (zero-extended to
; i64) and keep iterating only while the two bytes are equal.
360while.body:
361  %idxprom = zext i32 %inc to i64
362  %arrayidx = getelementptr inbounds i8, ptr %a, i64 %idxprom
363  %0 = load i8, ptr %arrayidx
364  %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %idxprom
365  %1 = load i8, ptr %arrayidx2
366  %cmp.not2 = icmp eq i8 %0, %1
367  br i1 %cmp.not2, label %while.cond, label %while.end
368
; Both exits forward %inc, so the result is the index of the first differing
; byte, or %n when the header exit was taken.
369while.end:
370  %inc.lcssa = phi i32 [ %inc, %while.body ], [ %inc, %while.cond ]
371  ret i32 %inc.lcssa
372}
373
374define i32 @compare_bytes_signed_wrap(ptr %a, ptr %b, i32 %len, i32 %n) {
375; CHECK-LABEL: define i32 @compare_bytes_signed_wrap(
376; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
377; CHECK-NEXT:  entry:
378; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[LEN]], 1
379; CHECK-NEXT:    br label [[MISMATCH_MIN_IT_CHECK:%.*]]
380; CHECK:       mismatch_min_it_check:
381; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
382; CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[N]] to i64
383; CHECK-NEXT:    [[TMP3:%.*]] = icmp ule i32 [[TMP0]], [[N]]
384; CHECK-NEXT:    br i1 [[TMP3]], label [[MISMATCH_MEM_CHECK:%.*]], label [[MISMATCH_LOOP_PRE:%.*]], !prof [[PROF0]]
385; CHECK:       mismatch_mem_check:
386; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]]
387; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
388; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP5]] to i64
389; CHECK-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[TMP4]] to i64
390; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]]
391; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
392; CHECK-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
393; CHECK-NEXT:    [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64
394; CHECK-NEXT:    [[TMP6:%.*]] = lshr i64 [[TMP7]], 12
395; CHECK-NEXT:    [[TMP9:%.*]] = lshr i64 [[TMP11]], 12
396; CHECK-NEXT:    [[TMP12:%.*]] = lshr i64 [[TMP8]], 12
397; CHECK-NEXT:    [[TMP15:%.*]] = lshr i64 [[TMP14]], 12
398; CHECK-NEXT:    [[TMP16:%.*]] = icmp ne i64 [[TMP6]], [[TMP9]]
399; CHECK-NEXT:    [[TMP17:%.*]] = icmp ne i64 [[TMP12]], [[TMP15]]
400; CHECK-NEXT:    [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]]
401; CHECK-NEXT:    br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_VECTOR_LOOP_PREHEADER:%.*]], !prof [[PROF1]]
402; CHECK:       mismatch_vec_loop_preheader:
403; CHECK-NEXT:    br label [[MISMATCH_VECTOR_LOOP:%.*]]
404; CHECK:       mismatch_vec_loop:
405; CHECK-NEXT:    [[MISMATCH_VECTOR_INDEX:%.*]] = phi i64 [ [[TMP1]], [[MISMATCH_VECTOR_LOOP_PREHEADER]] ], [ [[TMP24:%.*]], [[MISMATCH_VECTOR_LOOP_INC:%.*]] ]
406; CHECK-NEXT:    [[AVL:%.*]] = sub nuw nsw i64 [[TMP2]], [[MISMATCH_VECTOR_INDEX]]
407; CHECK-NEXT:    [[TMP19:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 16, i1 true)
408; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VECTOR_INDEX]]
409; CHECK-NEXT:    [[LHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP20]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]])
410; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VECTOR_INDEX]]
411; CHECK-NEXT:    [[RHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP21]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]])
412; CHECK-NEXT:    [[MISMATCH_CMP:%.*]] = call <vscale x 16 x i1> @llvm.vp.icmp.nxv16i8(<vscale x 16 x i8> [[LHS_LOAD]], <vscale x 16 x i8> [[RHS_LOAD]], metadata !"ne", <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]])
413; CHECK-NEXT:    [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> [[MISMATCH_CMP]], i1 false, <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]])
414; CHECK-NEXT:    [[TMP22:%.*]] = icmp ne i32 [[FIRST]], [[TMP19]]
415; CHECK-NEXT:    br i1 [[TMP22]], label [[MISMATCH_VECTOR_LOOP_FOUND:%.*]], label [[MISMATCH_VECTOR_LOOP_INC]]
416; CHECK:       mismatch_vec_loop_inc:
417; CHECK-NEXT:    [[TMP23:%.*]] = zext i32 [[TMP19]] to i64
418; CHECK-NEXT:    [[TMP24]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX]], [[TMP23]]
419; CHECK-NEXT:    [[TMP25:%.*]] = icmp ne i64 [[TMP24]], [[TMP2]]
420; CHECK-NEXT:    br i1 [[TMP25]], label [[MISMATCH_VECTOR_LOOP]], label [[MISMATCH_END:%.*]]
421; CHECK:       mismatch_vec_loop_found:
422; CHECK-NEXT:    [[FIRST1:%.*]] = phi i32 [ [[FIRST]], [[MISMATCH_VECTOR_LOOP]] ]
423; CHECK-NEXT:    [[MISMATCH_VECTOR_INDEX2:%.*]] = phi i64 [ [[MISMATCH_VECTOR_INDEX]], [[MISMATCH_VECTOR_LOOP]] ]
424; CHECK-NEXT:    [[TMP26:%.*]] = zext i32 [[FIRST1]] to i64
425; CHECK-NEXT:    [[TMP27:%.*]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX2]], [[TMP26]]
426; CHECK-NEXT:    [[TMP28:%.*]] = trunc i64 [[TMP27]] to i32
427; CHECK-NEXT:    br label [[MISMATCH_END]]
428; CHECK:       mismatch_loop_pre:
429; CHECK-NEXT:    br label [[MISMATCH_LOOP:%.*]]
430; CHECK:       mismatch_loop:
431; CHECK-NEXT:    [[MISMATCH_INDEX:%.*]] = phi i32 [ [[TMP0]], [[MISMATCH_LOOP_PRE]] ], [ [[TMP35:%.*]], [[MISMATCH_LOOP_INC:%.*]] ]
432; CHECK-NEXT:    [[TMP29:%.*]] = zext i32 [[MISMATCH_INDEX]] to i64
433; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP29]]
434; CHECK-NEXT:    [[TMP31:%.*]] = load i8, ptr [[TMP30]], align 1
435; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP29]]
436; CHECK-NEXT:    [[TMP33:%.*]] = load i8, ptr [[TMP32]], align 1
437; CHECK-NEXT:    [[TMP34:%.*]] = icmp eq i8 [[TMP31]], [[TMP33]]
438; CHECK-NEXT:    br i1 [[TMP34]], label [[MISMATCH_LOOP_INC]], label [[MISMATCH_END]]
439; CHECK:       mismatch_loop_inc:
440; CHECK-NEXT:    [[TMP35]] = add nsw i32 [[MISMATCH_INDEX]], 1
441; CHECK-NEXT:    [[TMP36:%.*]] = icmp eq i32 [[TMP35]], [[N]]
442; CHECK-NEXT:    br i1 [[TMP36]], label [[MISMATCH_END]], label [[MISMATCH_LOOP]]
443; CHECK:       mismatch_end:
444; CHECK-NEXT:    [[MISMATCH_RESULT:%.*]] = phi i32 [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_VECTOR_LOOP_INC]] ], [ [[TMP28]], [[MISMATCH_VECTOR_LOOP_FOUND]] ]
445; CHECK-NEXT:    br i1 true, label [[BYTE_COMPARE:%.*]], label [[WHILE_COND:%.*]]
446; CHECK:       while.cond:
447; CHECK-NEXT:    [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[MISMATCH_END]] ], [ [[MISMATCH_RESULT]], [[WHILE_BODY:%.*]] ]
448; CHECK-NEXT:    [[INC:%.*]] = add nsw i32 [[LEN_ADDR]], 1
449; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], [[N]]
450; CHECK-NEXT:    br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
451; CHECK:       while.body:
452; CHECK-NEXT:    [[IDXPROM:%.*]] = zext i32 [[MISMATCH_RESULT]] to i64
453; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
454; CHECK-NEXT:    [[TMP37:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
455; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
456; CHECK-NEXT:    [[TMP38:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
457; CHECK-NEXT:    [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP37]], [[TMP38]]
458; CHECK-NEXT:    br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
459; CHECK:       byte.compare:
460; CHECK-NEXT:    br label [[WHILE_END]]
461; CHECK:       while.end:
462; CHECK-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_BODY]] ], [ [[MISMATCH_RESULT]], [[WHILE_COND]] ], [ [[MISMATCH_RESULT]], [[BYTE_COMPARE]] ]
463; CHECK-NEXT:    ret i32 [[INC_LCSSA]]
464;
465; LMUL8-LABEL: define i32 @compare_bytes_signed_wrap(
466; LMUL8-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
467; LMUL8-NEXT:  entry:
468; LMUL8-NEXT:    [[TMP0:%.*]] = add i32 [[LEN]], 1
469; LMUL8-NEXT:    br label [[MISMATCH_MIN_IT_CHECK:%.*]]
470; LMUL8:       mismatch_min_it_check:
471; LMUL8-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
472; LMUL8-NEXT:    [[TMP2:%.*]] = zext i32 [[N]] to i64
473; LMUL8-NEXT:    [[TMP3:%.*]] = icmp ule i32 [[TMP0]], [[N]]
474; LMUL8-NEXT:    br i1 [[TMP3]], label [[MISMATCH_MEM_CHECK:%.*]], label [[MISMATCH_LOOP_PRE:%.*]], !prof [[PROF0]]
475; LMUL8:       mismatch_mem_check:
476; LMUL8-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]]
477; LMUL8-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
478; LMUL8-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP5]] to i64
479; LMUL8-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[TMP4]] to i64
480; LMUL8-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]]
481; LMUL8-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
482; LMUL8-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
483; LMUL8-NEXT:    [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64
484; LMUL8-NEXT:    [[TMP6:%.*]] = lshr i64 [[TMP7]], 12
485; LMUL8-NEXT:    [[TMP9:%.*]] = lshr i64 [[TMP11]], 12
486; LMUL8-NEXT:    [[TMP12:%.*]] = lshr i64 [[TMP8]], 12
487; LMUL8-NEXT:    [[TMP15:%.*]] = lshr i64 [[TMP14]], 12
488; LMUL8-NEXT:    [[TMP16:%.*]] = icmp ne i64 [[TMP6]], [[TMP9]]
489; LMUL8-NEXT:    [[TMP17:%.*]] = icmp ne i64 [[TMP12]], [[TMP15]]
490; LMUL8-NEXT:    [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]]
491; LMUL8-NEXT:    br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_VECTOR_LOOP_PREHEADER:%.*]], !prof [[PROF1]]
492; LMUL8:       mismatch_vec_loop_preheader:
493; LMUL8-NEXT:    br label [[MISMATCH_VECTOR_LOOP:%.*]]
494; LMUL8:       mismatch_vec_loop:
495; LMUL8-NEXT:    [[MISMATCH_VECTOR_INDEX:%.*]] = phi i64 [ [[TMP1]], [[MISMATCH_VECTOR_LOOP_PREHEADER]] ], [ [[TMP24:%.*]], [[MISMATCH_VECTOR_LOOP_INC:%.*]] ]
496; LMUL8-NEXT:    [[AVL:%.*]] = sub nuw nsw i64 [[TMP2]], [[MISMATCH_VECTOR_INDEX]]
497; LMUL8-NEXT:    [[TMP19:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 64, i1 true)
498; LMUL8-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VECTOR_INDEX]]
499; LMUL8-NEXT:    [[LHS_LOAD:%.*]] = call <vscale x 64 x i8> @llvm.vp.load.nxv64i8.p0(ptr [[TMP20]], <vscale x 64 x i1> splat (i1 true), i32 [[TMP19]])
500; LMUL8-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VECTOR_INDEX]]
501; LMUL8-NEXT:    [[RHS_LOAD:%.*]] = call <vscale x 64 x i8> @llvm.vp.load.nxv64i8.p0(ptr [[TMP21]], <vscale x 64 x i1> splat (i1 true), i32 [[TMP19]])
502; LMUL8-NEXT:    [[MISMATCH_CMP:%.*]] = call <vscale x 64 x i1> @llvm.vp.icmp.nxv64i8(<vscale x 64 x i8> [[LHS_LOAD]], <vscale x 64 x i8> [[RHS_LOAD]], metadata !"ne", <vscale x 64 x i1> splat (i1 true), i32 [[TMP19]])
503; LMUL8-NEXT:    [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv64i1(<vscale x 64 x i1> [[MISMATCH_CMP]], i1 false, <vscale x 64 x i1> splat (i1 true), i32 [[TMP19]])
504; LMUL8-NEXT:    [[TMP22:%.*]] = icmp ne i32 [[FIRST]], [[TMP19]]
505; LMUL8-NEXT:    br i1 [[TMP22]], label [[MISMATCH_VECTOR_LOOP_FOUND:%.*]], label [[MISMATCH_VECTOR_LOOP_INC]]
506; LMUL8:       mismatch_vec_loop_inc:
507; LMUL8-NEXT:    [[TMP23:%.*]] = zext i32 [[TMP19]] to i64
508; LMUL8-NEXT:    [[TMP24]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX]], [[TMP23]]
509; LMUL8-NEXT:    [[TMP25:%.*]] = icmp ne i64 [[TMP24]], [[TMP2]]
510; LMUL8-NEXT:    br i1 [[TMP25]], label [[MISMATCH_VECTOR_LOOP]], label [[MISMATCH_END:%.*]]
511; LMUL8:       mismatch_vec_loop_found:
512; LMUL8-NEXT:    [[FIRST1:%.*]] = phi i32 [ [[FIRST]], [[MISMATCH_VECTOR_LOOP]] ]
513; LMUL8-NEXT:    [[MISMATCH_VECTOR_INDEX2:%.*]] = phi i64 [ [[MISMATCH_VECTOR_INDEX]], [[MISMATCH_VECTOR_LOOP]] ]
514; LMUL8-NEXT:    [[TMP26:%.*]] = zext i32 [[FIRST1]] to i64
515; LMUL8-NEXT:    [[TMP27:%.*]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX2]], [[TMP26]]
516; LMUL8-NEXT:    [[TMP28:%.*]] = trunc i64 [[TMP27]] to i32
517; LMUL8-NEXT:    br label [[MISMATCH_END]]
518; LMUL8:       mismatch_loop_pre:
519; LMUL8-NEXT:    br label [[MISMATCH_LOOP:%.*]]
520; LMUL8:       mismatch_loop:
521; LMUL8-NEXT:    [[MISMATCH_INDEX:%.*]] = phi i32 [ [[TMP0]], [[MISMATCH_LOOP_PRE]] ], [ [[TMP35:%.*]], [[MISMATCH_LOOP_INC:%.*]] ]
522; LMUL8-NEXT:    [[TMP29:%.*]] = zext i32 [[MISMATCH_INDEX]] to i64
523; LMUL8-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP29]]
524; LMUL8-NEXT:    [[TMP31:%.*]] = load i8, ptr [[TMP30]], align 1
525; LMUL8-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP29]]
526; LMUL8-NEXT:    [[TMP33:%.*]] = load i8, ptr [[TMP32]], align 1
527; LMUL8-NEXT:    [[TMP34:%.*]] = icmp eq i8 [[TMP31]], [[TMP33]]
528; LMUL8-NEXT:    br i1 [[TMP34]], label [[MISMATCH_LOOP_INC]], label [[MISMATCH_END]]
529; LMUL8:       mismatch_loop_inc:
530; LMUL8-NEXT:    [[TMP35]] = add nsw i32 [[MISMATCH_INDEX]], 1
531; LMUL8-NEXT:    [[TMP36:%.*]] = icmp eq i32 [[TMP35]], [[N]]
532; LMUL8-NEXT:    br i1 [[TMP36]], label [[MISMATCH_END]], label [[MISMATCH_LOOP]]
533; LMUL8:       mismatch_end:
534; LMUL8-NEXT:    [[MISMATCH_RESULT:%.*]] = phi i32 [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_VECTOR_LOOP_INC]] ], [ [[TMP28]], [[MISMATCH_VECTOR_LOOP_FOUND]] ]
535; LMUL8-NEXT:    br i1 true, label [[BYTE_COMPARE:%.*]], label [[WHILE_COND:%.*]]
536; LMUL8:       while.cond:
537; LMUL8-NEXT:    [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[MISMATCH_END]] ], [ [[MISMATCH_RESULT]], [[WHILE_BODY:%.*]] ]
538; LMUL8-NEXT:    [[INC:%.*]] = add nsw i32 [[LEN_ADDR]], 1
539; LMUL8-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], [[N]]
540; LMUL8-NEXT:    br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
541; LMUL8:       while.body:
542; LMUL8-NEXT:    [[IDXPROM:%.*]] = zext i32 [[MISMATCH_RESULT]] to i64
543; LMUL8-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
544; LMUL8-NEXT:    [[TMP37:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
545; LMUL8-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
546; LMUL8-NEXT:    [[TMP38:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
547; LMUL8-NEXT:    [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP37]], [[TMP38]]
548; LMUL8-NEXT:    br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
549; LMUL8:       byte.compare:
550; LMUL8-NEXT:    br label [[WHILE_END]]
551; LMUL8:       while.end:
552; LMUL8-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_BODY]] ], [ [[MISMATCH_RESULT]], [[WHILE_COND]] ], [ [[MISMATCH_RESULT]], [[BYTE_COMPARE]] ]
553; LMUL8-NEXT:    ret i32 [[INC_LCSSA]]
554;
555; LOOP-DEL-LABEL: define i32 @compare_bytes_signed_wrap(
556; LOOP-DEL-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
557; LOOP-DEL-NEXT:  entry:
558; LOOP-DEL-NEXT:    [[TMP0:%.*]] = add i32 [[LEN]], 1
559; LOOP-DEL-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
560; LOOP-DEL-NEXT:    [[TMP2:%.*]] = zext i32 [[N]] to i64
561; LOOP-DEL-NEXT:    [[TMP3:%.*]] = icmp ule i32 [[TMP0]], [[N]]
562; LOOP-DEL-NEXT:    br i1 [[TMP3]], label [[MISMATCH_MEM_CHECK:%.*]], label [[MISMATCH_LOOP_PRE:%.*]], !prof [[PROF0]]
563; LOOP-DEL:       mismatch_mem_check:
564; LOOP-DEL-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]]
565; LOOP-DEL-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
566; LOOP-DEL-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP5]] to i64
567; LOOP-DEL-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[TMP4]] to i64
568; LOOP-DEL-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]]
569; LOOP-DEL-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
570; LOOP-DEL-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
571; LOOP-DEL-NEXT:    [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64
572; LOOP-DEL-NEXT:    [[TMP6:%.*]] = lshr i64 [[TMP7]], 12
573; LOOP-DEL-NEXT:    [[TMP9:%.*]] = lshr i64 [[TMP11]], 12
574; LOOP-DEL-NEXT:    [[TMP12:%.*]] = lshr i64 [[TMP8]], 12
575; LOOP-DEL-NEXT:    [[TMP15:%.*]] = lshr i64 [[TMP14]], 12
576; LOOP-DEL-NEXT:    [[TMP16:%.*]] = icmp ne i64 [[TMP6]], [[TMP9]]
577; LOOP-DEL-NEXT:    [[TMP17:%.*]] = icmp ne i64 [[TMP12]], [[TMP15]]
578; LOOP-DEL-NEXT:    [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]]
579; LOOP-DEL-NEXT:    br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_VECTOR_LOOP:%.*]], !prof [[PROF1]]
580; LOOP-DEL:       mismatch_vec_loop:
581; LOOP-DEL-NEXT:    [[MISMATCH_VECTOR_INDEX:%.*]] = phi i64 [ [[TMP24:%.*]], [[MISMATCH_VECTOR_LOOP_INC:%.*]] ], [ [[TMP1]], [[MISMATCH_MEM_CHECK]] ]
582; LOOP-DEL-NEXT:    [[AVL:%.*]] = sub nuw nsw i64 [[TMP2]], [[MISMATCH_VECTOR_INDEX]]
583; LOOP-DEL-NEXT:    [[TMP19:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 16, i1 true)
584; LOOP-DEL-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VECTOR_INDEX]]
585; LOOP-DEL-NEXT:    [[LHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP20]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]])
586; LOOP-DEL-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VECTOR_INDEX]]
587; LOOP-DEL-NEXT:    [[RHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP21]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]])
588; LOOP-DEL-NEXT:    [[MISMATCH_CMP:%.*]] = call <vscale x 16 x i1> @llvm.vp.icmp.nxv16i8(<vscale x 16 x i8> [[LHS_LOAD]], <vscale x 16 x i8> [[RHS_LOAD]], metadata !"ne", <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]])
589; LOOP-DEL-NEXT:    [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> [[MISMATCH_CMP]], i1 false, <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]])
590; LOOP-DEL-NEXT:    [[TMP22:%.*]] = icmp ne i32 [[FIRST]], [[TMP19]]
591; LOOP-DEL-NEXT:    br i1 [[TMP22]], label [[MISMATCH_VECTOR_LOOP_FOUND:%.*]], label [[MISMATCH_VECTOR_LOOP_INC]]
592; LOOP-DEL:       mismatch_vec_loop_inc:
593; LOOP-DEL-NEXT:    [[TMP23:%.*]] = zext i32 [[TMP19]] to i64
594; LOOP-DEL-NEXT:    [[TMP24]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX]], [[TMP23]]
595; LOOP-DEL-NEXT:    [[TMP25:%.*]] = icmp ne i64 [[TMP24]], [[TMP2]]
596; LOOP-DEL-NEXT:    br i1 [[TMP25]], label [[MISMATCH_VECTOR_LOOP]], label [[WHILE_END:%.*]]
597; LOOP-DEL:       mismatch_vec_loop_found:
598; LOOP-DEL-NEXT:    [[FIRST1:%.*]] = phi i32 [ [[FIRST]], [[MISMATCH_VECTOR_LOOP]] ]
599; LOOP-DEL-NEXT:    [[MISMATCH_VECTOR_INDEX2:%.*]] = phi i64 [ [[MISMATCH_VECTOR_INDEX]], [[MISMATCH_VECTOR_LOOP]] ]
600; LOOP-DEL-NEXT:    [[TMP26:%.*]] = zext i32 [[FIRST1]] to i64
601; LOOP-DEL-NEXT:    [[TMP27:%.*]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX2]], [[TMP26]]
602; LOOP-DEL-NEXT:    [[TMP28:%.*]] = trunc i64 [[TMP27]] to i32
603; LOOP-DEL-NEXT:    br label [[WHILE_END]]
604; LOOP-DEL:       mismatch_loop_pre:
605; LOOP-DEL-NEXT:    br label [[MISMATCH_LOOP:%.*]]
606; LOOP-DEL:       mismatch_loop:
607; LOOP-DEL-NEXT:    [[MISMATCH_INDEX:%.*]] = phi i32 [ [[TMP0]], [[MISMATCH_LOOP_PRE]] ], [ [[TMP35:%.*]], [[MISMATCH_LOOP_INC:%.*]] ]
608; LOOP-DEL-NEXT:    [[TMP29:%.*]] = zext i32 [[MISMATCH_INDEX]] to i64
609; LOOP-DEL-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP29]]
610; LOOP-DEL-NEXT:    [[TMP31:%.*]] = load i8, ptr [[TMP30]], align 1
611; LOOP-DEL-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP29]]
612; LOOP-DEL-NEXT:    [[TMP33:%.*]] = load i8, ptr [[TMP32]], align 1
613; LOOP-DEL-NEXT:    [[TMP34:%.*]] = icmp eq i8 [[TMP31]], [[TMP33]]
614; LOOP-DEL-NEXT:    br i1 [[TMP34]], label [[MISMATCH_LOOP_INC]], label [[WHILE_END]]
615; LOOP-DEL:       mismatch_loop_inc:
616; LOOP-DEL-NEXT:    [[TMP35]] = add nsw i32 [[MISMATCH_INDEX]], 1
617; LOOP-DEL-NEXT:    [[TMP36:%.*]] = icmp eq i32 [[TMP35]], [[N]]
618; LOOP-DEL-NEXT:    br i1 [[TMP36]], label [[WHILE_END]], label [[MISMATCH_LOOP]]
619; LOOP-DEL:       while.end:
620; LOOP-DEL-NEXT:    [[MISMATCH_RESULT:%.*]] = phi i32 [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_VECTOR_LOOP_INC]] ], [ [[TMP28]], [[MISMATCH_VECTOR_LOOP_FOUND]] ]
621; LOOP-DEL-NEXT:    ret i32 [[MISMATCH_RESULT]]
622;
623; MASKED-LABEL: define i32 @compare_bytes_signed_wrap(
624; MASKED-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
625; MASKED-NEXT:  entry:
626; MASKED-NEXT:    [[TMP0:%.*]] = add i32 [[LEN]], 1
627; MASKED-NEXT:    br label [[MISMATCH_MIN_IT_CHECK:%.*]]
628; MASKED:       mismatch_min_it_check:
629; MASKED-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
630; MASKED-NEXT:    [[TMP2:%.*]] = zext i32 [[N]] to i64
631; MASKED-NEXT:    [[TMP3:%.*]] = icmp ule i32 [[TMP0]], [[N]]
632; MASKED-NEXT:    br i1 [[TMP3]], label [[MISMATCH_MEM_CHECK:%.*]], label [[MISMATCH_LOOP_PRE:%.*]], !prof [[PROF0]]
633; MASKED:       mismatch_mem_check:
634; MASKED-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]]
635; MASKED-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
636; MASKED-NEXT:    [[TMP6:%.*]] = ptrtoint ptr [[TMP5]] to i64
637; MASKED-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[TMP4]] to i64
638; MASKED-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]]
639; MASKED-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
640; MASKED-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP8]] to i64
641; MASKED-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[TMP9]] to i64
642; MASKED-NEXT:    [[TMP12:%.*]] = lshr i64 [[TMP7]], 12
643; MASKED-NEXT:    [[TMP13:%.*]] = lshr i64 [[TMP10]], 12
644; MASKED-NEXT:    [[TMP14:%.*]] = lshr i64 [[TMP6]], 12
645; MASKED-NEXT:    [[TMP15:%.*]] = lshr i64 [[TMP11]], 12
646; MASKED-NEXT:    [[TMP16:%.*]] = icmp ne i64 [[TMP12]], [[TMP13]]
647; MASKED-NEXT:    [[TMP17:%.*]] = icmp ne i64 [[TMP14]], [[TMP15]]
648; MASKED-NEXT:    [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]]
649; MASKED-NEXT:    br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_VEC_LOOP_PREHEADER:%.*]], !prof [[PROF1]]
650; MASKED:       mismatch_vec_loop_preheader:
651; MASKED-NEXT:    [[TMP19:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP1]], i64 [[TMP2]])
652; MASKED-NEXT:    [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
653; MASKED-NEXT:    [[TMP21:%.*]] = mul nuw nsw i64 [[TMP20]], 16
654; MASKED-NEXT:    br label [[MISMATCH_VEC_LOOP:%.*]]
655; MASKED:       mismatch_vec_loop:
656; MASKED-NEXT:    [[MISMATCH_VEC_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP19]], [[MISMATCH_VEC_LOOP_PREHEADER]] ], [ [[TMP30:%.*]], [[MISMATCH_VEC_LOOP_INC:%.*]] ]
657; MASKED-NEXT:    [[MISMATCH_VEC_INDEX:%.*]] = phi i64 [ [[TMP1]], [[MISMATCH_VEC_LOOP_PREHEADER]] ], [ [[TMP29:%.*]], [[MISMATCH_VEC_LOOP_INC]] ]
658; MASKED-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VEC_INDEX]]
659; MASKED-NEXT:    [[TMP23:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP22]], i32 1, <vscale x 16 x i1> [[MISMATCH_VEC_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
660; MASKED-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VEC_INDEX]]
661; MASKED-NEXT:    [[TMP25:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP24]], i32 1, <vscale x 16 x i1> [[MISMATCH_VEC_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
662; MASKED-NEXT:    [[TMP26:%.*]] = icmp ne <vscale x 16 x i8> [[TMP23]], [[TMP25]]
663; MASKED-NEXT:    [[TMP27:%.*]] = select <vscale x 16 x i1> [[MISMATCH_VEC_LOOP_PRED]], <vscale x 16 x i1> [[TMP26]], <vscale x 16 x i1> zeroinitializer
664; MASKED-NEXT:    [[TMP28:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[TMP27]])
665; MASKED-NEXT:    br i1 [[TMP28]], label [[MISMATCH_VEC_LOOP_FOUND:%.*]], label [[MISMATCH_VEC_LOOP_INC]]
666; MASKED:       mismatch_vec_loop_inc:
667; MASKED-NEXT:    [[TMP29]] = add nuw nsw i64 [[MISMATCH_VEC_INDEX]], [[TMP21]]
668; MASKED-NEXT:    [[TMP30]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP29]], i64 [[TMP2]])
669; MASKED-NEXT:    [[TMP31:%.*]] = extractelement <vscale x 16 x i1> [[TMP30]], i64 0
670; MASKED-NEXT:    br i1 [[TMP31]], label [[MISMATCH_VEC_LOOP]], label [[MISMATCH_END:%.*]]
671; MASKED:       mismatch_vec_loop_found:
672; MASKED-NEXT:    [[MISMATCH_VEC_FOUND_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP27]], [[MISMATCH_VEC_LOOP]] ]
673; MASKED-NEXT:    [[MISMATCH_VEC_LAST_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[MISMATCH_VEC_LOOP_PRED]], [[MISMATCH_VEC_LOOP]] ]
674; MASKED-NEXT:    [[MISMATCH_VEC_FOUND_INDEX:%.*]] = phi i64 [ [[MISMATCH_VEC_INDEX]], [[MISMATCH_VEC_LOOP]] ]
675; MASKED-NEXT:    [[TMP32:%.*]] = and <vscale x 16 x i1> [[MISMATCH_VEC_LAST_LOOP_PRED]], [[MISMATCH_VEC_FOUND_PRED]]
676; MASKED-NEXT:    [[TMP33:%.*]] = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> [[TMP32]], i1 true)
677; MASKED-NEXT:    [[TMP34:%.*]] = zext i32 [[TMP33]] to i64
678; MASKED-NEXT:    [[TMP35:%.*]] = add nuw nsw i64 [[MISMATCH_VEC_FOUND_INDEX]], [[TMP34]]
679; MASKED-NEXT:    [[TMP36:%.*]] = trunc i64 [[TMP35]] to i32
680; MASKED-NEXT:    br label [[MISMATCH_END]]
681; MASKED:       mismatch_loop_pre:
682; MASKED-NEXT:    br label [[MISMATCH_LOOP:%.*]]
683; MASKED:       mismatch_loop:
684; MASKED-NEXT:    [[MISMATCH_INDEX:%.*]] = phi i32 [ [[TMP0]], [[MISMATCH_LOOP_PRE]] ], [ [[TMP43:%.*]], [[MISMATCH_LOOP_INC:%.*]] ]
685; MASKED-NEXT:    [[TMP37:%.*]] = zext i32 [[MISMATCH_INDEX]] to i64
686; MASKED-NEXT:    [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP37]]
687; MASKED-NEXT:    [[TMP39:%.*]] = load i8, ptr [[TMP38]], align 1
688; MASKED-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP37]]
689; MASKED-NEXT:    [[TMP41:%.*]] = load i8, ptr [[TMP40]], align 1
690; MASKED-NEXT:    [[TMP42:%.*]] = icmp eq i8 [[TMP39]], [[TMP41]]
691; MASKED-NEXT:    br i1 [[TMP42]], label [[MISMATCH_LOOP_INC]], label [[MISMATCH_END]]
692; MASKED:       mismatch_loop_inc:
693; MASKED-NEXT:    [[TMP43]] = add nsw i32 [[MISMATCH_INDEX]], 1
694; MASKED-NEXT:    [[TMP44:%.*]] = icmp eq i32 [[TMP43]], [[N]]
695; MASKED-NEXT:    br i1 [[TMP44]], label [[MISMATCH_END]], label [[MISMATCH_LOOP]]
696; MASKED:       mismatch_end:
697; MASKED-NEXT:    [[MISMATCH_RESULT:%.*]] = phi i32 [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_VEC_LOOP_INC]] ], [ [[TMP36]], [[MISMATCH_VEC_LOOP_FOUND]] ]
698; MASKED-NEXT:    br i1 true, label [[BYTE_COMPARE:%.*]], label [[WHILE_COND:%.*]]
699; MASKED:       while.cond:
700; MASKED-NEXT:    [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[MISMATCH_END]] ], [ [[MISMATCH_RESULT]], [[WHILE_BODY:%.*]] ]
701; MASKED-NEXT:    [[INC:%.*]] = add nsw i32 [[LEN_ADDR]], 1
702; MASKED-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], [[N]]
703; MASKED-NEXT:    br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
704; MASKED:       while.body:
705; MASKED-NEXT:    [[IDXPROM:%.*]] = zext i32 [[MISMATCH_RESULT]] to i64
706; MASKED-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
707; MASKED-NEXT:    [[TMP45:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
708; MASKED-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
709; MASKED-NEXT:    [[TMP46:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
710; MASKED-NEXT:    [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP45]], [[TMP46]]
711; MASKED-NEXT:    br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
712; MASKED:       byte.compare:
713; MASKED-NEXT:    br label [[WHILE_END]]
714; MASKED:       while.end:
715; MASKED-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_BODY]] ], [ [[MISMATCH_RESULT]], [[WHILE_COND]] ], [ [[MISMATCH_RESULT]], [[BYTE_COMPARE]] ]
716; MASKED-NEXT:    ret i32 [[INC_LCSSA]]
717;
718; NO-TRANSFORM-LABEL: define i32 @compare_bytes_signed_wrap(
719; NO-TRANSFORM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) {
720; NO-TRANSFORM-NEXT:  entry:
721; NO-TRANSFORM-NEXT:    br label [[WHILE_COND:%.*]]
722; NO-TRANSFORM:       while.cond:
723; NO-TRANSFORM-NEXT:    [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
724; NO-TRANSFORM-NEXT:    [[INC]] = add nsw i32 [[LEN_ADDR]], 1
725; NO-TRANSFORM-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
726; NO-TRANSFORM-NEXT:    br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
727; NO-TRANSFORM:       while.body:
728; NO-TRANSFORM-NEXT:    [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
729; NO-TRANSFORM-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
730; NO-TRANSFORM-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
731; NO-TRANSFORM-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
732; NO-TRANSFORM-NEXT:    [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
733; NO-TRANSFORM-NEXT:    [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
734; NO-TRANSFORM-NEXT:    br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
735; NO-TRANSFORM:       while.end:
736; NO-TRANSFORM-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
737; NO-TRANSFORM-NEXT:    ret i32 [[INC_LCSSA]]
738entry:
739  br label %while.cond
740
741while.cond:
742  %len.addr = phi i32 [ %len, %entry ], [ %inc, %while.body ]
743  %inc = add nsw i32 %len.addr, 1
744  %cmp.not = icmp eq i32 %inc, %n
745  br i1 %cmp.not, label %while.end, label %while.body
746
747while.body:
748  %idxprom = zext i32 %inc to i64
749  %arrayidx = getelementptr inbounds i8, ptr %a, i64 %idxprom
750  %0 = load i8, ptr %arrayidx
751  %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %idxprom
752  %1 = load i8, ptr %arrayidx2
753  %cmp.not2 = icmp eq i8 %0, %1
754  br i1 %cmp.not2, label %while.cond, label %while.end
755
756while.end:
757  %inc.lcssa = phi i32 [ %inc, %while.body ], [ %inc, %while.cond ]
758  ret i32 %inc.lcssa
759}
760
761
762define i32 @compare_bytes_simple_end_ne_found(ptr %a, ptr %b, ptr %c, ptr %d, i32 %len, i32 %n) {
763; CHECK-LABEL: define i32 @compare_bytes_simple_end_ne_found(
764; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], ptr [[D:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
765; CHECK-NEXT:  entry:
766; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[LEN]], 1
767; CHECK-NEXT:    br label [[MISMATCH_MIN_IT_CHECK:%.*]]
768; CHECK:       mismatch_min_it_check:
769; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
770; CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[N]] to i64
771; CHECK-NEXT:    [[TMP3:%.*]] = icmp ule i32 [[TMP0]], [[N]]
772; CHECK-NEXT:    br i1 [[TMP3]], label [[MISMATCH_MEM_CHECK:%.*]], label [[MISMATCH_LOOP_PRE:%.*]], !prof [[PROF0]]
773; CHECK:       mismatch_mem_check:
774; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]]
775; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
776; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP5]] to i64
777; CHECK-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[TMP4]] to i64
778; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]]
779; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
780; CHECK-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
781; CHECK-NEXT:    [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64
782; CHECK-NEXT:    [[TMP6:%.*]] = lshr i64 [[TMP7]], 12
783; CHECK-NEXT:    [[TMP9:%.*]] = lshr i64 [[TMP11]], 12
784; CHECK-NEXT:    [[TMP12:%.*]] = lshr i64 [[TMP8]], 12
785; CHECK-NEXT:    [[TMP15:%.*]] = lshr i64 [[TMP14]], 12
786; CHECK-NEXT:    [[TMP16:%.*]] = icmp ne i64 [[TMP6]], [[TMP9]]
787; CHECK-NEXT:    [[TMP17:%.*]] = icmp ne i64 [[TMP12]], [[TMP15]]
788; CHECK-NEXT:    [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]]
789; CHECK-NEXT:    br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_VECTOR_LOOP_PREHEADER:%.*]], !prof [[PROF1]]
790; CHECK:       mismatch_vec_loop_preheader:
791; CHECK-NEXT:    br label [[MISMATCH_VECTOR_LOOP:%.*]]
792; CHECK:       mismatch_vec_loop:
793; CHECK-NEXT:    [[MISMATCH_VECTOR_INDEX:%.*]] = phi i64 [ [[TMP1]], [[MISMATCH_VECTOR_LOOP_PREHEADER]] ], [ [[TMP24:%.*]], [[MISMATCH_VECTOR_LOOP_INC:%.*]] ]
794; CHECK-NEXT:    [[AVL:%.*]] = sub nuw nsw i64 [[TMP2]], [[MISMATCH_VECTOR_INDEX]]
795; CHECK-NEXT:    [[TMP19:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 16, i1 true)
796; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VECTOR_INDEX]]
797; CHECK-NEXT:    [[LHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP20]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]])
798; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VECTOR_INDEX]]
799; CHECK-NEXT:    [[RHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP21]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]])
800; CHECK-NEXT:    [[MISMATCH_CMP:%.*]] = call <vscale x 16 x i1> @llvm.vp.icmp.nxv16i8(<vscale x 16 x i8> [[LHS_LOAD]], <vscale x 16 x i8> [[RHS_LOAD]], metadata !"ne", <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]])
801; CHECK-NEXT:    [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> [[MISMATCH_CMP]], i1 false, <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]])
802; CHECK-NEXT:    [[TMP22:%.*]] = icmp ne i32 [[FIRST]], [[TMP19]]
803; CHECK-NEXT:    br i1 [[TMP22]], label [[MISMATCH_VECTOR_LOOP_FOUND:%.*]], label [[MISMATCH_VECTOR_LOOP_INC]]
804; CHECK:       mismatch_vec_loop_inc:
805; CHECK-NEXT:    [[TMP23:%.*]] = zext i32 [[TMP19]] to i64
806; CHECK-NEXT:    [[TMP24]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX]], [[TMP23]]
807; CHECK-NEXT:    [[TMP25:%.*]] = icmp ne i64 [[TMP24]], [[TMP2]]
808; CHECK-NEXT:    br i1 [[TMP25]], label [[MISMATCH_VECTOR_LOOP]], label [[MISMATCH_END:%.*]]
809; CHECK:       mismatch_vec_loop_found:
810; CHECK-NEXT:    [[FIRST1:%.*]] = phi i32 [ [[FIRST]], [[MISMATCH_VECTOR_LOOP]] ]
811; CHECK-NEXT:    [[MISMATCH_VECTOR_INDEX2:%.*]] = phi i64 [ [[MISMATCH_VECTOR_INDEX]], [[MISMATCH_VECTOR_LOOP]] ]
812; CHECK-NEXT:    [[TMP26:%.*]] = zext i32 [[FIRST1]] to i64
813; CHECK-NEXT:    [[TMP27:%.*]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX2]], [[TMP26]]
814; CHECK-NEXT:    [[TMP28:%.*]] = trunc i64 [[TMP27]] to i32
815; CHECK-NEXT:    br label [[MISMATCH_END]]
816; CHECK:       mismatch_loop_pre:
817; CHECK-NEXT:    br label [[MISMATCH_LOOP:%.*]]
818; CHECK:       mismatch_loop:
819; CHECK-NEXT:    [[MISMATCH_INDEX3:%.*]] = phi i32 [ [[TMP0]], [[MISMATCH_LOOP_PRE]] ], [ [[TMP35:%.*]], [[MISMATCH_LOOP_INC:%.*]] ]
820; CHECK-NEXT:    [[TMP29:%.*]] = zext i32 [[MISMATCH_INDEX3]] to i64
821; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP29]]
822; CHECK-NEXT:    [[TMP31:%.*]] = load i8, ptr [[TMP30]], align 1
823; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP29]]
824; CHECK-NEXT:    [[TMP33:%.*]] = load i8, ptr [[TMP32]], align 1
825; CHECK-NEXT:    [[TMP34:%.*]] = icmp eq i8 [[TMP31]], [[TMP33]]
826; CHECK-NEXT:    br i1 [[TMP34]], label [[MISMATCH_LOOP_INC]], label [[MISMATCH_END]]
827; CHECK:       mismatch_loop_inc:
828; CHECK-NEXT:    [[TMP35]] = add i32 [[MISMATCH_INDEX3]], 1
829; CHECK-NEXT:    [[TMP36:%.*]] = icmp eq i32 [[TMP35]], [[N]]
830; CHECK-NEXT:    br i1 [[TMP36]], label [[MISMATCH_END]], label [[MISMATCH_LOOP]]
831; CHECK:       mismatch_end:
832; CHECK-NEXT:    [[MISMATCH_RESULT:%.*]] = phi i32 [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX3]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_VECTOR_LOOP_INC]] ], [ [[TMP28]], [[MISMATCH_VECTOR_LOOP_FOUND]] ]
833; CHECK-NEXT:    br i1 true, label [[BYTE_COMPARE:%.*]], label [[WHILE_COND:%.*]]
834; CHECK:       while.cond:
835; CHECK-NEXT:    [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[MISMATCH_END]] ], [ [[MISMATCH_RESULT]], [[WHILE_BODY:%.*]] ]
836; CHECK-NEXT:    [[INC:%.*]] = add i32 [[LEN_ADDR]], 1
837; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], [[N]]
838; CHECK-NEXT:    br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
839; CHECK:       while.body:
840; CHECK-NEXT:    [[IDXPROM:%.*]] = zext i32 [[MISMATCH_RESULT]] to i64
841; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
842; CHECK-NEXT:    [[TMP37:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
843; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
844; CHECK-NEXT:    [[TMP38:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
845; CHECK-NEXT:    [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP37]], [[TMP38]]
846; CHECK-NEXT:    br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_FOUND:%.*]]
847; CHECK:       while.found:
848; CHECK-NEXT:    [[MISMATCH_INDEX1:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_BODY]] ], [ [[MISMATCH_RESULT]], [[BYTE_COMPARE]] ]
849; CHECK-NEXT:    [[FOUND_PTR:%.*]] = phi ptr [ [[C]], [[WHILE_BODY]] ], [ [[C]], [[BYTE_COMPARE]] ]
850; CHECK-NEXT:    br label [[END:%.*]]
851; CHECK:       byte.compare:
852; CHECK-NEXT:    [[TMP39:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], [[N]]
853; CHECK-NEXT:    br i1 [[TMP39]], label [[WHILE_END]], label [[WHILE_FOUND]]
854; CHECK:       while.end:
855; CHECK-NEXT:    [[MISMATCH_INDEX2:%.*]] = phi i32 [ [[N]], [[WHILE_COND]] ], [ [[N]], [[BYTE_COMPARE]] ]
856; CHECK-NEXT:    [[END_PTR:%.*]] = phi ptr [ [[D]], [[WHILE_COND]] ], [ [[D]], [[BYTE_COMPARE]] ]
857; CHECK-NEXT:    br label [[END]]
858; CHECK:       end:
859; CHECK-NEXT:    [[MISMATCH_INDEX:%.*]] = phi i32 [ [[MISMATCH_INDEX1]], [[WHILE_FOUND]] ], [ [[MISMATCH_INDEX2]], [[WHILE_END]] ]
860; CHECK-NEXT:    [[STORE_PTR:%.*]] = phi ptr [ [[END_PTR]], [[WHILE_END]] ], [ [[FOUND_PTR]], [[WHILE_FOUND]] ]
861; CHECK-NEXT:    store i32 [[MISMATCH_INDEX]], ptr [[STORE_PTR]], align 4
862; CHECK-NEXT:    ret i32 [[MISMATCH_INDEX]]
863;
864; LMUL8-LABEL: define i32 @compare_bytes_simple_end_ne_found(
865; LMUL8-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], ptr [[D:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
866; LMUL8-NEXT:  entry:
867; LMUL8-NEXT:    [[TMP0:%.*]] = add i32 [[LEN]], 1
868; LMUL8-NEXT:    br label [[MISMATCH_MIN_IT_CHECK:%.*]]
869; LMUL8:       mismatch_min_it_check:
870; LMUL8-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
871; LMUL8-NEXT:    [[TMP2:%.*]] = zext i32 [[N]] to i64
872; LMUL8-NEXT:    [[TMP3:%.*]] = icmp ule i32 [[TMP0]], [[N]]
873; LMUL8-NEXT:    br i1 [[TMP3]], label [[MISMATCH_MEM_CHECK:%.*]], label [[MISMATCH_LOOP_PRE:%.*]], !prof [[PROF0]]
874; LMUL8:       mismatch_mem_check:
875; LMUL8-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]]
876; LMUL8-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
877; LMUL8-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP5]] to i64
878; LMUL8-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[TMP4]] to i64
879; LMUL8-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]]
880; LMUL8-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
881; LMUL8-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
882; LMUL8-NEXT:    [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64
883; LMUL8-NEXT:    [[TMP6:%.*]] = lshr i64 [[TMP7]], 12
884; LMUL8-NEXT:    [[TMP9:%.*]] = lshr i64 [[TMP11]], 12
885; LMUL8-NEXT:    [[TMP12:%.*]] = lshr i64 [[TMP8]], 12
886; LMUL8-NEXT:    [[TMP15:%.*]] = lshr i64 [[TMP14]], 12
887; LMUL8-NEXT:    [[TMP16:%.*]] = icmp ne i64 [[TMP6]], [[TMP9]]
888; LMUL8-NEXT:    [[TMP17:%.*]] = icmp ne i64 [[TMP12]], [[TMP15]]
889; LMUL8-NEXT:    [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]]
890; LMUL8-NEXT:    br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_VECTOR_LOOP_PREHEADER:%.*]], !prof [[PROF1]]
891; LMUL8:       mismatch_vec_loop_preheader:
892; LMUL8-NEXT:    br label [[MISMATCH_VECTOR_LOOP:%.*]]
893; LMUL8:       mismatch_vec_loop:
894; LMUL8-NEXT:    [[MISMATCH_VECTOR_INDEX:%.*]] = phi i64 [ [[TMP1]], [[MISMATCH_VECTOR_LOOP_PREHEADER]] ], [ [[TMP24:%.*]], [[MISMATCH_VECTOR_LOOP_INC:%.*]] ]
895; LMUL8-NEXT:    [[AVL:%.*]] = sub nuw nsw i64 [[TMP2]], [[MISMATCH_VECTOR_INDEX]]
896; LMUL8-NEXT:    [[TMP19:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 64, i1 true)
897; LMUL8-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VECTOR_INDEX]]
898; LMUL8-NEXT:    [[LHS_LOAD:%.*]] = call <vscale x 64 x i8> @llvm.vp.load.nxv64i8.p0(ptr [[TMP20]], <vscale x 64 x i1> splat (i1 true), i32 [[TMP19]])
899; LMUL8-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VECTOR_INDEX]]
900; LMUL8-NEXT:    [[RHS_LOAD:%.*]] = call <vscale x 64 x i8> @llvm.vp.load.nxv64i8.p0(ptr [[TMP21]], <vscale x 64 x i1> splat (i1 true), i32 [[TMP19]])
901; LMUL8-NEXT:    [[MISMATCH_CMP:%.*]] = call <vscale x 64 x i1> @llvm.vp.icmp.nxv64i8(<vscale x 64 x i8> [[LHS_LOAD]], <vscale x 64 x i8> [[RHS_LOAD]], metadata !"ne", <vscale x 64 x i1> splat (i1 true), i32 [[TMP19]])
902; LMUL8-NEXT:    [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv64i1(<vscale x 64 x i1> [[MISMATCH_CMP]], i1 false, <vscale x 64 x i1> splat (i1 true), i32 [[TMP19]])
903; LMUL8-NEXT:    [[TMP22:%.*]] = icmp ne i32 [[FIRST]], [[TMP19]]
904; LMUL8-NEXT:    br i1 [[TMP22]], label [[MISMATCH_VECTOR_LOOP_FOUND:%.*]], label [[MISMATCH_VECTOR_LOOP_INC]]
905; LMUL8:       mismatch_vec_loop_inc:
906; LMUL8-NEXT:    [[TMP23:%.*]] = zext i32 [[TMP19]] to i64
907; LMUL8-NEXT:    [[TMP24]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX]], [[TMP23]]
908; LMUL8-NEXT:    [[TMP25:%.*]] = icmp ne i64 [[TMP24]], [[TMP2]]
909; LMUL8-NEXT:    br i1 [[TMP25]], label [[MISMATCH_VECTOR_LOOP]], label [[MISMATCH_END:%.*]]
910; LMUL8:       mismatch_vec_loop_found:
911; LMUL8-NEXT:    [[FIRST1:%.*]] = phi i32 [ [[FIRST]], [[MISMATCH_VECTOR_LOOP]] ]
912; LMUL8-NEXT:    [[MISMATCH_VECTOR_INDEX2:%.*]] = phi i64 [ [[MISMATCH_VECTOR_INDEX]], [[MISMATCH_VECTOR_LOOP]] ]
913; LMUL8-NEXT:    [[TMP26:%.*]] = zext i32 [[FIRST1]] to i64
914; LMUL8-NEXT:    [[TMP27:%.*]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX2]], [[TMP26]]
915; LMUL8-NEXT:    [[TMP28:%.*]] = trunc i64 [[TMP27]] to i32
916; LMUL8-NEXT:    br label [[MISMATCH_END]]
917; LMUL8:       mismatch_loop_pre:
918; LMUL8-NEXT:    br label [[MISMATCH_LOOP:%.*]]
919; LMUL8:       mismatch_loop:
920; LMUL8-NEXT:    [[MISMATCH_INDEX3:%.*]] = phi i32 [ [[TMP0]], [[MISMATCH_LOOP_PRE]] ], [ [[TMP35:%.*]], [[MISMATCH_LOOP_INC:%.*]] ]
921; LMUL8-NEXT:    [[TMP29:%.*]] = zext i32 [[MISMATCH_INDEX3]] to i64
922; LMUL8-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP29]]
923; LMUL8-NEXT:    [[TMP31:%.*]] = load i8, ptr [[TMP30]], align 1
924; LMUL8-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP29]]
925; LMUL8-NEXT:    [[TMP33:%.*]] = load i8, ptr [[TMP32]], align 1
926; LMUL8-NEXT:    [[TMP34:%.*]] = icmp eq i8 [[TMP31]], [[TMP33]]
927; LMUL8-NEXT:    br i1 [[TMP34]], label [[MISMATCH_LOOP_INC]], label [[MISMATCH_END]]
928; LMUL8:       mismatch_loop_inc:
929; LMUL8-NEXT:    [[TMP35]] = add i32 [[MISMATCH_INDEX3]], 1
930; LMUL8-NEXT:    [[TMP36:%.*]] = icmp eq i32 [[TMP35]], [[N]]
931; LMUL8-NEXT:    br i1 [[TMP36]], label [[MISMATCH_END]], label [[MISMATCH_LOOP]]
932; LMUL8:       mismatch_end:
933; LMUL8-NEXT:    [[MISMATCH_RESULT:%.*]] = phi i32 [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX3]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_VECTOR_LOOP_INC]] ], [ [[TMP28]], [[MISMATCH_VECTOR_LOOP_FOUND]] ]
934; LMUL8-NEXT:    br i1 true, label [[BYTE_COMPARE:%.*]], label [[WHILE_COND:%.*]]
935; LMUL8:       while.cond:
936; LMUL8-NEXT:    [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[MISMATCH_END]] ], [ [[MISMATCH_RESULT]], [[WHILE_BODY:%.*]] ]
937; LMUL8-NEXT:    [[INC:%.*]] = add i32 [[LEN_ADDR]], 1
938; LMUL8-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], [[N]]
939; LMUL8-NEXT:    br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
940; LMUL8:       while.body:
941; LMUL8-NEXT:    [[IDXPROM:%.*]] = zext i32 [[MISMATCH_RESULT]] to i64
942; LMUL8-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
943; LMUL8-NEXT:    [[TMP37:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
944; LMUL8-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
945; LMUL8-NEXT:    [[TMP38:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
946; LMUL8-NEXT:    [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP37]], [[TMP38]]
947; LMUL8-NEXT:    br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_FOUND:%.*]]
948; LMUL8:       while.found:
949; LMUL8-NEXT:    [[MISMATCH_INDEX1:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_BODY]] ], [ [[MISMATCH_RESULT]], [[BYTE_COMPARE]] ]
950; LMUL8-NEXT:    [[FOUND_PTR:%.*]] = phi ptr [ [[C]], [[WHILE_BODY]] ], [ [[C]], [[BYTE_COMPARE]] ]
951; LMUL8-NEXT:    br label [[END:%.*]]
952; LMUL8:       byte.compare:
953; LMUL8-NEXT:    [[TMP39:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], [[N]]
954; LMUL8-NEXT:    br i1 [[TMP39]], label [[WHILE_END]], label [[WHILE_FOUND]]
955; LMUL8:       while.end:
956; LMUL8-NEXT:    [[MISMATCH_INDEX2:%.*]] = phi i32 [ [[N]], [[WHILE_COND]] ], [ [[N]], [[BYTE_COMPARE]] ]
957; LMUL8-NEXT:    [[END_PTR:%.*]] = phi ptr [ [[D]], [[WHILE_COND]] ], [ [[D]], [[BYTE_COMPARE]] ]
958; LMUL8-NEXT:    br label [[END]]
959; LMUL8:       end:
960; LMUL8-NEXT:    [[MISMATCH_INDEX:%.*]] = phi i32 [ [[MISMATCH_INDEX1]], [[WHILE_FOUND]] ], [ [[MISMATCH_INDEX2]], [[WHILE_END]] ]
961; LMUL8-NEXT:    [[STORE_PTR:%.*]] = phi ptr [ [[END_PTR]], [[WHILE_END]] ], [ [[FOUND_PTR]], [[WHILE_FOUND]] ]
962; LMUL8-NEXT:    store i32 [[MISMATCH_INDEX]], ptr [[STORE_PTR]], align 4
963; LMUL8-NEXT:    ret i32 [[MISMATCH_INDEX]]
964;
965; LOOP-DEL-LABEL: define i32 @compare_bytes_simple_end_ne_found(
966; LOOP-DEL-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], ptr [[D:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
967; LOOP-DEL-NEXT:  entry:
968; LOOP-DEL-NEXT:    [[TMP0:%.*]] = add i32 [[LEN]], 1
969; LOOP-DEL-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
970; LOOP-DEL-NEXT:    [[TMP2:%.*]] = zext i32 [[N]] to i64
971; LOOP-DEL-NEXT:    [[TMP3:%.*]] = icmp ule i32 [[TMP0]], [[N]]
972; LOOP-DEL-NEXT:    br i1 [[TMP3]], label [[MISMATCH_MEM_CHECK:%.*]], label [[MISMATCH_LOOP_PRE:%.*]], !prof [[PROF0]]
973; LOOP-DEL:       mismatch_mem_check:
974; LOOP-DEL-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]]
975; LOOP-DEL-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
976; LOOP-DEL-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP5]] to i64
977; LOOP-DEL-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[TMP4]] to i64
978; LOOP-DEL-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]]
979; LOOP-DEL-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
980; LOOP-DEL-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
981; LOOP-DEL-NEXT:    [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64
982; LOOP-DEL-NEXT:    [[TMP6:%.*]] = lshr i64 [[TMP7]], 12
983; LOOP-DEL-NEXT:    [[TMP9:%.*]] = lshr i64 [[TMP11]], 12
984; LOOP-DEL-NEXT:    [[TMP12:%.*]] = lshr i64 [[TMP8]], 12
985; LOOP-DEL-NEXT:    [[TMP15:%.*]] = lshr i64 [[TMP14]], 12
986; LOOP-DEL-NEXT:    [[TMP16:%.*]] = icmp ne i64 [[TMP6]], [[TMP9]]
987; LOOP-DEL-NEXT:    [[TMP17:%.*]] = icmp ne i64 [[TMP12]], [[TMP15]]
988; LOOP-DEL-NEXT:    [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]]
989; LOOP-DEL-NEXT:    br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_VECTOR_LOOP:%.*]], !prof [[PROF1]]
990; LOOP-DEL:       mismatch_vec_loop:
991; LOOP-DEL-NEXT:    [[MISMATCH_VECTOR_INDEX:%.*]] = phi i64 [ [[TMP24:%.*]], [[MISMATCH_VECTOR_LOOP_INC:%.*]] ], [ [[TMP1]], [[MISMATCH_MEM_CHECK]] ]
992; LOOP-DEL-NEXT:    [[AVL:%.*]] = sub nuw nsw i64 [[TMP2]], [[MISMATCH_VECTOR_INDEX]]
993; LOOP-DEL-NEXT:    [[TMP19:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 16, i1 true)
994; LOOP-DEL-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VECTOR_INDEX]]
995; LOOP-DEL-NEXT:    [[LHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP20]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]])
996; LOOP-DEL-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VECTOR_INDEX]]
997; LOOP-DEL-NEXT:    [[RHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP21]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]])
998; LOOP-DEL-NEXT:    [[MISMATCH_CMP:%.*]] = call <vscale x 16 x i1> @llvm.vp.icmp.nxv16i8(<vscale x 16 x i8> [[LHS_LOAD]], <vscale x 16 x i8> [[RHS_LOAD]], metadata !"ne", <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]])
999; LOOP-DEL-NEXT:    [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> [[MISMATCH_CMP]], i1 false, <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]])
1000; LOOP-DEL-NEXT:    [[TMP22:%.*]] = icmp ne i32 [[FIRST]], [[TMP19]]
1001; LOOP-DEL-NEXT:    br i1 [[TMP22]], label [[MISMATCH_VECTOR_LOOP_FOUND:%.*]], label [[MISMATCH_VECTOR_LOOP_INC]]
1002; LOOP-DEL:       mismatch_vec_loop_inc:
1003; LOOP-DEL-NEXT:    [[TMP23:%.*]] = zext i32 [[TMP19]] to i64
1004; LOOP-DEL-NEXT:    [[TMP24]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX]], [[TMP23]]
1005; LOOP-DEL-NEXT:    [[TMP25:%.*]] = icmp ne i64 [[TMP24]], [[TMP2]]
1006; LOOP-DEL-NEXT:    br i1 [[TMP25]], label [[MISMATCH_VECTOR_LOOP]], label [[BYTE_COMPARE:%.*]]
1007; LOOP-DEL:       mismatch_vec_loop_found:
1008; LOOP-DEL-NEXT:    [[FIRST1:%.*]] = phi i32 [ [[FIRST]], [[MISMATCH_VECTOR_LOOP]] ]
1009; LOOP-DEL-NEXT:    [[MISMATCH_VECTOR_INDEX2:%.*]] = phi i64 [ [[MISMATCH_VECTOR_INDEX]], [[MISMATCH_VECTOR_LOOP]] ]
1010; LOOP-DEL-NEXT:    [[TMP26:%.*]] = zext i32 [[FIRST1]] to i64
1011; LOOP-DEL-NEXT:    [[TMP27:%.*]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX2]], [[TMP26]]
1012; LOOP-DEL-NEXT:    [[TMP28:%.*]] = trunc i64 [[TMP27]] to i32
1013; LOOP-DEL-NEXT:    br label [[BYTE_COMPARE]]
1014; LOOP-DEL:       mismatch_loop_pre:
1015; LOOP-DEL-NEXT:    br label [[MISMATCH_LOOP:%.*]]
1016; LOOP-DEL:       mismatch_loop:
1017; LOOP-DEL-NEXT:    [[MISMATCH_INDEX3:%.*]] = phi i32 [ [[TMP0]], [[MISMATCH_LOOP_PRE]] ], [ [[TMP35:%.*]], [[MISMATCH_LOOP_INC:%.*]] ]
1018; LOOP-DEL-NEXT:    [[TMP29:%.*]] = zext i32 [[MISMATCH_INDEX3]] to i64
1019; LOOP-DEL-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP29]]
1020; LOOP-DEL-NEXT:    [[TMP31:%.*]] = load i8, ptr [[TMP30]], align 1
1021; LOOP-DEL-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP29]]
1022; LOOP-DEL-NEXT:    [[TMP33:%.*]] = load i8, ptr [[TMP32]], align 1
1023; LOOP-DEL-NEXT:    [[TMP34:%.*]] = icmp eq i8 [[TMP31]], [[TMP33]]
1024; LOOP-DEL-NEXT:    br i1 [[TMP34]], label [[MISMATCH_LOOP_INC]], label [[BYTE_COMPARE]]
1025; LOOP-DEL:       mismatch_loop_inc:
1026; LOOP-DEL-NEXT:    [[TMP35]] = add i32 [[MISMATCH_INDEX3]], 1
1027; LOOP-DEL-NEXT:    [[TMP36:%.*]] = icmp eq i32 [[TMP35]], [[N]]
1028; LOOP-DEL-NEXT:    br i1 [[TMP36]], label [[BYTE_COMPARE]], label [[MISMATCH_LOOP]]
1029; LOOP-DEL:       byte.compare:
1030; LOOP-DEL-NEXT:    [[MISMATCH_RESULT:%.*]] = phi i32 [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX3]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_VECTOR_LOOP_INC]] ], [ [[TMP28]], [[MISMATCH_VECTOR_LOOP_FOUND]] ]
1031; LOOP-DEL-NEXT:    [[TMP37:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], [[N]]
1032; LOOP-DEL-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[TMP37]], i32 [[N]], i32 [[MISMATCH_RESULT]]
1033; LOOP-DEL-NEXT:    [[SPEC_SELECT4:%.*]] = select i1 [[TMP37]], ptr [[D]], ptr [[C]]
1034; LOOP-DEL-NEXT:    store i32 [[SPEC_SELECT]], ptr [[SPEC_SELECT4]], align 4
1035; LOOP-DEL-NEXT:    ret i32 [[SPEC_SELECT]]
1036;
1037; MASKED-LABEL: define i32 @compare_bytes_simple_end_ne_found(
1038; MASKED-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], ptr [[D:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
1039; MASKED-NEXT:  entry:
1040; MASKED-NEXT:    [[TMP0:%.*]] = add i32 [[LEN]], 1
1041; MASKED-NEXT:    br label [[MISMATCH_MIN_IT_CHECK:%.*]]
1042; MASKED:       mismatch_min_it_check:
1043; MASKED-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
1044; MASKED-NEXT:    [[TMP2:%.*]] = zext i32 [[N]] to i64
1045; MASKED-NEXT:    [[TMP3:%.*]] = icmp ule i32 [[TMP0]], [[N]]
1046; MASKED-NEXT:    br i1 [[TMP3]], label [[MISMATCH_MEM_CHECK:%.*]], label [[MISMATCH_LOOP_PRE:%.*]], !prof [[PROF0]]
1047; MASKED:       mismatch_mem_check:
1048; MASKED-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]]
1049; MASKED-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
1050; MASKED-NEXT:    [[TMP6:%.*]] = ptrtoint ptr [[TMP5]] to i64
1051; MASKED-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[TMP4]] to i64
1052; MASKED-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]]
1053; MASKED-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
1054; MASKED-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP8]] to i64
1055; MASKED-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[TMP9]] to i64
1056; MASKED-NEXT:    [[TMP12:%.*]] = lshr i64 [[TMP7]], 12
1057; MASKED-NEXT:    [[TMP13:%.*]] = lshr i64 [[TMP10]], 12
1058; MASKED-NEXT:    [[TMP14:%.*]] = lshr i64 [[TMP6]], 12
1059; MASKED-NEXT:    [[TMP15:%.*]] = lshr i64 [[TMP11]], 12
1060; MASKED-NEXT:    [[TMP16:%.*]] = icmp ne i64 [[TMP12]], [[TMP13]]
1061; MASKED-NEXT:    [[TMP17:%.*]] = icmp ne i64 [[TMP14]], [[TMP15]]
1062; MASKED-NEXT:    [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]]
1063; MASKED-NEXT:    br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_VEC_LOOP_PREHEADER:%.*]], !prof [[PROF1]]
1064; MASKED:       mismatch_vec_loop_preheader:
1065; MASKED-NEXT:    [[TMP19:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP1]], i64 [[TMP2]])
1066; MASKED-NEXT:    [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
1067; MASKED-NEXT:    [[TMP21:%.*]] = mul nuw nsw i64 [[TMP20]], 16
1068; MASKED-NEXT:    br label [[MISMATCH_VEC_LOOP:%.*]]
1069; MASKED:       mismatch_vec_loop:
1070; MASKED-NEXT:    [[MISMATCH_VEC_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP19]], [[MISMATCH_VEC_LOOP_PREHEADER]] ], [ [[TMP30:%.*]], [[MISMATCH_VEC_LOOP_INC:%.*]] ]
1071; MASKED-NEXT:    [[MISMATCH_VEC_INDEX:%.*]] = phi i64 [ [[TMP1]], [[MISMATCH_VEC_LOOP_PREHEADER]] ], [ [[TMP29:%.*]], [[MISMATCH_VEC_LOOP_INC]] ]
1072; MASKED-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VEC_INDEX]]
1073; MASKED-NEXT:    [[TMP23:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP22]], i32 1, <vscale x 16 x i1> [[MISMATCH_VEC_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
1074; MASKED-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VEC_INDEX]]
1075; MASKED-NEXT:    [[TMP25:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP24]], i32 1, <vscale x 16 x i1> [[MISMATCH_VEC_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
1076; MASKED-NEXT:    [[TMP26:%.*]] = icmp ne <vscale x 16 x i8> [[TMP23]], [[TMP25]]
1077; MASKED-NEXT:    [[TMP27:%.*]] = select <vscale x 16 x i1> [[MISMATCH_VEC_LOOP_PRED]], <vscale x 16 x i1> [[TMP26]], <vscale x 16 x i1> zeroinitializer
1078; MASKED-NEXT:    [[TMP28:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[TMP27]])
1079; MASKED-NEXT:    br i1 [[TMP28]], label [[MISMATCH_VEC_LOOP_FOUND:%.*]], label [[MISMATCH_VEC_LOOP_INC]]
1080; MASKED:       mismatch_vec_loop_inc:
1081; MASKED-NEXT:    [[TMP29]] = add nuw nsw i64 [[MISMATCH_VEC_INDEX]], [[TMP21]]
1082; MASKED-NEXT:    [[TMP30]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP29]], i64 [[TMP2]])
1083; MASKED-NEXT:    [[TMP31:%.*]] = extractelement <vscale x 16 x i1> [[TMP30]], i64 0
1084; MASKED-NEXT:    br i1 [[TMP31]], label [[MISMATCH_VEC_LOOP]], label [[MISMATCH_END:%.*]]
1085; MASKED:       mismatch_vec_loop_found:
1086; MASKED-NEXT:    [[MISMATCH_VEC_FOUND_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP27]], [[MISMATCH_VEC_LOOP]] ]
1087; MASKED-NEXT:    [[MISMATCH_VEC_LAST_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[MISMATCH_VEC_LOOP_PRED]], [[MISMATCH_VEC_LOOP]] ]
1088; MASKED-NEXT:    [[MISMATCH_VEC_FOUND_INDEX:%.*]] = phi i64 [ [[MISMATCH_VEC_INDEX]], [[MISMATCH_VEC_LOOP]] ]
1089; MASKED-NEXT:    [[TMP32:%.*]] = and <vscale x 16 x i1> [[MISMATCH_VEC_LAST_LOOP_PRED]], [[MISMATCH_VEC_FOUND_PRED]]
1090; MASKED-NEXT:    [[TMP33:%.*]] = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> [[TMP32]], i1 true)
1091; MASKED-NEXT:    [[TMP34:%.*]] = zext i32 [[TMP33]] to i64
1092; MASKED-NEXT:    [[TMP35:%.*]] = add nuw nsw i64 [[MISMATCH_VEC_FOUND_INDEX]], [[TMP34]]
1093; MASKED-NEXT:    [[TMP36:%.*]] = trunc i64 [[TMP35]] to i32
1094; MASKED-NEXT:    br label [[MISMATCH_END]]
1095; MASKED:       mismatch_loop_pre:
1096; MASKED-NEXT:    br label [[MISMATCH_LOOP:%.*]]
1097; MASKED:       mismatch_loop:
1098; MASKED-NEXT:    [[MISMATCH_INDEX3:%.*]] = phi i32 [ [[TMP0]], [[MISMATCH_LOOP_PRE]] ], [ [[TMP43:%.*]], [[MISMATCH_LOOP_INC:%.*]] ]
1099; MASKED-NEXT:    [[TMP37:%.*]] = zext i32 [[MISMATCH_INDEX3]] to i64
1100; MASKED-NEXT:    [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP37]]
1101; MASKED-NEXT:    [[TMP39:%.*]] = load i8, ptr [[TMP38]], align 1
1102; MASKED-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP37]]
1103; MASKED-NEXT:    [[TMP41:%.*]] = load i8, ptr [[TMP40]], align 1
1104; MASKED-NEXT:    [[TMP42:%.*]] = icmp eq i8 [[TMP39]], [[TMP41]]
1105; MASKED-NEXT:    br i1 [[TMP42]], label [[MISMATCH_LOOP_INC]], label [[MISMATCH_END]]
1106; MASKED:       mismatch_loop_inc:
1107; MASKED-NEXT:    [[TMP43]] = add i32 [[MISMATCH_INDEX3]], 1
1108; MASKED-NEXT:    [[TMP44:%.*]] = icmp eq i32 [[TMP43]], [[N]]
1109; MASKED-NEXT:    br i1 [[TMP44]], label [[MISMATCH_END]], label [[MISMATCH_LOOP]]
1110; MASKED:       mismatch_end:
1111; MASKED-NEXT:    [[MISMATCH_RESULT:%.*]] = phi i32 [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX3]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_VEC_LOOP_INC]] ], [ [[TMP36]], [[MISMATCH_VEC_LOOP_FOUND]] ]
1112; MASKED-NEXT:    br i1 true, label [[BYTE_COMPARE:%.*]], label [[WHILE_COND:%.*]]
1113; MASKED:       while.cond:
1114; MASKED-NEXT:    [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[MISMATCH_END]] ], [ [[MISMATCH_RESULT]], [[WHILE_BODY:%.*]] ]
1115; MASKED-NEXT:    [[INC:%.*]] = add i32 [[LEN_ADDR]], 1
1116; MASKED-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], [[N]]
1117; MASKED-NEXT:    br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
1118; MASKED:       while.body:
1119; MASKED-NEXT:    [[IDXPROM:%.*]] = zext i32 [[MISMATCH_RESULT]] to i64
1120; MASKED-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
1121; MASKED-NEXT:    [[TMP45:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
1122; MASKED-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
1123; MASKED-NEXT:    [[TMP46:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
1124; MASKED-NEXT:    [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP45]], [[TMP46]]
1125; MASKED-NEXT:    br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_FOUND:%.*]]
1126; MASKED:       while.found:
1127; MASKED-NEXT:    [[MISMATCH_INDEX1:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_BODY]] ], [ [[MISMATCH_RESULT]], [[BYTE_COMPARE]] ]
1128; MASKED-NEXT:    [[FOUND_PTR:%.*]] = phi ptr [ [[C]], [[WHILE_BODY]] ], [ [[C]], [[BYTE_COMPARE]] ]
1129; MASKED-NEXT:    br label [[END:%.*]]
1130; MASKED:       byte.compare:
1131; MASKED-NEXT:    [[TMP47:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], [[N]]
1132; MASKED-NEXT:    br i1 [[TMP47]], label [[WHILE_END]], label [[WHILE_FOUND]]
1133; MASKED:       while.end:
1134; MASKED-NEXT:    [[MISMATCH_INDEX2:%.*]] = phi i32 [ [[N]], [[WHILE_COND]] ], [ [[N]], [[BYTE_COMPARE]] ]
1135; MASKED-NEXT:    [[END_PTR:%.*]] = phi ptr [ [[D]], [[WHILE_COND]] ], [ [[D]], [[BYTE_COMPARE]] ]
1136; MASKED-NEXT:    br label [[END]]
1137; MASKED:       end:
1138; MASKED-NEXT:    [[MISMATCH_INDEX:%.*]] = phi i32 [ [[MISMATCH_INDEX1]], [[WHILE_FOUND]] ], [ [[MISMATCH_INDEX2]], [[WHILE_END]] ]
1139; MASKED-NEXT:    [[STORE_PTR:%.*]] = phi ptr [ [[END_PTR]], [[WHILE_END]] ], [ [[FOUND_PTR]], [[WHILE_FOUND]] ]
1140; MASKED-NEXT:    store i32 [[MISMATCH_INDEX]], ptr [[STORE_PTR]], align 4
1141; MASKED-NEXT:    ret i32 [[MISMATCH_INDEX]]
1142;
1143; NO-TRANSFORM-LABEL: define i32 @compare_bytes_simple_end_ne_found(
1144; NO-TRANSFORM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], ptr [[D:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) {
1145; NO-TRANSFORM-NEXT:  entry:
1146; NO-TRANSFORM-NEXT:    br label [[WHILE_COND:%.*]]
1147; NO-TRANSFORM:       while.cond:
1148; NO-TRANSFORM-NEXT:    [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
1149; NO-TRANSFORM-NEXT:    [[INC]] = add i32 [[LEN_ADDR]], 1
1150; NO-TRANSFORM-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
1151; NO-TRANSFORM-NEXT:    br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
1152; NO-TRANSFORM:       while.body:
1153; NO-TRANSFORM-NEXT:    [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
1154; NO-TRANSFORM-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
1155; NO-TRANSFORM-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
1156; NO-TRANSFORM-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
1157; NO-TRANSFORM-NEXT:    [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
1158; NO-TRANSFORM-NEXT:    [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
1159; NO-TRANSFORM-NEXT:    br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_FOUND:%.*]]
1160; NO-TRANSFORM:       while.found:
1161; NO-TRANSFORM-NEXT:    [[MISMATCH_INDEX1:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ]
1162; NO-TRANSFORM-NEXT:    [[FOUND_PTR:%.*]] = phi ptr [ [[C]], [[WHILE_BODY]] ]
1163; NO-TRANSFORM-NEXT:    br label [[END:%.*]]
1164; NO-TRANSFORM:       while.end:
1165; NO-TRANSFORM-NEXT:    [[MISMATCH_INDEX2:%.*]] = phi i32 [ [[N]], [[WHILE_COND]] ]
1166; NO-TRANSFORM-NEXT:    [[END_PTR:%.*]] = phi ptr [ [[D]], [[WHILE_COND]] ]
1167; NO-TRANSFORM-NEXT:    br label [[END]]
1168; NO-TRANSFORM:       end:
1169; NO-TRANSFORM-NEXT:    [[MISMATCH_INDEX:%.*]] = phi i32 [ [[MISMATCH_INDEX1]], [[WHILE_FOUND]] ], [ [[MISMATCH_INDEX2]], [[WHILE_END]] ]
1170; NO-TRANSFORM-NEXT:    [[STORE_PTR:%.*]] = phi ptr [ [[END_PTR]], [[WHILE_END]] ], [ [[FOUND_PTR]], [[WHILE_FOUND]] ]
1171; NO-TRANSFORM-NEXT:    store i32 [[MISMATCH_INDEX]], ptr [[STORE_PTR]], align 4
1172; NO-TRANSFORM-NEXT:    ret i32 [[MISMATCH_INDEX]]
1173entry:
1174  br label %while.cond
1175
1176while.cond:
1177  %len.addr = phi i32 [ %len, %entry ], [ %inc, %while.body ]
1178  %inc = add i32 %len.addr, 1
1179  %cmp.not = icmp eq i32 %inc, %n
1180  br i1 %cmp.not, label %while.end, label %while.body
1181
1182while.body:
1183  %idxprom = zext i32 %inc to i64
1184  %arrayidx = getelementptr inbounds i8, ptr %a, i64 %idxprom
1185  %0 = load i8, ptr %arrayidx
1186  %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %idxprom
1187  %1 = load i8, ptr %arrayidx2
1188  %cmp.not2 = icmp eq i8 %0, %1
1189  br i1 %cmp.not2, label %while.cond, label %while.found
1190
1191while.found:
1192  %mismatch_index1 = phi i32 [ %inc, %while.body ]
1193  %found_ptr = phi ptr [ %c, %while.body ]
1194  br label %end
1195
1196while.end:
1197  %mismatch_index2 = phi i32 [ %n, %while.cond ]
1198  %end_ptr = phi ptr [ %d, %while.cond ]
1199  br label %end
1200
1201end:
1202  %mismatch_index = phi i32 [ %mismatch_index1, %while.found ], [ %mismatch_index2, %while.end ]
1203  %store_ptr = phi ptr [ %end_ptr, %while.end ], [ %found_ptr, %while.found ]
1204  store i32 %mismatch_index, ptr %store_ptr
1205  ret i32 %mismatch_index
1206}
1207
1208
1209
1210define i32 @compare_bytes_extra_cmp(ptr %a, ptr %b, i32 %len, i32 %n, i32 %x) {
1211; CHECK-LABEL: define i32 @compare_bytes_extra_cmp(
1212; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]], i32 [[X:%.*]]) #[[ATTR0]] {
1213; CHECK-NEXT:  entry:
1214; CHECK-NEXT:    [[CMP_X:%.*]] = icmp ult i32 [[N]], [[X]]
1215; CHECK-NEXT:    br i1 [[CMP_X]], label [[PH:%.*]], label [[WHILE_END:%.*]]
1216; CHECK:       ph:
1217; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[LEN]], 1
1218; CHECK-NEXT:    br label [[MISMATCH_MIN_IT_CHECK:%.*]]
1219; CHECK:       mismatch_min_it_check:
1220; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
1221; CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[N]] to i64
1222; CHECK-NEXT:    [[TMP3:%.*]] = icmp ule i32 [[TMP0]], [[N]]
1223; CHECK-NEXT:    br i1 [[TMP3]], label [[MISMATCH_MEM_CHECK:%.*]], label [[MISMATCH_LOOP_PRE:%.*]], !prof [[PROF0]]
1224; CHECK:       mismatch_mem_check:
1225; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]]
1226; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
1227; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP5]] to i64
1228; CHECK-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[TMP4]] to i64
1229; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]]
1230; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
1231; CHECK-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
1232; CHECK-NEXT:    [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64
1233; CHECK-NEXT:    [[TMP6:%.*]] = lshr i64 [[TMP7]], 12
1234; CHECK-NEXT:    [[TMP9:%.*]] = lshr i64 [[TMP11]], 12
1235; CHECK-NEXT:    [[TMP12:%.*]] = lshr i64 [[TMP8]], 12
1236; CHECK-NEXT:    [[TMP15:%.*]] = lshr i64 [[TMP14]], 12
1237; CHECK-NEXT:    [[TMP16:%.*]] = icmp ne i64 [[TMP6]], [[TMP9]]
1238; CHECK-NEXT:    [[TMP17:%.*]] = icmp ne i64 [[TMP12]], [[TMP15]]
1239; CHECK-NEXT:    [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]]
1240; CHECK-NEXT:    br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_VECTOR_LOOP_PREHEADER:%.*]], !prof [[PROF1]]
1241; CHECK:       mismatch_vec_loop_preheader:
1242; CHECK-NEXT:    br label [[MISMATCH_VECTOR_LOOP:%.*]]
1243; CHECK:       mismatch_vec_loop:
1244; CHECK-NEXT:    [[MISMATCH_VECTOR_INDEX:%.*]] = phi i64 [ [[TMP1]], [[MISMATCH_VECTOR_LOOP_PREHEADER]] ], [ [[TMP24:%.*]], [[MISMATCH_VECTOR_LOOP_INC:%.*]] ]
1245; CHECK-NEXT:    [[AVL:%.*]] = sub nuw nsw i64 [[TMP2]], [[MISMATCH_VECTOR_INDEX]]
1246; CHECK-NEXT:    [[TMP19:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 16, i1 true)
1247; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VECTOR_INDEX]]
1248; CHECK-NEXT:    [[LHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP20]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]])
1249; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VECTOR_INDEX]]
1250; CHECK-NEXT:    [[RHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP21]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]])
1251; CHECK-NEXT:    [[MISMATCH_CMP:%.*]] = call <vscale x 16 x i1> @llvm.vp.icmp.nxv16i8(<vscale x 16 x i8> [[LHS_LOAD]], <vscale x 16 x i8> [[RHS_LOAD]], metadata !"ne", <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]])
1252; CHECK-NEXT:    [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> [[MISMATCH_CMP]], i1 false, <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]])
1253; CHECK-NEXT:    [[TMP22:%.*]] = icmp ne i32 [[FIRST]], [[TMP19]]
1254; CHECK-NEXT:    br i1 [[TMP22]], label [[MISMATCH_VECTOR_LOOP_FOUND:%.*]], label [[MISMATCH_VECTOR_LOOP_INC]]
1255; CHECK:       mismatch_vec_loop_inc:
1256; CHECK-NEXT:    [[TMP23:%.*]] = zext i32 [[TMP19]] to i64
1257; CHECK-NEXT:    [[TMP24]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX]], [[TMP23]]
1258; CHECK-NEXT:    [[TMP25:%.*]] = icmp ne i64 [[TMP24]], [[TMP2]]
1259; CHECK-NEXT:    br i1 [[TMP25]], label [[MISMATCH_VECTOR_LOOP]], label [[MISMATCH_END:%.*]]
1260; CHECK:       mismatch_vec_loop_found:
1261; CHECK-NEXT:    [[FIRST2:%.*]] = phi i32 [ [[FIRST]], [[MISMATCH_VECTOR_LOOP]] ]
1262; CHECK-NEXT:    [[MISMATCH_VECTOR_INDEX3:%.*]] = phi i64 [ [[MISMATCH_VECTOR_INDEX]], [[MISMATCH_VECTOR_LOOP]] ]
1263; CHECK-NEXT:    [[TMP26:%.*]] = zext i32 [[FIRST2]] to i64
1264; CHECK-NEXT:    [[TMP27:%.*]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX3]], [[TMP26]]
1265; CHECK-NEXT:    [[TMP28:%.*]] = trunc i64 [[TMP27]] to i32
1266; CHECK-NEXT:    br label [[MISMATCH_END]]
1267; CHECK:       mismatch_loop_pre:
1268; CHECK-NEXT:    br label [[MISMATCH_LOOP:%.*]]
1269; CHECK:       mismatch_loop:
1270; CHECK-NEXT:    [[MISMATCH_INDEX:%.*]] = phi i32 [ [[TMP0]], [[MISMATCH_LOOP_PRE]] ], [ [[TMP35:%.*]], [[MISMATCH_LOOP_INC:%.*]] ]
1271; CHECK-NEXT:    [[TMP29:%.*]] = zext i32 [[MISMATCH_INDEX]] to i64
1272; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP29]]
1273; CHECK-NEXT:    [[TMP31:%.*]] = load i8, ptr [[TMP30]], align 1
1274; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP29]]
1275; CHECK-NEXT:    [[TMP33:%.*]] = load i8, ptr [[TMP32]], align 1
1276; CHECK-NEXT:    [[TMP34:%.*]] = icmp eq i8 [[TMP31]], [[TMP33]]
1277; CHECK-NEXT:    br i1 [[TMP34]], label [[MISMATCH_LOOP_INC]], label [[MISMATCH_END]]
1278; CHECK:       mismatch_loop_inc:
1279; CHECK-NEXT:    [[TMP35]] = add i32 [[MISMATCH_INDEX]], 1
1280; CHECK-NEXT:    [[TMP36:%.*]] = icmp eq i32 [[TMP35]], [[N]]
1281; CHECK-NEXT:    br i1 [[TMP36]], label [[MISMATCH_END]], label [[MISMATCH_LOOP]]
1282; CHECK:       mismatch_end:
1283; CHECK-NEXT:    [[MISMATCH_RESULT:%.*]] = phi i32 [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_VECTOR_LOOP_INC]] ], [ [[TMP28]], [[MISMATCH_VECTOR_LOOP_FOUND]] ]
1284; CHECK-NEXT:    br i1 true, label [[BYTE_COMPARE:%.*]], label [[WHILE_COND:%.*]]
1285; CHECK:       while.cond:
1286; CHECK-NEXT:    [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[MISMATCH_END]] ], [ [[MISMATCH_RESULT]], [[WHILE_BODY:%.*]] ]
1287; CHECK-NEXT:    [[INC:%.*]] = add i32 [[LEN_ADDR]], 1
1288; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], [[N]]
1289; CHECK-NEXT:    br i1 [[CMP_NOT]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
1290; CHECK:       while.body:
1291; CHECK-NEXT:    [[IDXPROM:%.*]] = zext i32 [[MISMATCH_RESULT]] to i64
1292; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
1293; CHECK-NEXT:    [[TMP37:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
1294; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
1295; CHECK-NEXT:    [[TMP38:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
1296; CHECK-NEXT:    [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP37]], [[TMP38]]
1297; CHECK-NEXT:    br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END_LOOPEXIT]]
1298; CHECK:       byte.compare:
1299; CHECK-NEXT:    br label [[WHILE_END_LOOPEXIT]]
1300; CHECK:       while.end.loopexit:
1301; CHECK-NEXT:    [[INC_LCSSA1:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_COND]] ], [ [[MISMATCH_RESULT]], [[WHILE_BODY]] ], [ [[MISMATCH_RESULT]], [[BYTE_COMPARE]] ]
1302; CHECK-NEXT:    br label [[WHILE_END]]
1303; CHECK:       while.end:
1304; CHECK-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[X]], [[ENTRY:%.*]] ], [ [[INC_LCSSA1]], [[WHILE_END_LOOPEXIT]] ]
1305; CHECK-NEXT:    ret i32 [[INC_LCSSA]]
1306;
1307; LMUL8-LABEL: define i32 @compare_bytes_extra_cmp(
1308; LMUL8-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]], i32 [[X:%.*]]) #[[ATTR0]] {
1309; LMUL8-NEXT:  entry:
1310; LMUL8-NEXT:    [[CMP_X:%.*]] = icmp ult i32 [[N]], [[X]]
1311; LMUL8-NEXT:    br i1 [[CMP_X]], label [[PH:%.*]], label [[WHILE_END:%.*]]
1312; LMUL8:       ph:
1313; LMUL8-NEXT:    [[TMP0:%.*]] = add i32 [[LEN]], 1
1314; LMUL8-NEXT:    br label [[MISMATCH_MIN_IT_CHECK:%.*]]
1315; LMUL8:       mismatch_min_it_check:
1316; LMUL8-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
1317; LMUL8-NEXT:    [[TMP2:%.*]] = zext i32 [[N]] to i64
1318; LMUL8-NEXT:    [[TMP3:%.*]] = icmp ule i32 [[TMP0]], [[N]]
1319; LMUL8-NEXT:    br i1 [[TMP3]], label [[MISMATCH_MEM_CHECK:%.*]], label [[MISMATCH_LOOP_PRE:%.*]], !prof [[PROF0]]
1320; LMUL8:       mismatch_mem_check:
1321; LMUL8-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]]
1322; LMUL8-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
1323; LMUL8-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP5]] to i64
1324; LMUL8-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[TMP4]] to i64
1325; LMUL8-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]]
1326; LMUL8-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
1327; LMUL8-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
1328; LMUL8-NEXT:    [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64
1329; LMUL8-NEXT:    [[TMP6:%.*]] = lshr i64 [[TMP7]], 12
1330; LMUL8-NEXT:    [[TMP9:%.*]] = lshr i64 [[TMP11]], 12
1331; LMUL8-NEXT:    [[TMP12:%.*]] = lshr i64 [[TMP8]], 12
1332; LMUL8-NEXT:    [[TMP15:%.*]] = lshr i64 [[TMP14]], 12
1333; LMUL8-NEXT:    [[TMP16:%.*]] = icmp ne i64 [[TMP6]], [[TMP9]]
1334; LMUL8-NEXT:    [[TMP17:%.*]] = icmp ne i64 [[TMP12]], [[TMP15]]
1335; LMUL8-NEXT:    [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]]
1336; LMUL8-NEXT:    br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_VECTOR_LOOP_PREHEADER:%.*]], !prof [[PROF1]]
1337; LMUL8:       mismatch_vec_loop_preheader:
1338; LMUL8-NEXT:    br label [[MISMATCH_VECTOR_LOOP:%.*]]
1339; LMUL8:       mismatch_vec_loop:
1340; LMUL8-NEXT:    [[MISMATCH_VECTOR_INDEX:%.*]] = phi i64 [ [[TMP1]], [[MISMATCH_VECTOR_LOOP_PREHEADER]] ], [ [[TMP24:%.*]], [[MISMATCH_VECTOR_LOOP_INC:%.*]] ]
1341; LMUL8-NEXT:    [[AVL:%.*]] = sub nuw nsw i64 [[TMP2]], [[MISMATCH_VECTOR_INDEX]]
1342; LMUL8-NEXT:    [[TMP19:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 64, i1 true)
1343; LMUL8-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VECTOR_INDEX]]
1344; LMUL8-NEXT:    [[LHS_LOAD:%.*]] = call <vscale x 64 x i8> @llvm.vp.load.nxv64i8.p0(ptr [[TMP20]], <vscale x 64 x i1> splat (i1 true), i32 [[TMP19]])
1345; LMUL8-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VECTOR_INDEX]]
1346; LMUL8-NEXT:    [[RHS_LOAD:%.*]] = call <vscale x 64 x i8> @llvm.vp.load.nxv64i8.p0(ptr [[TMP21]], <vscale x 64 x i1> splat (i1 true), i32 [[TMP19]])
1347; LMUL8-NEXT:    [[MISMATCH_CMP:%.*]] = call <vscale x 64 x i1> @llvm.vp.icmp.nxv64i8(<vscale x 64 x i8> [[LHS_LOAD]], <vscale x 64 x i8> [[RHS_LOAD]], metadata !"ne", <vscale x 64 x i1> splat (i1 true), i32 [[TMP19]])
1348; LMUL8-NEXT:    [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv64i1(<vscale x 64 x i1> [[MISMATCH_CMP]], i1 false, <vscale x 64 x i1> splat (i1 true), i32 [[TMP19]])
1349; LMUL8-NEXT:    [[TMP22:%.*]] = icmp ne i32 [[FIRST]], [[TMP19]]
1350; LMUL8-NEXT:    br i1 [[TMP22]], label [[MISMATCH_VECTOR_LOOP_FOUND:%.*]], label [[MISMATCH_VECTOR_LOOP_INC]]
1351; LMUL8:       mismatch_vec_loop_inc:
1352; LMUL8-NEXT:    [[TMP23:%.*]] = zext i32 [[TMP19]] to i64
1353; LMUL8-NEXT:    [[TMP24]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX]], [[TMP23]]
1354; LMUL8-NEXT:    [[TMP25:%.*]] = icmp ne i64 [[TMP24]], [[TMP2]]
1355; LMUL8-NEXT:    br i1 [[TMP25]], label [[MISMATCH_VECTOR_LOOP]], label [[MISMATCH_END:%.*]]
1356; LMUL8:       mismatch_vec_loop_found:
1357; LMUL8-NEXT:    [[FIRST2:%.*]] = phi i32 [ [[FIRST]], [[MISMATCH_VECTOR_LOOP]] ]
1358; LMUL8-NEXT:    [[MISMATCH_VECTOR_INDEX3:%.*]] = phi i64 [ [[MISMATCH_VECTOR_INDEX]], [[MISMATCH_VECTOR_LOOP]] ]
1359; LMUL8-NEXT:    [[TMP26:%.*]] = zext i32 [[FIRST2]] to i64
1360; LMUL8-NEXT:    [[TMP27:%.*]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX3]], [[TMP26]]
1361; LMUL8-NEXT:    [[TMP28:%.*]] = trunc i64 [[TMP27]] to i32
1362; LMUL8-NEXT:    br label [[MISMATCH_END]]
1363; LMUL8:       mismatch_loop_pre:
1364; LMUL8-NEXT:    br label [[MISMATCH_LOOP:%.*]]
1365; LMUL8:       mismatch_loop:
1366; LMUL8-NEXT:    [[MISMATCH_INDEX:%.*]] = phi i32 [ [[TMP0]], [[MISMATCH_LOOP_PRE]] ], [ [[TMP35:%.*]], [[MISMATCH_LOOP_INC:%.*]] ]
1367; LMUL8-NEXT:    [[TMP29:%.*]] = zext i32 [[MISMATCH_INDEX]] to i64
1368; LMUL8-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP29]]
1369; LMUL8-NEXT:    [[TMP31:%.*]] = load i8, ptr [[TMP30]], align 1
1370; LMUL8-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP29]]
1371; LMUL8-NEXT:    [[TMP33:%.*]] = load i8, ptr [[TMP32]], align 1
1372; LMUL8-NEXT:    [[TMP34:%.*]] = icmp eq i8 [[TMP31]], [[TMP33]]
1373; LMUL8-NEXT:    br i1 [[TMP34]], label [[MISMATCH_LOOP_INC]], label [[MISMATCH_END]]
1374; LMUL8:       mismatch_loop_inc:
1375; LMUL8-NEXT:    [[TMP35]] = add i32 [[MISMATCH_INDEX]], 1
1376; LMUL8-NEXT:    [[TMP36:%.*]] = icmp eq i32 [[TMP35]], [[N]]
1377; LMUL8-NEXT:    br i1 [[TMP36]], label [[MISMATCH_END]], label [[MISMATCH_LOOP]]
1378; LMUL8:       mismatch_end:
1379; LMUL8-NEXT:    [[MISMATCH_RESULT:%.*]] = phi i32 [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_VECTOR_LOOP_INC]] ], [ [[TMP28]], [[MISMATCH_VECTOR_LOOP_FOUND]] ]
1380; LMUL8-NEXT:    br i1 true, label [[BYTE_COMPARE:%.*]], label [[WHILE_COND:%.*]]
1381; LMUL8:       while.cond:
1382; LMUL8-NEXT:    [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[MISMATCH_END]] ], [ [[MISMATCH_RESULT]], [[WHILE_BODY:%.*]] ]
1383; LMUL8-NEXT:    [[INC:%.*]] = add i32 [[LEN_ADDR]], 1
1384; LMUL8-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], [[N]]
1385; LMUL8-NEXT:    br i1 [[CMP_NOT]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
1386; LMUL8:       while.body:
1387; LMUL8-NEXT:    [[IDXPROM:%.*]] = zext i32 [[MISMATCH_RESULT]] to i64
1388; LMUL8-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
1389; LMUL8-NEXT:    [[TMP37:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
1390; LMUL8-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
1391; LMUL8-NEXT:    [[TMP38:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
1392; LMUL8-NEXT:    [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP37]], [[TMP38]]
1393; LMUL8-NEXT:    br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END_LOOPEXIT]]
1394; LMUL8:       byte.compare:
1395; LMUL8-NEXT:    br label [[WHILE_END_LOOPEXIT]]
1396; LMUL8:       while.end.loopexit:
1397; LMUL8-NEXT:    [[INC_LCSSA1:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_COND]] ], [ [[MISMATCH_RESULT]], [[WHILE_BODY]] ], [ [[MISMATCH_RESULT]], [[BYTE_COMPARE]] ]
1398; LMUL8-NEXT:    br label [[WHILE_END]]
1399; LMUL8:       while.end:
1400; LMUL8-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[X]], [[ENTRY:%.*]] ], [ [[INC_LCSSA1]], [[WHILE_END_LOOPEXIT]] ]
1401; LMUL8-NEXT:    ret i32 [[INC_LCSSA]]
1402;
1403; LOOP-DEL-LABEL: define i32 @compare_bytes_extra_cmp(
1404; LOOP-DEL-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]], i32 [[X:%.*]]) #[[ATTR0]] {
1405; LOOP-DEL-NEXT:  entry:
1406; LOOP-DEL-NEXT:    [[CMP_X:%.*]] = icmp ult i32 [[N]], [[X]]
1407; LOOP-DEL-NEXT:    br i1 [[CMP_X]], label [[PH:%.*]], label [[WHILE_END:%.*]]
1408; LOOP-DEL:       ph:
1409; LOOP-DEL-NEXT:    [[TMP0:%.*]] = add i32 [[LEN]], 1
1410; LOOP-DEL-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
1411; LOOP-DEL-NEXT:    [[TMP2:%.*]] = zext i32 [[N]] to i64
1412; LOOP-DEL-NEXT:    [[TMP3:%.*]] = icmp ule i32 [[TMP0]], [[N]]
1413; LOOP-DEL-NEXT:    br i1 [[TMP3]], label [[MISMATCH_MEM_CHECK:%.*]], label [[MISMATCH_LOOP_PRE:%.*]], !prof [[PROF0]]
1414; LOOP-DEL:       mismatch_mem_check:
1415; LOOP-DEL-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]]
1416; LOOP-DEL-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
1417; LOOP-DEL-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP5]] to i64
1418; LOOP-DEL-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[TMP4]] to i64
1419; LOOP-DEL-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]]
1420; LOOP-DEL-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
1421; LOOP-DEL-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
1422; LOOP-DEL-NEXT:    [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64
1423; LOOP-DEL-NEXT:    [[TMP6:%.*]] = lshr i64 [[TMP7]], 12
1424; LOOP-DEL-NEXT:    [[TMP9:%.*]] = lshr i64 [[TMP11]], 12
1425; LOOP-DEL-NEXT:    [[TMP12:%.*]] = lshr i64 [[TMP8]], 12
1426; LOOP-DEL-NEXT:    [[TMP15:%.*]] = lshr i64 [[TMP14]], 12
1427; LOOP-DEL-NEXT:    [[TMP16:%.*]] = icmp ne i64 [[TMP6]], [[TMP9]]
1428; LOOP-DEL-NEXT:    [[TMP17:%.*]] = icmp ne i64 [[TMP12]], [[TMP15]]
1429; LOOP-DEL-NEXT:    [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]]
1430; LOOP-DEL-NEXT:    br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_VECTOR_LOOP:%.*]], !prof [[PROF1]]
1431; LOOP-DEL:       mismatch_vec_loop:
1432; LOOP-DEL-NEXT:    [[MISMATCH_VECTOR_INDEX:%.*]] = phi i64 [ [[TMP24:%.*]], [[MISMATCH_VECTOR_LOOP_INC:%.*]] ], [ [[TMP1]], [[MISMATCH_MEM_CHECK]] ]
1433; LOOP-DEL-NEXT:    [[AVL:%.*]] = sub nuw nsw i64 [[TMP2]], [[MISMATCH_VECTOR_INDEX]]
1434; LOOP-DEL-NEXT:    [[TMP19:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 16, i1 true)
1435; LOOP-DEL-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VECTOR_INDEX]]
1436; LOOP-DEL-NEXT:    [[LHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP20]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]])
1437; LOOP-DEL-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VECTOR_INDEX]]
1438; LOOP-DEL-NEXT:    [[RHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP21]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]])
1439; LOOP-DEL-NEXT:    [[MISMATCH_CMP:%.*]] = call <vscale x 16 x i1> @llvm.vp.icmp.nxv16i8(<vscale x 16 x i8> [[LHS_LOAD]], <vscale x 16 x i8> [[RHS_LOAD]], metadata !"ne", <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]])
1440; LOOP-DEL-NEXT:    [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> [[MISMATCH_CMP]], i1 false, <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]])
1441; LOOP-DEL-NEXT:    [[TMP22:%.*]] = icmp ne i32 [[FIRST]], [[TMP19]]
1442; LOOP-DEL-NEXT:    br i1 [[TMP22]], label [[MISMATCH_VECTOR_LOOP_FOUND:%.*]], label [[MISMATCH_VECTOR_LOOP_INC]]
1443; LOOP-DEL:       mismatch_vec_loop_inc:
1444; LOOP-DEL-NEXT:    [[TMP23:%.*]] = zext i32 [[TMP19]] to i64
1445; LOOP-DEL-NEXT:    [[TMP24]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX]], [[TMP23]]
1446; LOOP-DEL-NEXT:    [[TMP25:%.*]] = icmp ne i64 [[TMP24]], [[TMP2]]
1447; LOOP-DEL-NEXT:    br i1 [[TMP25]], label [[MISMATCH_VECTOR_LOOP]], label [[WHILE_END]]
1448; LOOP-DEL:       mismatch_vec_loop_found:
1449; LOOP-DEL-NEXT:    [[FIRST2:%.*]] = phi i32 [ [[FIRST]], [[MISMATCH_VECTOR_LOOP]] ]
1450; LOOP-DEL-NEXT:    [[MISMATCH_VECTOR_INDEX3:%.*]] = phi i64 [ [[MISMATCH_VECTOR_INDEX]], [[MISMATCH_VECTOR_LOOP]] ]
1451; LOOP-DEL-NEXT:    [[TMP26:%.*]] = zext i32 [[FIRST2]] to i64
1452; LOOP-DEL-NEXT:    [[TMP27:%.*]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX3]], [[TMP26]]
1453; LOOP-DEL-NEXT:    [[TMP28:%.*]] = trunc i64 [[TMP27]] to i32
1454; LOOP-DEL-NEXT:    br label [[WHILE_END]]
1455; LOOP-DEL:       mismatch_loop_pre:
1456; LOOP-DEL-NEXT:    br label [[MISMATCH_LOOP:%.*]]
1457; LOOP-DEL:       mismatch_loop:
1458; LOOP-DEL-NEXT:    [[MISMATCH_INDEX:%.*]] = phi i32 [ [[TMP0]], [[MISMATCH_LOOP_PRE]] ], [ [[TMP35:%.*]], [[MISMATCH_LOOP_INC:%.*]] ]
1459; LOOP-DEL-NEXT:    [[TMP29:%.*]] = zext i32 [[MISMATCH_INDEX]] to i64
1460; LOOP-DEL-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP29]]
1461; LOOP-DEL-NEXT:    [[TMP31:%.*]] = load i8, ptr [[TMP30]], align 1
1462; LOOP-DEL-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP29]]
1463; LOOP-DEL-NEXT:    [[TMP33:%.*]] = load i8, ptr [[TMP32]], align 1
1464; LOOP-DEL-NEXT:    [[TMP34:%.*]] = icmp eq i8 [[TMP31]], [[TMP33]]
1465; LOOP-DEL-NEXT:    br i1 [[TMP34]], label [[MISMATCH_LOOP_INC]], label [[WHILE_END]]
1466; LOOP-DEL:       mismatch_loop_inc:
1467; LOOP-DEL-NEXT:    [[TMP35]] = add i32 [[MISMATCH_INDEX]], 1
1468; LOOP-DEL-NEXT:    [[TMP36:%.*]] = icmp eq i32 [[TMP35]], [[N]]
1469; LOOP-DEL-NEXT:    br i1 [[TMP36]], label [[WHILE_END]], label [[MISMATCH_LOOP]]
1470; LOOP-DEL:       while.end:
1471; LOOP-DEL-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[X]], [[ENTRY:%.*]] ], [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_VECTOR_LOOP_INC]] ], [ [[TMP28]], [[MISMATCH_VECTOR_LOOP_FOUND]] ]
1472; LOOP-DEL-NEXT:    ret i32 [[INC_LCSSA]]
1473;
1474; MASKED-LABEL: define i32 @compare_bytes_extra_cmp(
1475; MASKED-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]], i32 [[X:%.*]]) #[[ATTR0]] {
1476; MASKED-NEXT:  entry:
1477; MASKED-NEXT:    [[CMP_X:%.*]] = icmp ult i32 [[N]], [[X]]
1478; MASKED-NEXT:    br i1 [[CMP_X]], label [[PH:%.*]], label [[WHILE_END:%.*]]
1479; MASKED:       ph:
1480; MASKED-NEXT:    [[TMP0:%.*]] = add i32 [[LEN]], 1
1481; MASKED-NEXT:    br label [[MISMATCH_MIN_IT_CHECK:%.*]]
1482; MASKED:       mismatch_min_it_check:
1483; MASKED-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
1484; MASKED-NEXT:    [[TMP2:%.*]] = zext i32 [[N]] to i64
1485; MASKED-NEXT:    [[TMP3:%.*]] = icmp ule i32 [[TMP0]], [[N]]
1486; MASKED-NEXT:    br i1 [[TMP3]], label [[MISMATCH_MEM_CHECK:%.*]], label [[MISMATCH_LOOP_PRE:%.*]], !prof [[PROF0]]
1487; MASKED:       mismatch_mem_check:
1488; MASKED-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]]
1489; MASKED-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
1490; MASKED-NEXT:    [[TMP6:%.*]] = ptrtoint ptr [[TMP5]] to i64
1491; MASKED-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[TMP4]] to i64
1492; MASKED-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]]
1493; MASKED-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
1494; MASKED-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP8]] to i64
1495; MASKED-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[TMP9]] to i64
1496; MASKED-NEXT:    [[TMP12:%.*]] = lshr i64 [[TMP7]], 12
1497; MASKED-NEXT:    [[TMP13:%.*]] = lshr i64 [[TMP10]], 12
1498; MASKED-NEXT:    [[TMP14:%.*]] = lshr i64 [[TMP6]], 12
1499; MASKED-NEXT:    [[TMP15:%.*]] = lshr i64 [[TMP11]], 12
1500; MASKED-NEXT:    [[TMP16:%.*]] = icmp ne i64 [[TMP12]], [[TMP13]]
1501; MASKED-NEXT:    [[TMP17:%.*]] = icmp ne i64 [[TMP14]], [[TMP15]]
1502; MASKED-NEXT:    [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]]
1503; MASKED-NEXT:    br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_VEC_LOOP_PREHEADER:%.*]], !prof [[PROF1]]
1504; MASKED:       mismatch_vec_loop_preheader:
1505; MASKED-NEXT:    [[TMP19:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP1]], i64 [[TMP2]])
1506; MASKED-NEXT:    [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
1507; MASKED-NEXT:    [[TMP21:%.*]] = mul nuw nsw i64 [[TMP20]], 16
1508; MASKED-NEXT:    br label [[MISMATCH_VEC_LOOP:%.*]]
1509; MASKED:       mismatch_vec_loop:
1510; MASKED-NEXT:    [[MISMATCH_VEC_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP19]], [[MISMATCH_VEC_LOOP_PREHEADER]] ], [ [[TMP30:%.*]], [[MISMATCH_VEC_LOOP_INC:%.*]] ]
1511; MASKED-NEXT:    [[MISMATCH_VEC_INDEX:%.*]] = phi i64 [ [[TMP1]], [[MISMATCH_VEC_LOOP_PREHEADER]] ], [ [[TMP29:%.*]], [[MISMATCH_VEC_LOOP_INC]] ]
1512; MASKED-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VEC_INDEX]]
1513; MASKED-NEXT:    [[TMP23:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP22]], i32 1, <vscale x 16 x i1> [[MISMATCH_VEC_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
1514; MASKED-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VEC_INDEX]]
1515; MASKED-NEXT:    [[TMP25:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP24]], i32 1, <vscale x 16 x i1> [[MISMATCH_VEC_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
1516; MASKED-NEXT:    [[TMP26:%.*]] = icmp ne <vscale x 16 x i8> [[TMP23]], [[TMP25]]
1517; MASKED-NEXT:    [[TMP27:%.*]] = select <vscale x 16 x i1> [[MISMATCH_VEC_LOOP_PRED]], <vscale x 16 x i1> [[TMP26]], <vscale x 16 x i1> zeroinitializer
1518; MASKED-NEXT:    [[TMP28:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[TMP27]])
1519; MASKED-NEXT:    br i1 [[TMP28]], label [[MISMATCH_VEC_LOOP_FOUND:%.*]], label [[MISMATCH_VEC_LOOP_INC]]
1520; MASKED:       mismatch_vec_loop_inc:
1521; MASKED-NEXT:    [[TMP29]] = add nuw nsw i64 [[MISMATCH_VEC_INDEX]], [[TMP21]]
1522; MASKED-NEXT:    [[TMP30]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP29]], i64 [[TMP2]])
1523; MASKED-NEXT:    [[TMP31:%.*]] = extractelement <vscale x 16 x i1> [[TMP30]], i64 0
1524; MASKED-NEXT:    br i1 [[TMP31]], label [[MISMATCH_VEC_LOOP]], label [[MISMATCH_END:%.*]]
1525; MASKED:       mismatch_vec_loop_found:
1526; MASKED-NEXT:    [[MISMATCH_VEC_FOUND_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP27]], [[MISMATCH_VEC_LOOP]] ]
1527; MASKED-NEXT:    [[MISMATCH_VEC_LAST_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[MISMATCH_VEC_LOOP_PRED]], [[MISMATCH_VEC_LOOP]] ]
1528; MASKED-NEXT:    [[MISMATCH_VEC_FOUND_INDEX:%.*]] = phi i64 [ [[MISMATCH_VEC_INDEX]], [[MISMATCH_VEC_LOOP]] ]
1529; MASKED-NEXT:    [[TMP32:%.*]] = and <vscale x 16 x i1> [[MISMATCH_VEC_LAST_LOOP_PRED]], [[MISMATCH_VEC_FOUND_PRED]]
1530; MASKED-NEXT:    [[TMP33:%.*]] = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> [[TMP32]], i1 true)
1531; MASKED-NEXT:    [[TMP34:%.*]] = zext i32 [[TMP33]] to i64
1532; MASKED-NEXT:    [[TMP35:%.*]] = add nuw nsw i64 [[MISMATCH_VEC_FOUND_INDEX]], [[TMP34]]
1533; MASKED-NEXT:    [[TMP36:%.*]] = trunc i64 [[TMP35]] to i32
1534; MASKED-NEXT:    br label [[MISMATCH_END]]
1535; MASKED:       mismatch_loop_pre:
1536; MASKED-NEXT:    br label [[MISMATCH_LOOP:%.*]]
1537; MASKED:       mismatch_loop:
1538; MASKED-NEXT:    [[MISMATCH_INDEX:%.*]] = phi i32 [ [[TMP0]], [[MISMATCH_LOOP_PRE]] ], [ [[TMP43:%.*]], [[MISMATCH_LOOP_INC:%.*]] ]
1539; MASKED-NEXT:    [[TMP37:%.*]] = zext i32 [[MISMATCH_INDEX]] to i64
1540; MASKED-NEXT:    [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP37]]
1541; MASKED-NEXT:    [[TMP39:%.*]] = load i8, ptr [[TMP38]], align 1
1542; MASKED-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP37]]
1543; MASKED-NEXT:    [[TMP41:%.*]] = load i8, ptr [[TMP40]], align 1
1544; MASKED-NEXT:    [[TMP42:%.*]] = icmp eq i8 [[TMP39]], [[TMP41]]
1545; MASKED-NEXT:    br i1 [[TMP42]], label [[MISMATCH_LOOP_INC]], label [[MISMATCH_END]]
1546; MASKED:       mismatch_loop_inc:
1547; MASKED-NEXT:    [[TMP43]] = add i32 [[MISMATCH_INDEX]], 1
1548; MASKED-NEXT:    [[TMP44:%.*]] = icmp eq i32 [[TMP43]], [[N]]
1549; MASKED-NEXT:    br i1 [[TMP44]], label [[MISMATCH_END]], label [[MISMATCH_LOOP]]
1550; MASKED:       mismatch_end:
1551; MASKED-NEXT:    [[MISMATCH_RESULT:%.*]] = phi i32 [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_VEC_LOOP_INC]] ], [ [[TMP36]], [[MISMATCH_VEC_LOOP_FOUND]] ]
1552; MASKED-NEXT:    br i1 true, label [[BYTE_COMPARE:%.*]], label [[WHILE_COND:%.*]]
1553; MASKED:       while.cond:
1554; MASKED-NEXT:    [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[MISMATCH_END]] ], [ [[MISMATCH_RESULT]], [[WHILE_BODY:%.*]] ]
1555; MASKED-NEXT:    [[INC:%.*]] = add i32 [[LEN_ADDR]], 1
1556; MASKED-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], [[N]]
1557; MASKED-NEXT:    br i1 [[CMP_NOT]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
1558; MASKED:       while.body:
1559; MASKED-NEXT:    [[IDXPROM:%.*]] = zext i32 [[MISMATCH_RESULT]] to i64
1560; MASKED-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
1561; MASKED-NEXT:    [[TMP45:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
1562; MASKED-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
1563; MASKED-NEXT:    [[TMP46:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
1564; MASKED-NEXT:    [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP45]], [[TMP46]]
1565; MASKED-NEXT:    br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END_LOOPEXIT]]
1566; MASKED:       byte.compare:
1567; MASKED-NEXT:    br label [[WHILE_END_LOOPEXIT]]
1568; MASKED:       while.end.loopexit:
1569; MASKED-NEXT:    [[INC_LCSSA1:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_COND]] ], [ [[MISMATCH_RESULT]], [[WHILE_BODY]] ], [ [[MISMATCH_RESULT]], [[BYTE_COMPARE]] ]
1570; MASKED-NEXT:    br label [[WHILE_END]]
1571; MASKED:       while.end:
1572; MASKED-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[X]], [[ENTRY:%.*]] ], [ [[INC_LCSSA1]], [[WHILE_END_LOOPEXIT]] ]
1573; MASKED-NEXT:    ret i32 [[INC_LCSSA]]
1574;
1575; NO-TRANSFORM-LABEL: define i32 @compare_bytes_extra_cmp(
1576; NO-TRANSFORM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]], i32 [[X:%.*]]) {
1577; NO-TRANSFORM-NEXT:  entry:
1578; NO-TRANSFORM-NEXT:    [[CMP_X:%.*]] = icmp ult i32 [[N]], [[X]]
1579; NO-TRANSFORM-NEXT:    br i1 [[CMP_X]], label [[PH:%.*]], label [[WHILE_END:%.*]]
1580; NO-TRANSFORM:       ph:
1581; NO-TRANSFORM-NEXT:    br label [[WHILE_COND:%.*]]
1582; NO-TRANSFORM:       while.cond:
1583; NO-TRANSFORM-NEXT:    [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[PH]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
1584; NO-TRANSFORM-NEXT:    [[INC]] = add i32 [[LEN_ADDR]], 1
1585; NO-TRANSFORM-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
1586; NO-TRANSFORM-NEXT:    br i1 [[CMP_NOT]], label [[WHILE_END]], label [[WHILE_BODY]]
1587; NO-TRANSFORM:       while.body:
1588; NO-TRANSFORM-NEXT:    [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
1589; NO-TRANSFORM-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
1590; NO-TRANSFORM-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
1591; NO-TRANSFORM-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
1592; NO-TRANSFORM-NEXT:    [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
1593; NO-TRANSFORM-NEXT:    [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
1594; NO-TRANSFORM-NEXT:    br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
1595; NO-TRANSFORM:       while.end:
1596; NO-TRANSFORM-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ], [ [[X]], [[ENTRY:%.*]] ]
1597; NO-TRANSFORM-NEXT:    ret i32 [[INC_LCSSA]]
1598entry:
1599  %cmp.x = icmp ult i32 %n, %x
1600  br i1 %cmp.x, label %ph, label %while.end
1601
1602ph:
1603  br label %while.cond
1604
1605while.cond:
1606  %len.addr = phi i32 [ %len, %ph ], [ %inc, %while.body ]
1607  %inc = add i32 %len.addr, 1
1608  %cmp.not = icmp eq i32 %inc, %n
1609  br i1 %cmp.not, label %while.end, label %while.body
1610
1611while.body:
1612  %idxprom = zext i32 %inc to i64
1613  %arrayidx = getelementptr inbounds i8, ptr %a, i64 %idxprom
1614  %0 = load i8, ptr %arrayidx
1615  %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %idxprom
1616  %1 = load i8, ptr %arrayidx2
1617  %cmp.not2 = icmp eq i8 %0, %1
1618  br i1 %cmp.not2, label %while.cond, label %while.end
1619
1620while.end:
1621  %inc.lcssa = phi i32 [ %inc, %while.body ], [ %inc, %while.cond ], [ %x, %entry ]
1622  ret i32 %inc.lcssa
1623}
1624
1625define void @compare_bytes_cleanup_block(ptr %src1, ptr %src2) {
1626; CHECK-LABEL: define void @compare_bytes_cleanup_block(
1627; CHECK-SAME: ptr [[SRC1:%.*]], ptr [[SRC2:%.*]]) #[[ATTR0]] {
1628; CHECK-NEXT:  entry:
1629; CHECK-NEXT:    br label [[MISMATCH_MIN_IT_CHECK:%.*]]
1630; CHECK:       mismatch_min_it_check:
1631; CHECK-NEXT:    br i1 false, label [[MISMATCH_MEM_CHECK:%.*]], label [[MISMATCH_LOOP_PRE:%.*]], !prof [[PROF0]]
1632; CHECK:       mismatch_mem_check:
1633; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[SRC1]], i64 1
1634; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[SRC2]], i64 1
1635; CHECK-NEXT:    [[TMP9:%.*]] = ptrtoint ptr [[TMP1]] to i64
1636; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP0]] to i64
1637; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[SRC1]], i64 0
1638; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[SRC2]], i64 0
1639; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[TMP3]] to i64
1640; CHECK-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[TMP6]] to i64
1641; CHECK-NEXT:    [[TMP2:%.*]] = lshr i64 [[TMP10]], 12
1642; CHECK-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP4]], 12
1643; CHECK-NEXT:    [[TMP8:%.*]] = lshr i64 [[TMP9]], 12
1644; CHECK-NEXT:    [[TMP11:%.*]] = lshr i64 [[TMP7]], 12
1645; CHECK-NEXT:    [[TMP12:%.*]] = icmp ne i64 [[TMP2]], [[TMP5]]
1646; CHECK-NEXT:    [[TMP13:%.*]] = icmp ne i64 [[TMP8]], [[TMP11]]
1647; CHECK-NEXT:    [[TMP14:%.*]] = or i1 [[TMP12]], [[TMP13]]
1648; CHECK-NEXT:    br i1 [[TMP14]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_VECTOR_LOOP_PREHEADER:%.*]], !prof [[PROF1]]
1649; CHECK:       mismatch_vec_loop_preheader:
1650; CHECK-NEXT:    br label [[MISMATCH_VECTOR_LOOP:%.*]]
1651; CHECK:       mismatch_vec_loop:
1652; CHECK-NEXT:    [[MISMATCH_VECTOR_INDEX:%.*]] = phi i64 [ 1, [[MISMATCH_VECTOR_LOOP_PREHEADER]] ], [ [[TMP20:%.*]], [[MISMATCH_VECTOR_LOOP_INC:%.*]] ]
1653; CHECK-NEXT:    [[AVL:%.*]] = sub nuw nsw i64 0, [[MISMATCH_VECTOR_INDEX]]
1654; CHECK-NEXT:    [[TMP15:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 16, i1 true)
1655; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr i8, ptr [[SRC1]], i64 [[MISMATCH_VECTOR_INDEX]]
1656; CHECK-NEXT:    [[LHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP16]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP15]])
1657; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr i8, ptr [[SRC2]], i64 [[MISMATCH_VECTOR_INDEX]]
1658; CHECK-NEXT:    [[RHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP17]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP15]])
1659; CHECK-NEXT:    [[MISMATCH_CMP:%.*]] = call <vscale x 16 x i1> @llvm.vp.icmp.nxv16i8(<vscale x 16 x i8> [[LHS_LOAD]], <vscale x 16 x i8> [[RHS_LOAD]], metadata !"ne", <vscale x 16 x i1> splat (i1 true), i32 [[TMP15]])
1660; CHECK-NEXT:    [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> [[MISMATCH_CMP]], i1 false, <vscale x 16 x i1> splat (i1 true), i32 [[TMP15]])
1661; CHECK-NEXT:    [[TMP18:%.*]] = icmp ne i32 [[FIRST]], [[TMP15]]
1662; CHECK-NEXT:    br i1 [[TMP18]], label [[MISMATCH_VECTOR_LOOP_FOUND:%.*]], label [[MISMATCH_VECTOR_LOOP_INC]]
1663; CHECK:       mismatch_vec_loop_inc:
1664; CHECK-NEXT:    [[TMP19:%.*]] = zext i32 [[TMP15]] to i64
1665; CHECK-NEXT:    [[TMP20]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX]], [[TMP19]]
1666; CHECK-NEXT:    [[TMP21:%.*]] = icmp ne i64 [[TMP20]], 0
1667; CHECK-NEXT:    br i1 [[TMP21]], label [[MISMATCH_VECTOR_LOOP]], label [[MISMATCH_END:%.*]]
1668; CHECK:       mismatch_vec_loop_found:
1669; CHECK-NEXT:    [[FIRST1:%.*]] = phi i32 [ [[FIRST]], [[MISMATCH_VECTOR_LOOP]] ]
1670; CHECK-NEXT:    [[MISMATCH_VECTOR_INDEX2:%.*]] = phi i64 [ [[MISMATCH_VECTOR_INDEX]], [[MISMATCH_VECTOR_LOOP]] ]
1671; CHECK-NEXT:    [[TMP22:%.*]] = zext i32 [[FIRST1]] to i64
1672; CHECK-NEXT:    [[TMP23:%.*]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX2]], [[TMP22]]
1673; CHECK-NEXT:    [[TMP24:%.*]] = trunc i64 [[TMP23]] to i32
1674; CHECK-NEXT:    br label [[MISMATCH_END]]
1675; CHECK:       mismatch_loop_pre:
1676; CHECK-NEXT:    br label [[MISMATCH_LOOP:%.*]]
1677; CHECK:       mismatch_loop:
1678; CHECK-NEXT:    [[MISMATCH_INDEX:%.*]] = phi i32 [ 1, [[MISMATCH_LOOP_PRE]] ], [ [[TMP31:%.*]], [[MISMATCH_LOOP_INC:%.*]] ]
1679; CHECK-NEXT:    [[TMP25:%.*]] = zext i32 [[MISMATCH_INDEX]] to i64
1680; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr i8, ptr [[SRC1]], i64 [[TMP25]]
1681; CHECK-NEXT:    [[TMP27:%.*]] = load i8, ptr [[TMP26]], align 1
1682; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr i8, ptr [[SRC2]], i64 [[TMP25]]
1683; CHECK-NEXT:    [[TMP29:%.*]] = load i8, ptr [[TMP28]], align 1
1684; CHECK-NEXT:    [[TMP30:%.*]] = icmp eq i8 [[TMP27]], [[TMP29]]
1685; CHECK-NEXT:    br i1 [[TMP30]], label [[MISMATCH_LOOP_INC]], label [[MISMATCH_END]]
1686; CHECK:       mismatch_loop_inc:
1687; CHECK-NEXT:    [[TMP31]] = add i32 [[MISMATCH_INDEX]], 1
1688; CHECK-NEXT:    [[TMP32:%.*]] = icmp eq i32 [[TMP31]], 0
1689; CHECK-NEXT:    br i1 [[TMP32]], label [[MISMATCH_END]], label [[MISMATCH_LOOP]]
1690; CHECK:       mismatch_end:
1691; CHECK-NEXT:    [[MISMATCH_RESULT:%.*]] = phi i32 [ 0, [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ 0, [[MISMATCH_VECTOR_LOOP_INC]] ], [ [[TMP24]], [[MISMATCH_VECTOR_LOOP_FOUND]] ]
1692; CHECK-NEXT:    br i1 true, label [[BYTE_COMPARE:%.*]], label [[WHILE_COND:%.*]]
1693; CHECK:       while.cond:
1694; CHECK-NEXT:    [[LEN:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_BODY:%.*]] ], [ 0, [[MISMATCH_END]] ]
1695; CHECK-NEXT:    [[INC:%.*]] = add i32 [[LEN]], 1
1696; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], 0
1697; CHECK-NEXT:    br i1 [[CMP_NOT]], label [[CLEANUP_THREAD:%.*]], label [[WHILE_BODY]]
1698; CHECK:       while.body:
1699; CHECK-NEXT:    [[IDXPROM:%.*]] = zext i32 [[MISMATCH_RESULT]] to i64
1700; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr i8, ptr [[SRC1]], i64 [[IDXPROM]]
1701; CHECK-NEXT:    [[TMP33:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
1702; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr i8, ptr [[SRC2]], i64 [[IDXPROM]]
1703; CHECK-NEXT:    [[TMP34:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
1704; CHECK-NEXT:    [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP33]], [[TMP34]]
1705; CHECK-NEXT:    br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[IF_END:%.*]]
1706; CHECK:       byte.compare:
1707; CHECK-NEXT:    [[TMP35:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], 0
1708; CHECK-NEXT:    br i1 [[TMP35]], label [[CLEANUP_THREAD]], label [[IF_END]]
1709; CHECK:       cleanup.thread:
1710; CHECK-NEXT:    ret void
1711; CHECK:       if.end:
1712; CHECK-NEXT:    [[RES:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_BODY]] ], [ [[MISMATCH_RESULT]], [[BYTE_COMPARE]] ]
1713; CHECK-NEXT:    ret void
1714;
1715; LMUL8-LABEL: define void @compare_bytes_cleanup_block(
1716; LMUL8-SAME: ptr [[SRC1:%.*]], ptr [[SRC2:%.*]]) #[[ATTR0]] {
1717; LMUL8-NEXT:  entry:
1718; LMUL8-NEXT:    br label [[MISMATCH_MIN_IT_CHECK:%.*]]
1719; LMUL8:       mismatch_min_it_check:
1720; LMUL8-NEXT:    br i1 false, label [[MISMATCH_MEM_CHECK:%.*]], label [[MISMATCH_LOOP_PRE:%.*]], !prof [[PROF0]]
1721; LMUL8:       mismatch_mem_check:
1722; LMUL8-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[SRC1]], i64 1
1723; LMUL8-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[SRC2]], i64 1
1724; LMUL8-NEXT:    [[TMP9:%.*]] = ptrtoint ptr [[TMP1]] to i64
1725; LMUL8-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP0]] to i64
1726; LMUL8-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[SRC1]], i64 0
1727; LMUL8-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[SRC2]], i64 0
1728; LMUL8-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[TMP3]] to i64
1729; LMUL8-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[TMP6]] to i64
1730; LMUL8-NEXT:    [[TMP2:%.*]] = lshr i64 [[TMP10]], 12
1731; LMUL8-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP4]], 12
1732; LMUL8-NEXT:    [[TMP8:%.*]] = lshr i64 [[TMP9]], 12
1733; LMUL8-NEXT:    [[TMP11:%.*]] = lshr i64 [[TMP7]], 12
1734; LMUL8-NEXT:    [[TMP12:%.*]] = icmp ne i64 [[TMP2]], [[TMP5]]
1735; LMUL8-NEXT:    [[TMP13:%.*]] = icmp ne i64 [[TMP8]], [[TMP11]]
1736; LMUL8-NEXT:    [[TMP14:%.*]] = or i1 [[TMP12]], [[TMP13]]
1737; LMUL8-NEXT:    br i1 [[TMP14]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_VECTOR_LOOP_PREHEADER:%.*]], !prof [[PROF1]]
1738; LMUL8:       mismatch_vec_loop_preheader:
1739; LMUL8-NEXT:    br label [[MISMATCH_VECTOR_LOOP:%.*]]
1740; LMUL8:       mismatch_vec_loop:
1741; LMUL8-NEXT:    [[MISMATCH_VECTOR_INDEX:%.*]] = phi i64 [ 1, [[MISMATCH_VECTOR_LOOP_PREHEADER]] ], [ [[TMP20:%.*]], [[MISMATCH_VECTOR_LOOP_INC:%.*]] ]
1742; LMUL8-NEXT:    [[AVL:%.*]] = sub nuw nsw i64 0, [[MISMATCH_VECTOR_INDEX]]
1743; LMUL8-NEXT:    [[TMP15:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 64, i1 true)
1744; LMUL8-NEXT:    [[TMP16:%.*]] = getelementptr i8, ptr [[SRC1]], i64 [[MISMATCH_VECTOR_INDEX]]
1745; LMUL8-NEXT:    [[LHS_LOAD:%.*]] = call <vscale x 64 x i8> @llvm.vp.load.nxv64i8.p0(ptr [[TMP16]], <vscale x 64 x i1> splat (i1 true), i32 [[TMP15]])
1746; LMUL8-NEXT:    [[TMP17:%.*]] = getelementptr i8, ptr [[SRC2]], i64 [[MISMATCH_VECTOR_INDEX]]
1747; LMUL8-NEXT:    [[RHS_LOAD:%.*]] = call <vscale x 64 x i8> @llvm.vp.load.nxv64i8.p0(ptr [[TMP17]], <vscale x 64 x i1> splat (i1 true), i32 [[TMP15]])
1748; LMUL8-NEXT:    [[MISMATCH_CMP:%.*]] = call <vscale x 64 x i1> @llvm.vp.icmp.nxv64i8(<vscale x 64 x i8> [[LHS_LOAD]], <vscale x 64 x i8> [[RHS_LOAD]], metadata !"ne", <vscale x 64 x i1> splat (i1 true), i32 [[TMP15]])
1749; LMUL8-NEXT:    [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv64i1(<vscale x 64 x i1> [[MISMATCH_CMP]], i1 false, <vscale x 64 x i1> splat (i1 true), i32 [[TMP15]])
1750; LMUL8-NEXT:    [[TMP18:%.*]] = icmp ne i32 [[FIRST]], [[TMP15]]
1751; LMUL8-NEXT:    br i1 [[TMP18]], label [[MISMATCH_VECTOR_LOOP_FOUND:%.*]], label [[MISMATCH_VECTOR_LOOP_INC]]
1752; LMUL8:       mismatch_vec_loop_inc:
1753; LMUL8-NEXT:    [[TMP19:%.*]] = zext i32 [[TMP15]] to i64
1754; LMUL8-NEXT:    [[TMP20]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX]], [[TMP19]]
1755; LMUL8-NEXT:    [[TMP21:%.*]] = icmp ne i64 [[TMP20]], 0
1756; LMUL8-NEXT:    br i1 [[TMP21]], label [[MISMATCH_VECTOR_LOOP]], label [[MISMATCH_END:%.*]]
1757; LMUL8:       mismatch_vec_loop_found:
1758; LMUL8-NEXT:    [[FIRST1:%.*]] = phi i32 [ [[FIRST]], [[MISMATCH_VECTOR_LOOP]] ]
1759; LMUL8-NEXT:    [[MISMATCH_VECTOR_INDEX2:%.*]] = phi i64 [ [[MISMATCH_VECTOR_INDEX]], [[MISMATCH_VECTOR_LOOP]] ]
1760; LMUL8-NEXT:    [[TMP22:%.*]] = zext i32 [[FIRST1]] to i64
1761; LMUL8-NEXT:    [[TMP23:%.*]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX2]], [[TMP22]]
1762; LMUL8-NEXT:    [[TMP24:%.*]] = trunc i64 [[TMP23]] to i32
1763; LMUL8-NEXT:    br label [[MISMATCH_END]]
1764; LMUL8:       mismatch_loop_pre:
1765; LMUL8-NEXT:    br label [[MISMATCH_LOOP:%.*]]
1766; LMUL8:       mismatch_loop:
1767; LMUL8-NEXT:    [[MISMATCH_INDEX:%.*]] = phi i32 [ 1, [[MISMATCH_LOOP_PRE]] ], [ [[TMP31:%.*]], [[MISMATCH_LOOP_INC:%.*]] ]
1768; LMUL8-NEXT:    [[TMP25:%.*]] = zext i32 [[MISMATCH_INDEX]] to i64
1769; LMUL8-NEXT:    [[TMP26:%.*]] = getelementptr i8, ptr [[SRC1]], i64 [[TMP25]]
1770; LMUL8-NEXT:    [[TMP27:%.*]] = load i8, ptr [[TMP26]], align 1
1771; LMUL8-NEXT:    [[TMP28:%.*]] = getelementptr i8, ptr [[SRC2]], i64 [[TMP25]]
1772; LMUL8-NEXT:    [[TMP29:%.*]] = load i8, ptr [[TMP28]], align 1
1773; LMUL8-NEXT:    [[TMP30:%.*]] = icmp eq i8 [[TMP27]], [[TMP29]]
1774; LMUL8-NEXT:    br i1 [[TMP30]], label [[MISMATCH_LOOP_INC]], label [[MISMATCH_END]]
1775; LMUL8:       mismatch_loop_inc:
1776; LMUL8-NEXT:    [[TMP31]] = add i32 [[MISMATCH_INDEX]], 1
1777; LMUL8-NEXT:    [[TMP32:%.*]] = icmp eq i32 [[TMP31]], 0
1778; LMUL8-NEXT:    br i1 [[TMP32]], label [[MISMATCH_END]], label [[MISMATCH_LOOP]]
1779; LMUL8:       mismatch_end:
1780; LMUL8-NEXT:    [[MISMATCH_RESULT:%.*]] = phi i32 [ 0, [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ 0, [[MISMATCH_VECTOR_LOOP_INC]] ], [ [[TMP24]], [[MISMATCH_VECTOR_LOOP_FOUND]] ]
1781; LMUL8-NEXT:    br i1 true, label [[BYTE_COMPARE:%.*]], label [[WHILE_COND:%.*]]
1782; LMUL8:       while.cond:
1783; LMUL8-NEXT:    [[LEN:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_BODY:%.*]] ], [ 0, [[MISMATCH_END]] ]
1784; LMUL8-NEXT:    [[INC:%.*]] = add i32 [[LEN]], 1
1785; LMUL8-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], 0
1786; LMUL8-NEXT:    br i1 [[CMP_NOT]], label [[CLEANUP_THREAD:%.*]], label [[WHILE_BODY]]
1787; LMUL8:       while.body:
1788; LMUL8-NEXT:    [[IDXPROM:%.*]] = zext i32 [[MISMATCH_RESULT]] to i64
1789; LMUL8-NEXT:    [[ARRAYIDX:%.*]] = getelementptr i8, ptr [[SRC1]], i64 [[IDXPROM]]
1790; LMUL8-NEXT:    [[TMP33:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
1791; LMUL8-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr i8, ptr [[SRC2]], i64 [[IDXPROM]]
1792; LMUL8-NEXT:    [[TMP34:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
1793; LMUL8-NEXT:    [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP33]], [[TMP34]]
1794; LMUL8-NEXT:    br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[IF_END:%.*]]
1795; LMUL8:       byte.compare:
1796; LMUL8-NEXT:    [[TMP35:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], 0
1797; LMUL8-NEXT:    br i1 [[TMP35]], label [[CLEANUP_THREAD]], label [[IF_END]]
1798; LMUL8:       cleanup.thread:
1799; LMUL8-NEXT:    ret void
1800; LMUL8:       if.end:
1801; LMUL8-NEXT:    [[RES:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_BODY]] ], [ [[MISMATCH_RESULT]], [[BYTE_COMPARE]] ]
1802; LMUL8-NEXT:    ret void
1803;
1804; LOOP-DEL-LABEL: define void @compare_bytes_cleanup_block(
1805; LOOP-DEL-SAME: ptr [[SRC1:%.*]], ptr [[SRC2:%.*]]) #[[ATTR0]] {
1806; LOOP-DEL-NEXT:  entry:
1807; LOOP-DEL-NEXT:    br label [[MISMATCH_LOOP:%.*]]
1808; LOOP-DEL:       mismatch_loop:
1809; LOOP-DEL-NEXT:    [[MISMATCH_INDEX:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[TMP6:%.*]], [[MISMATCH_LOOP]] ]
1810; LOOP-DEL-NEXT:    [[TMP0:%.*]] = zext i32 [[MISMATCH_INDEX]] to i64
1811; LOOP-DEL-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[SRC1]], i64 [[TMP0]]
1812; LOOP-DEL-NEXT:    [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1
1813; LOOP-DEL-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[SRC2]], i64 [[TMP0]]
1814; LOOP-DEL-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
1815; LOOP-DEL-NEXT:    [[TMP5:%.*]] = icmp ne i8 [[TMP2]], [[TMP4]]
1816; LOOP-DEL-NEXT:    [[TMP6]] = add i32 [[MISMATCH_INDEX]], 1
1817; LOOP-DEL-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0
1818; LOOP-DEL-NEXT:    [[OR_COND:%.*]] = or i1 [[TMP5]], [[TMP7]]
1819; LOOP-DEL-NEXT:    br i1 [[OR_COND]], label [[COMMON_RET:%.*]], label [[MISMATCH_LOOP]]
1820; LOOP-DEL:       common.ret:
1821; LOOP-DEL-NEXT:    ret void
1822;
1823; MASKED-LABEL: define void @compare_bytes_cleanup_block(
1824; MASKED-SAME: ptr [[SRC1:%.*]], ptr [[SRC2:%.*]]) #[[ATTR0]] {
1825; MASKED-NEXT:  entry:
1826; MASKED-NEXT:    br label [[MISMATCH_MIN_IT_CHECK:%.*]]
1827; MASKED:       mismatch_min_it_check:
1828; MASKED-NEXT:    br i1 false, label [[MISMATCH_MEM_CHECK:%.*]], label [[MISMATCH_LOOP_PRE:%.*]], !prof [[PROF0]]
1829; MASKED:       mismatch_mem_check:
1830; MASKED-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[SRC1]], i64 1
1831; MASKED-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[SRC2]], i64 1
1832; MASKED-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
1833; MASKED-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[TMP0]] to i64
1834; MASKED-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[SRC1]], i64 0
1835; MASKED-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[SRC2]], i64 0
1836; MASKED-NEXT:    [[TMP6:%.*]] = ptrtoint ptr [[TMP4]] to i64
1837; MASKED-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[TMP5]] to i64
1838; MASKED-NEXT:    [[TMP8:%.*]] = lshr i64 [[TMP3]], 12
1839; MASKED-NEXT:    [[TMP9:%.*]] = lshr i64 [[TMP6]], 12
1840; MASKED-NEXT:    [[TMP10:%.*]] = lshr i64 [[TMP2]], 12
1841; MASKED-NEXT:    [[TMP11:%.*]] = lshr i64 [[TMP7]], 12
1842; MASKED-NEXT:    [[TMP12:%.*]] = icmp ne i64 [[TMP8]], [[TMP9]]
1843; MASKED-NEXT:    [[TMP13:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]
1844; MASKED-NEXT:    [[TMP14:%.*]] = or i1 [[TMP12]], [[TMP13]]
1845; MASKED-NEXT:    br i1 [[TMP14]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_VEC_LOOP_PREHEADER:%.*]], !prof [[PROF1]]
1846; MASKED:       mismatch_vec_loop_preheader:
1847; MASKED-NEXT:    [[TMP15:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 1, i64 0)
1848; MASKED-NEXT:    [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
1849; MASKED-NEXT:    [[TMP17:%.*]] = mul nuw nsw i64 [[TMP16]], 16
1850; MASKED-NEXT:    br label [[MISMATCH_VEC_LOOP:%.*]]
1851; MASKED:       mismatch_vec_loop:
1852; MASKED-NEXT:    [[MISMATCH_VEC_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP15]], [[MISMATCH_VEC_LOOP_PREHEADER]] ], [ [[TMP26:%.*]], [[MISMATCH_VEC_LOOP_INC:%.*]] ]
1853; MASKED-NEXT:    [[MISMATCH_VEC_INDEX:%.*]] = phi i64 [ 1, [[MISMATCH_VEC_LOOP_PREHEADER]] ], [ [[TMP25:%.*]], [[MISMATCH_VEC_LOOP_INC]] ]
1854; MASKED-NEXT:    [[TMP18:%.*]] = getelementptr i8, ptr [[SRC1]], i64 [[MISMATCH_VEC_INDEX]]
1855; MASKED-NEXT:    [[TMP19:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP18]], i32 1, <vscale x 16 x i1> [[MISMATCH_VEC_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
1856; MASKED-NEXT:    [[TMP20:%.*]] = getelementptr i8, ptr [[SRC2]], i64 [[MISMATCH_VEC_INDEX]]
1857; MASKED-NEXT:    [[TMP21:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP20]], i32 1, <vscale x 16 x i1> [[MISMATCH_VEC_LOOP_PRED]], <vscale x 16 x i8> zeroinitializer)
1858; MASKED-NEXT:    [[TMP22:%.*]] = icmp ne <vscale x 16 x i8> [[TMP19]], [[TMP21]]
1859; MASKED-NEXT:    [[TMP23:%.*]] = select <vscale x 16 x i1> [[MISMATCH_VEC_LOOP_PRED]], <vscale x 16 x i1> [[TMP22]], <vscale x 16 x i1> zeroinitializer
1860; MASKED-NEXT:    [[TMP24:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[TMP23]])
1861; MASKED-NEXT:    br i1 [[TMP24]], label [[MISMATCH_VEC_LOOP_FOUND:%.*]], label [[MISMATCH_VEC_LOOP_INC]]
1862; MASKED:       mismatch_vec_loop_inc:
1863; MASKED-NEXT:    [[TMP25]] = add nuw nsw i64 [[MISMATCH_VEC_INDEX]], [[TMP17]]
1864; MASKED-NEXT:    [[TMP26]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP25]], i64 0)
1865; MASKED-NEXT:    [[TMP27:%.*]] = extractelement <vscale x 16 x i1> [[TMP26]], i64 0
1866; MASKED-NEXT:    br i1 [[TMP27]], label [[MISMATCH_VEC_LOOP]], label [[MISMATCH_END:%.*]]
1867; MASKED:       mismatch_vec_loop_found:
1868; MASKED-NEXT:    [[MISMATCH_VEC_FOUND_PRED:%.*]] = phi <vscale x 16 x i1> [ [[TMP23]], [[MISMATCH_VEC_LOOP]] ]
1869; MASKED-NEXT:    [[MISMATCH_VEC_LAST_LOOP_PRED:%.*]] = phi <vscale x 16 x i1> [ [[MISMATCH_VEC_LOOP_PRED]], [[MISMATCH_VEC_LOOP]] ]
1870; MASKED-NEXT:    [[MISMATCH_VEC_FOUND_INDEX:%.*]] = phi i64 [ [[MISMATCH_VEC_INDEX]], [[MISMATCH_VEC_LOOP]] ]
1871; MASKED-NEXT:    [[TMP28:%.*]] = and <vscale x 16 x i1> [[MISMATCH_VEC_LAST_LOOP_PRED]], [[MISMATCH_VEC_FOUND_PRED]]
1872; MASKED-NEXT:    [[TMP29:%.*]] = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> [[TMP28]], i1 true)
1873; MASKED-NEXT:    [[TMP30:%.*]] = zext i32 [[TMP29]] to i64
1874; MASKED-NEXT:    [[TMP31:%.*]] = add nuw nsw i64 [[MISMATCH_VEC_FOUND_INDEX]], [[TMP30]]
1875; MASKED-NEXT:    [[TMP32:%.*]] = trunc i64 [[TMP31]] to i32
1876; MASKED-NEXT:    br label [[MISMATCH_END]]
1877; MASKED:       mismatch_loop_pre:
1878; MASKED-NEXT:    br label [[MISMATCH_LOOP:%.*]]
1879; MASKED:       mismatch_loop:
1880; MASKED-NEXT:    [[MISMATCH_INDEX:%.*]] = phi i32 [ 1, [[MISMATCH_LOOP_PRE]] ], [ [[TMP39:%.*]], [[MISMATCH_LOOP_INC:%.*]] ]
1881; MASKED-NEXT:    [[TMP33:%.*]] = zext i32 [[MISMATCH_INDEX]] to i64
1882; MASKED-NEXT:    [[TMP34:%.*]] = getelementptr i8, ptr [[SRC1]], i64 [[TMP33]]
1883; MASKED-NEXT:    [[TMP35:%.*]] = load i8, ptr [[TMP34]], align 1
1884; MASKED-NEXT:    [[TMP36:%.*]] = getelementptr i8, ptr [[SRC2]], i64 [[TMP33]]
1885; MASKED-NEXT:    [[TMP37:%.*]] = load i8, ptr [[TMP36]], align 1
1886; MASKED-NEXT:    [[TMP38:%.*]] = icmp eq i8 [[TMP35]], [[TMP37]]
1887; MASKED-NEXT:    br i1 [[TMP38]], label [[MISMATCH_LOOP_INC]], label [[MISMATCH_END]]
1888; MASKED:       mismatch_loop_inc:
1889; MASKED-NEXT:    [[TMP39]] = add i32 [[MISMATCH_INDEX]], 1
1890; MASKED-NEXT:    [[TMP40:%.*]] = icmp eq i32 [[TMP39]], 0
1891; MASKED-NEXT:    br i1 [[TMP40]], label [[MISMATCH_END]], label [[MISMATCH_LOOP]]
1892; MASKED:       mismatch_end:
1893; MASKED-NEXT:    [[MISMATCH_RESULT:%.*]] = phi i32 [ 0, [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ 0, [[MISMATCH_VEC_LOOP_INC]] ], [ [[TMP32]], [[MISMATCH_VEC_LOOP_FOUND]] ]
1894; MASKED-NEXT:    br i1 true, label [[BYTE_COMPARE:%.*]], label [[WHILE_COND:%.*]]
1895; MASKED:       while.cond:
1896; MASKED-NEXT:    [[LEN:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_BODY:%.*]] ], [ 0, [[MISMATCH_END]] ]
1897; MASKED-NEXT:    [[INC:%.*]] = add i32 [[LEN]], 1
1898; MASKED-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], 0
1899; MASKED-NEXT:    br i1 [[CMP_NOT]], label [[CLEANUP_THREAD:%.*]], label [[WHILE_BODY]]
1900; MASKED:       while.body:
1901; MASKED-NEXT:    [[IDXPROM:%.*]] = zext i32 [[MISMATCH_RESULT]] to i64
1902; MASKED-NEXT:    [[ARRAYIDX:%.*]] = getelementptr i8, ptr [[SRC1]], i64 [[IDXPROM]]
1903; MASKED-NEXT:    [[TMP41:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
1904; MASKED-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr i8, ptr [[SRC2]], i64 [[IDXPROM]]
1905; MASKED-NEXT:    [[TMP42:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
1906; MASKED-NEXT:    [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP41]], [[TMP42]]
1907; MASKED-NEXT:    br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[IF_END:%.*]]
1908; MASKED:       byte.compare:
1909; MASKED-NEXT:    [[TMP43:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], 0
1910; MASKED-NEXT:    br i1 [[TMP43]], label [[CLEANUP_THREAD]], label [[IF_END]]
1911; MASKED:       cleanup.thread:
1912; MASKED-NEXT:    ret void
1913; MASKED:       if.end:
1914; MASKED-NEXT:    [[RES:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_BODY]] ], [ [[MISMATCH_RESULT]], [[BYTE_COMPARE]] ]
1915; MASKED-NEXT:    ret void
1916;
1917; NO-TRANSFORM-LABEL: define void @compare_bytes_cleanup_block(
1918; NO-TRANSFORM-SAME: ptr [[SRC1:%.*]], ptr [[SRC2:%.*]]) {
1919; NO-TRANSFORM-NEXT:  entry:
1920; NO-TRANSFORM-NEXT:    br label [[WHILE_COND:%.*]]
1921; NO-TRANSFORM:       while.cond:
1922; NO-TRANSFORM-NEXT:    [[LEN:%.*]] = phi i32 [ [[INC:%.*]], [[WHILE_BODY:%.*]] ], [ 0, [[ENTRY:%.*]] ]
1923; NO-TRANSFORM-NEXT:    [[INC]] = add i32 [[LEN]], 1
1924; NO-TRANSFORM-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], 0
1925; NO-TRANSFORM-NEXT:    br i1 [[CMP_NOT]], label [[CLEANUP_THREAD:%.*]], label [[WHILE_BODY]]
1926; NO-TRANSFORM:       while.body:
1927; NO-TRANSFORM-NEXT:    [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
1928; NO-TRANSFORM-NEXT:    [[ARRAYIDX:%.*]] = getelementptr i8, ptr [[SRC1]], i64 [[IDXPROM]]
1929; NO-TRANSFORM-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
1930; NO-TRANSFORM-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr i8, ptr [[SRC2]], i64 [[IDXPROM]]
1931; NO-TRANSFORM-NEXT:    [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
1932; NO-TRANSFORM-NEXT:    [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
1933; NO-TRANSFORM-NEXT:    br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[IF_END:%.*]]
1934; NO-TRANSFORM:       cleanup.thread:
1935; NO-TRANSFORM-NEXT:    ret void
1936; NO-TRANSFORM:       if.end:
1937; NO-TRANSFORM-NEXT:    [[RES:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ]
1938; NO-TRANSFORM-NEXT:    ret void
1939entry:
1940  br label %while.cond
1941
1942while.cond:
1943  %len = phi i32 [ %inc, %while.body ], [ 0, %entry ]
1944  %inc = add i32 %len, 1
1945  %cmp.not = icmp eq i32 %inc, 0
1946  br i1 %cmp.not, label %cleanup.thread, label %while.body
1947
1948while.body:
1949  %idxprom = zext i32 %inc to i64
1950  %arrayidx = getelementptr i8, ptr %src1, i64 %idxprom
1951  %0 = load i8, ptr %arrayidx, align 1
1952  %arrayidx2 = getelementptr i8, ptr %src2, i64 %idxprom
1953  %1 = load i8, ptr %arrayidx2, align 1
1954  %cmp.not2 = icmp eq i8 %0, %1
1955  br i1 %cmp.not2, label %while.cond, label %if.end
1956
1957cleanup.thread:
1958  ret void
1959
1960if.end:
1961  %res = phi i32 [ %inc, %while.body ]
1962  ret void
1963}
1964
1965;
1966; NEGATIVE TESTS
1967;
1968
1969; Similar to @compare_bytes_simple, except that the while.end block has an extra
1970; PHI whose incoming value differs for each exiting block of the loop.
1971define i32 @compare_bytes_simple2(ptr %a, ptr %b, ptr %c, ptr %d, i32 %len, i32 %n) {
1972; CHECK-LABEL: define i32 @compare_bytes_simple2(
1973; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], ptr [[D:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
1974; CHECK-NEXT:  entry:
1975; CHECK-NEXT:    br label [[WHILE_COND:%.*]]
1976; CHECK:       while.cond:
1977; CHECK-NEXT:    [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
1978; CHECK-NEXT:    [[INC]] = add i32 [[LEN_ADDR]], 1
1979; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
1980; CHECK-NEXT:    br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
1981; CHECK:       while.body:
1982; CHECK-NEXT:    [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
1983; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
1984; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
1985; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
1986; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
1987; CHECK-NEXT:    [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
1988; CHECK-NEXT:    br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
1989; CHECK:       while.end:
1990; CHECK-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
1991; CHECK-NEXT:    [[FINAL_PTR:%.*]] = phi ptr [ [[C]], [[WHILE_BODY]] ], [ [[D]], [[WHILE_COND]] ]
1992; CHECK-NEXT:    store i32 [[INC_LCSSA]], ptr [[FINAL_PTR]], align 4
1993; CHECK-NEXT:    ret i32 [[INC_LCSSA]]
1994;
1995; LMUL8-LABEL: define i32 @compare_bytes_simple2(
1996; LMUL8-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], ptr [[D:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
1997; LMUL8-NEXT:  entry:
1998; LMUL8-NEXT:    br label [[WHILE_COND:%.*]]
1999; LMUL8:       while.cond:
2000; LMUL8-NEXT:    [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
2001; LMUL8-NEXT:    [[INC]] = add i32 [[LEN_ADDR]], 1
2002; LMUL8-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
2003; LMUL8-NEXT:    br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
2004; LMUL8:       while.body:
2005; LMUL8-NEXT:    [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
2006; LMUL8-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
2007; LMUL8-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
2008; LMUL8-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
2009; LMUL8-NEXT:    [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
2010; LMUL8-NEXT:    [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
2011; LMUL8-NEXT:    br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
2012; LMUL8:       while.end:
2013; LMUL8-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
2014; LMUL8-NEXT:    [[FINAL_PTR:%.*]] = phi ptr [ [[C]], [[WHILE_BODY]] ], [ [[D]], [[WHILE_COND]] ]
2015; LMUL8-NEXT:    store i32 [[INC_LCSSA]], ptr [[FINAL_PTR]], align 4
2016; LMUL8-NEXT:    ret i32 [[INC_LCSSA]]
2017;
2018; LOOP-DEL-LABEL: define i32 @compare_bytes_simple2(
2019; LOOP-DEL-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], ptr [[D:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
2020; LOOP-DEL-NEXT:  entry:
2021; LOOP-DEL-NEXT:    br label [[WHILE_COND:%.*]]
2022; LOOP-DEL:       while.cond:
2023; LOOP-DEL-NEXT:    [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
2024; LOOP-DEL-NEXT:    [[INC]] = add i32 [[LEN_ADDR]], 1
2025; LOOP-DEL-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
2026; LOOP-DEL-NEXT:    br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
2027; LOOP-DEL:       while.body:
2028; LOOP-DEL-NEXT:    [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
2029; LOOP-DEL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
2030; LOOP-DEL-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
2031; LOOP-DEL-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
2032; LOOP-DEL-NEXT:    [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
2033; LOOP-DEL-NEXT:    [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
2034; LOOP-DEL-NEXT:    br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
2035; LOOP-DEL:       while.end:
2036; LOOP-DEL-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
2037; LOOP-DEL-NEXT:    [[FINAL_PTR:%.*]] = phi ptr [ [[C]], [[WHILE_BODY]] ], [ [[D]], [[WHILE_COND]] ]
2038; LOOP-DEL-NEXT:    store i32 [[INC_LCSSA]], ptr [[FINAL_PTR]], align 4
2039; LOOP-DEL-NEXT:    ret i32 [[INC_LCSSA]]
2040;
2041; MASKED-LABEL: define i32 @compare_bytes_simple2(
2042; MASKED-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], ptr [[D:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
2043; MASKED-NEXT:  entry:
2044; MASKED-NEXT:    br label [[WHILE_COND:%.*]]
2045; MASKED:       while.cond:
2046; MASKED-NEXT:    [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
2047; MASKED-NEXT:    [[INC]] = add i32 [[LEN_ADDR]], 1
2048; MASKED-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
2049; MASKED-NEXT:    br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
2050; MASKED:       while.body:
2051; MASKED-NEXT:    [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
2052; MASKED-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
2053; MASKED-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
2054; MASKED-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
2055; MASKED-NEXT:    [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
2056; MASKED-NEXT:    [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
2057; MASKED-NEXT:    br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
2058; MASKED:       while.end:
2059; MASKED-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
2060; MASKED-NEXT:    [[FINAL_PTR:%.*]] = phi ptr [ [[C]], [[WHILE_BODY]] ], [ [[D]], [[WHILE_COND]] ]
2061; MASKED-NEXT:    store i32 [[INC_LCSSA]], ptr [[FINAL_PTR]], align 4
2062; MASKED-NEXT:    ret i32 [[INC_LCSSA]]
2063;
2064entry:
2065  br label %while.cond
2066
2067while.cond:
2068  %len.addr = phi i32 [ %len, %entry ], [ %inc, %while.body ]
2069  %inc = add i32 %len.addr, 1 ; index is advanced before the bound test and before it is used as an offset
2070  %cmp.not = icmp eq i32 %inc, %n
2071  br i1 %cmp.not, label %while.end, label %while.body ; exit 1: index reached %n
2072
2073while.body:
2074  %idxprom = zext i32 %inc to i64
2075  %arrayidx = getelementptr inbounds i8, ptr %a, i64 %idxprom
2076  %0 = load i8, ptr %arrayidx
2077  %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %idxprom
2078  %1 = load i8, ptr %arrayidx2
2079  %cmp.not2 = icmp eq i8 %0, %1
2080  br i1 %cmp.not2, label %while.cond, label %while.end ; exit 2: the two loaded bytes differ
2081
2082while.end:
2083  %inc.lcssa = phi i32 [ %inc, %while.body ], [ %inc, %while.cond ] ; same value (%inc) on both exits
2084  %final_ptr = phi ptr [ %c, %while.body ], [ %d, %while.cond ] ; differs per exit (%c vs %d); the assertions above expect the loop to be left as-is for every RUN line
2085  store i32 %inc.lcssa, ptr %final_ptr
2086  ret i32 %inc.lcssa
2087}
2088
; Similar to @compare_bytes_simple2, except that the while.end block has a single
; PHI whose incoming value differs for each exiting block of the loop (%d vs %inc).
2089define i32 @compare_bytes_simple3(ptr %a, ptr %b, ptr %c, i32 %d, i32 %len, i32 %n) {
2090; CHECK-LABEL: define i32 @compare_bytes_simple3(
2091; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i32 [[D:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
2092; CHECK-NEXT:  entry:
2093; CHECK-NEXT:    br label [[WHILE_COND:%.*]]
2094; CHECK:       while.cond:
2095; CHECK-NEXT:    [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
2096; CHECK-NEXT:    [[INC]] = add i32 [[LEN_ADDR]], 1
2097; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
2098; CHECK-NEXT:    br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
2099; CHECK:       while.body:
2100; CHECK-NEXT:    [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
2101; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
2102; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
2103; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
2104; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
2105; CHECK-NEXT:    [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
2106; CHECK-NEXT:    br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
2107; CHECK:       while.end:
2108; CHECK-NEXT:    [[FINAL_VAL:%.*]] = phi i32 [ [[D]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
2109; CHECK-NEXT:    store i32 [[FINAL_VAL]], ptr [[C]], align 4
2110; CHECK-NEXT:    ret i32 [[FINAL_VAL]]
2111;
2112; LMUL8-LABEL: define i32 @compare_bytes_simple3(
2113; LMUL8-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i32 [[D:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
2114; LMUL8-NEXT:  entry:
2115; LMUL8-NEXT:    br label [[WHILE_COND:%.*]]
2116; LMUL8:       while.cond:
2117; LMUL8-NEXT:    [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
2118; LMUL8-NEXT:    [[INC]] = add i32 [[LEN_ADDR]], 1
2119; LMUL8-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
2120; LMUL8-NEXT:    br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
2121; LMUL8:       while.body:
2122; LMUL8-NEXT:    [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
2123; LMUL8-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
2124; LMUL8-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
2125; LMUL8-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
2126; LMUL8-NEXT:    [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
2127; LMUL8-NEXT:    [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
2128; LMUL8-NEXT:    br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
2129; LMUL8:       while.end:
2130; LMUL8-NEXT:    [[FINAL_VAL:%.*]] = phi i32 [ [[D]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
2131; LMUL8-NEXT:    store i32 [[FINAL_VAL]], ptr [[C]], align 4
2132; LMUL8-NEXT:    ret i32 [[FINAL_VAL]]
2133;
2134; LOOP-DEL-LABEL: define i32 @compare_bytes_simple3(
2135; LOOP-DEL-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i32 [[D:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
2136; LOOP-DEL-NEXT:  entry:
2137; LOOP-DEL-NEXT:    br label [[WHILE_COND:%.*]]
2138; LOOP-DEL:       while.cond:
2139; LOOP-DEL-NEXT:    [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
2140; LOOP-DEL-NEXT:    [[INC]] = add i32 [[LEN_ADDR]], 1
2141; LOOP-DEL-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
2142; LOOP-DEL-NEXT:    br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
2143; LOOP-DEL:       while.body:
2144; LOOP-DEL-NEXT:    [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
2145; LOOP-DEL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
2146; LOOP-DEL-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
2147; LOOP-DEL-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
2148; LOOP-DEL-NEXT:    [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
2149; LOOP-DEL-NEXT:    [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
2150; LOOP-DEL-NEXT:    br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
2151; LOOP-DEL:       while.end:
2152; LOOP-DEL-NEXT:    [[FINAL_VAL:%.*]] = phi i32 [ [[D]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
2153; LOOP-DEL-NEXT:    store i32 [[FINAL_VAL]], ptr [[C]], align 4
2154; LOOP-DEL-NEXT:    ret i32 [[FINAL_VAL]]
2155;
2156; MASKED-LABEL: define i32 @compare_bytes_simple3(
2157; MASKED-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i32 [[D:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
2158; MASKED-NEXT:  entry:
2159; MASKED-NEXT:    br label [[WHILE_COND:%.*]]
2160; MASKED:       while.cond:
2161; MASKED-NEXT:    [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
2162; MASKED-NEXT:    [[INC]] = add i32 [[LEN_ADDR]], 1
2163; MASKED-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
2164; MASKED-NEXT:    br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
2165; MASKED:       while.body:
2166; MASKED-NEXT:    [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
2167; MASKED-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
2168; MASKED-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
2169; MASKED-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
2170; MASKED-NEXT:    [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
2171; MASKED-NEXT:    [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
2172; MASKED-NEXT:    br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
2173; MASKED:       while.end:
2174; MASKED-NEXT:    [[FINAL_VAL:%.*]] = phi i32 [ [[D]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
2175; MASKED-NEXT:    store i32 [[FINAL_VAL]], ptr [[C]], align 4
2176; MASKED-NEXT:    ret i32 [[FINAL_VAL]]
2177;
2178  entry:
2179  br label %while.cond
2180
2181  while.cond:
2182  %len.addr = phi i32 [ %len, %entry ], [ %inc, %while.body ]
2183  %inc = add i32 %len.addr, 1 ; index is advanced before the bound test and before it is used as an offset
2184  %cmp.not = icmp eq i32 %inc, %n
2185  br i1 %cmp.not, label %while.end, label %while.body ; exit 1: index reached %n
2186
2187  while.body:
2188  %idxprom = zext i32 %inc to i64
2189  %arrayidx = getelementptr inbounds i8, ptr %a, i64 %idxprom
2190  %0 = load i8, ptr %arrayidx
2191  %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %idxprom
2192  %1 = load i8, ptr %arrayidx2
2193  %cmp.not2 = icmp eq i8 %0, %1
2194  br i1 %cmp.not2, label %while.cond, label %while.end ; exit 2: the two loaded bytes differ
2195
2196  while.end:
2197  %final_val = phi i32 [ %d, %while.body ], [ %inc, %while.cond ] ; %d on the byte-mismatch exit, %inc on the index-reached-%n exit; the assertions above expect the loop to be left as-is for every RUN line
2198  store i32 %final_val, ptr %c
2199  ret i32 %final_val
2200}
2201
2202; Disable the optimization when noimplicitfloat is present.
; The function below carries the noimplicitfloat attribute. All four check
; groups that follow expect the scalar while.cond / while.body loop to come out
; of the pass exactly as written in the input, i.e. no mismatch_* blocks are
; expected for this function.
2203define i32 @no_implicit_float(ptr %a, ptr %b, i32 %len, i32 %n) noimplicitfloat {
2204; CHECK-LABEL: define i32 @no_implicit_float(
2205; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
2206; CHECK-NEXT:  entry:
2207; CHECK-NEXT:    br label [[WHILE_COND:%.*]]
2208; CHECK:       while.cond:
2209; CHECK-NEXT:    [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
2210; CHECK-NEXT:    [[INC]] = add i32 [[LEN_ADDR]], 1
2211; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
2212; CHECK-NEXT:    br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
2213; CHECK:       while.body:
2214; CHECK-NEXT:    [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
2215; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
2216; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
2217; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
2218; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
2219; CHECK-NEXT:    [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
2220; CHECK-NEXT:    br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
2221; CHECK:       while.end:
2222; CHECK-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
2223; CHECK-NEXT:    ret i32 [[INC_LCSSA]]
2224;
2225; LMUL8-LABEL: define i32 @no_implicit_float(
2226; LMUL8-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
2227; LMUL8-NEXT:  entry:
2228; LMUL8-NEXT:    br label [[WHILE_COND:%.*]]
2229; LMUL8:       while.cond:
2230; LMUL8-NEXT:    [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
2231; LMUL8-NEXT:    [[INC]] = add i32 [[LEN_ADDR]], 1
2232; LMUL8-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
2233; LMUL8-NEXT:    br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
2234; LMUL8:       while.body:
2235; LMUL8-NEXT:    [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
2236; LMUL8-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
2237; LMUL8-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
2238; LMUL8-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
2239; LMUL8-NEXT:    [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
2240; LMUL8-NEXT:    [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
2241; LMUL8-NEXT:    br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
2242; LMUL8:       while.end:
2243; LMUL8-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
2244; LMUL8-NEXT:    ret i32 [[INC_LCSSA]]
2245;
2246; LOOP-DEL-LABEL: define i32 @no_implicit_float(
2247; LOOP-DEL-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
2248; LOOP-DEL-NEXT:  entry:
2249; LOOP-DEL-NEXT:    br label [[WHILE_COND:%.*]]
2250; LOOP-DEL:       while.cond:
2251; LOOP-DEL-NEXT:    [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
2252; LOOP-DEL-NEXT:    [[INC]] = add i32 [[LEN_ADDR]], 1
2253; LOOP-DEL-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
2254; LOOP-DEL-NEXT:    br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
2255; LOOP-DEL:       while.body:
2256; LOOP-DEL-NEXT:    [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
2257; LOOP-DEL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
2258; LOOP-DEL-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
2259; LOOP-DEL-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
2260; LOOP-DEL-NEXT:    [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
2261; LOOP-DEL-NEXT:    [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
2262; LOOP-DEL-NEXT:    br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
2263; LOOP-DEL:       while.end:
2264; LOOP-DEL-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
2265; LOOP-DEL-NEXT:    ret i32 [[INC_LCSSA]]
2266;
2267; MASKED-LABEL: define i32 @no_implicit_float(
2268; MASKED-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
2269; MASKED-NEXT:  entry:
2270; MASKED-NEXT:    br label [[WHILE_COND:%.*]]
2271; MASKED:       while.cond:
2272; MASKED-NEXT:    [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
2273; MASKED-NEXT:    [[INC]] = add i32 [[LEN_ADDR]], 1
2274; MASKED-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
2275; MASKED-NEXT:    br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
2276; MASKED:       while.body:
2277; MASKED-NEXT:    [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
2278; MASKED-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
2279; MASKED-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
2280; MASKED-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
2281; MASKED-NEXT:    [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
2282; MASKED-NEXT:    [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
2283; MASKED-NEXT:    br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
2284; MASKED:       while.end:
2285; MASKED-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
2286; MASKED-NEXT:    ret i32 [[INC_LCSSA]]
2287;
2288entry:
2289  br label %while.cond
2290
; Loop header: the index starts at %len, is incremented before use, and the
; loop exits as soon as the incremented index equals %n.
2291while.cond:
2292  %len.addr = phi i32 [ %len, %entry ], [ %inc, %while.body ]
2293  %inc = add i32 %len.addr, 1
2294  %cmp.not = icmp eq i32 %inc, %n
2295  br i1 %cmp.not, label %while.end, label %while.body
2296
; Loop body: load one byte from %a and one from %b at the same zero-extended
; index; keep iterating while the bytes are equal, otherwise leave the loop.
2297while.body:
2298  %idxprom = zext i32 %inc to i64
2299  %arrayidx = getelementptr inbounds i8, ptr %a, i64 %idxprom
2300  %0 = load i8, ptr %arrayidx
2301  %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %idxprom
2302  %1 = load i8, ptr %arrayidx2
2303  %cmp.not2 = icmp eq i8 %0, %1
2304  br i1 %cmp.not2, label %while.cond, label %while.end
2305
; Exit: both incoming edges carry %inc, so the returned value is the index at
; which the loop stopped (either a differing byte or %n was reached).
2306while.end:
2307  %inc.lcssa = phi i32 [ %inc, %while.body ], [ %inc, %while.cond ]
2308  ret i32 %inc.lcssa
2309}
2310