xref: /llvm-project/llvm/test/Transforms/LoopVectorize/X86/predicate-switch.ll (revision 38fffa630ee80163dc65e759392ad29798905679)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2; RUN: opt -p loop-vectorize -mtriple=x86_64-apple-macosx -mcpu=skylake-avx512 -force-vector-interleave=1 -S %s | FileCheck --check-prefixes=COST %s
3; RUN: opt -p loop-vectorize -mtriple=x86_64-apple-macosx -mcpu=skylake-avx512 -force-vector-width=4 -force-vector-interleave=2 -S %s | FileCheck --check-prefixes=FORCED %s
4
; Walks i64 elements starting at %start until the incremented pointer equals
; %end. The switch sends both case values (-12 and 13) to one shared block
; that stores 42 back to the loaded address; the default destination is the
; loop latch, so no store happens for other values.
; COST checks: one <4 x i64> masked store whose mask is the OR of the two case
; compares. FORCED checks (-force-vector-width=4 -force-vector-interleave=2):
; the same pattern repeated for two <4 x i64> parts per vector iteration.
5define void @switch_default_to_latch_common_dest(ptr %start, ptr %end) {
6; COST-LABEL: define void @switch_default_to_latch_common_dest(
7; COST-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0:[0-9]+]] {
8; COST-NEXT:  [[ENTRY:.*]]:
9; COST-NEXT:    [[START2:%.*]] = ptrtoint ptr [[START]] to i64
10; COST-NEXT:    [[END1:%.*]] = ptrtoint ptr [[END]] to i64
11; COST-NEXT:    [[TMP0:%.*]] = add i64 [[END1]], -8
12; COST-NEXT:    [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
13; COST-NEXT:    [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
14; COST-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
15; COST-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4
16; COST-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
17; COST:       [[VECTOR_PH]]:
18; COST-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4
19; COST-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
20; COST-NEXT:    [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
21; COST-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
22; COST-NEXT:    br label %[[VECTOR_BODY:.*]]
23; COST:       [[VECTOR_BODY]]:
24; COST-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
25; COST-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
26; COST-NEXT:    [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0
27; COST-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP5]]
28; COST-NEXT:    [[TMP6:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0
29; COST-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP6]], align 1
30; COST-NEXT:    [[TMP7:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
31; COST-NEXT:    [[TMP8:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
32; COST-NEXT:    [[TMP10:%.*]] = or <4 x i1> [[TMP7]], [[TMP8]]
33; COST-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP6]], i32 1, <4 x i1> [[TMP10]])
34; COST-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
35; COST-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
36; COST-NEXT:    br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
37; COST:       [[MIDDLE_BLOCK]]:
38; COST-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
39; COST-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
40; COST:       [[SCALAR_PH]]:
41; COST-NEXT:    [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
42; COST-NEXT:    br label %[[LOOP_HEADER:.*]]
43; COST:       [[LOOP_HEADER]]:
44; COST-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
45; COST-NEXT:    [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
46; COST-NEXT:    switch i64 [[L]], label %[[LOOP_LATCH]] [
47; COST-NEXT:      i64 -12, label %[[IF_THEN:.*]]
48; COST-NEXT:      i64 13, label %[[IF_THEN]]
49; COST-NEXT:    ]
50; COST:       [[IF_THEN]]:
51; COST-NEXT:    store i64 42, ptr [[PTR_IV]], align 1
52; COST-NEXT:    br label %[[LOOP_LATCH]]
53; COST:       [[LOOP_LATCH]]:
54; COST-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
55; COST-NEXT:    [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
56; COST-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
57; COST:       [[EXIT]]:
58; COST-NEXT:    ret void
59;
60; FORCED-LABEL: define void @switch_default_to_latch_common_dest(
61; FORCED-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0:[0-9]+]] {
62; FORCED-NEXT:  [[ENTRY:.*]]:
63; FORCED-NEXT:    [[START2:%.*]] = ptrtoint ptr [[START]] to i64
64; FORCED-NEXT:    [[END1:%.*]] = ptrtoint ptr [[END]] to i64
65; FORCED-NEXT:    [[TMP0:%.*]] = add i64 [[END1]], -8
66; FORCED-NEXT:    [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
67; FORCED-NEXT:    [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
68; FORCED-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
69; FORCED-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8
70; FORCED-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
71; FORCED:       [[VECTOR_PH]]:
72; FORCED-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 8
73; FORCED-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
74; FORCED-NEXT:    [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
75; FORCED-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
76; FORCED-NEXT:    br label %[[VECTOR_BODY:.*]]
77; FORCED:       [[VECTOR_BODY]]:
78; FORCED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
79; FORCED-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
80; FORCED-NEXT:    [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0
81; FORCED-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP5]]
82; FORCED-NEXT:    [[TMP7:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0
83; FORCED-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4
84; FORCED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 1
85; FORCED-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
86; FORCED-NEXT:    [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
87; FORCED-NEXT:    [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 -12)
88; FORCED-NEXT:    [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
89; FORCED-NEXT:    [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 13)
90; FORCED-NEXT:    [[TMP15:%.*]] = or <4 x i1> [[TMP9]], [[TMP11]]
91; FORCED-NEXT:    [[TMP16:%.*]] = or <4 x i1> [[TMP10]], [[TMP12]]
92; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP7]], i32 1, <4 x i1> [[TMP15]])
93; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP8]], i32 1, <4 x i1> [[TMP16]])
94; FORCED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
95; FORCED-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
96; FORCED-NEXT:    br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
97; FORCED:       [[MIDDLE_BLOCK]]:
98; FORCED-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
99; FORCED-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
100; FORCED:       [[SCALAR_PH]]:
101; FORCED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
102; FORCED-NEXT:    br label %[[LOOP_HEADER:.*]]
103; FORCED:       [[LOOP_HEADER]]:
104; FORCED-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
105; FORCED-NEXT:    [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
106; FORCED-NEXT:    switch i64 [[L]], label %[[LOOP_LATCH]] [
107; FORCED-NEXT:      i64 -12, label %[[IF_THEN:.*]]
108; FORCED-NEXT:      i64 13, label %[[IF_THEN]]
109; FORCED-NEXT:    ]
110; FORCED:       [[IF_THEN]]:
111; FORCED-NEXT:    store i64 42, ptr [[PTR_IV]], align 1
112; FORCED-NEXT:    br label %[[LOOP_LATCH]]
113; FORCED:       [[LOOP_LATCH]]:
114; FORCED-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
115; FORCED-NEXT:    [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
116; FORCED-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
117; FORCED:       [[EXIT]]:
118; FORCED-NEXT:    ret void
119;
120entry:
121  br label %loop.header
122
; %ptr.iv is a pointer induction variable; the loaded value selects the
; switch destination. Only -12 and 13 reach %if.then; everything else goes
; straight to the latch.
123loop.header:
124  %ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
125  %l = load i64, ptr %ptr.iv, align 1
126  switch i64 %l, label %loop.latch [
127  i64 -12, label %if.then
128  i64 13, label %if.then
129  ]
130
; Shared destination of both switch cases: overwrites the element that was
; just loaded.
131if.then:
132  store i64 42, ptr %ptr.iv, align 1
133  br label %loop.latch
134
; Advance by one i64 and exit once the incremented pointer equals %end.
135loop.latch:
136  %ptr.iv.next = getelementptr inbounds i64, ptr %ptr.iv, i64 1
137  %ec = icmp eq ptr %ptr.iv.next, %end
138  br i1 %ec, label %exit, label %loop.header
139
140exit:
141  ret void
142}
143
; Same loop shape as @switch_default_to_latch_common_dest, but the two-case
; switch is written as a chain of conditional branches: %eq.1 (value == -12)
; branches to %if.then, otherwise %else tests %eq.2 (value == 13) and either
; branches to %if.then or falls through to the latch.
; Both check prefixes expect a masked store of 42 whose mask is
; (!eq.1 & eq.2) | eq.1, built from xor/select/or; FORCED expects it for two
; <4 x i64> parts per vector iteration.
144define void @switch_default_to_latch_common_dest_using_branches(ptr %start, ptr %end) {
145; COST-LABEL: define void @switch_default_to_latch_common_dest_using_branches(
146; COST-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
147; COST-NEXT:  [[ENTRY:.*]]:
148; COST-NEXT:    [[START2:%.*]] = ptrtoint ptr [[START]] to i64
149; COST-NEXT:    [[END1:%.*]] = ptrtoint ptr [[END]] to i64
150; COST-NEXT:    [[TMP0:%.*]] = add i64 [[END1]], -8
151; COST-NEXT:    [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
152; COST-NEXT:    [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
153; COST-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
154; COST-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4
155; COST-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
156; COST:       [[VECTOR_PH]]:
157; COST-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4
158; COST-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
159; COST-NEXT:    [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
160; COST-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
161; COST-NEXT:    br label %[[VECTOR_BODY:.*]]
162; COST:       [[VECTOR_BODY]]:
163; COST-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
164; COST-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
165; COST-NEXT:    [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0
166; COST-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP5]]
167; COST-NEXT:    [[TMP6:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0
168; COST-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP6]], align 1
169; COST-NEXT:    [[TMP7:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
170; COST-NEXT:    [[TMP8:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
171; COST-NEXT:    [[TMP9:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true)
172; COST-NEXT:    [[TMP10:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP8]], <4 x i1> zeroinitializer
173; COST-NEXT:    [[TMP11:%.*]] = or <4 x i1> [[TMP10]], [[TMP7]]
174; COST-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP6]], i32 1, <4 x i1> [[TMP11]])
175; COST-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
176; COST-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
177; COST-NEXT:    br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
178; COST:       [[MIDDLE_BLOCK]]:
179; COST-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
180; COST-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
181; COST:       [[SCALAR_PH]]:
182; COST-NEXT:    [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
183; COST-NEXT:    br label %[[LOOP_HEADER:.*]]
184; COST:       [[LOOP_HEADER]]:
185; COST-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
186; COST-NEXT:    [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
187; COST-NEXT:    [[EQ_1:%.*]] = icmp eq i64 [[L]], -12
188; COST-NEXT:    [[EQ_2:%.*]] = icmp eq i64 [[L]], 13
189; COST-NEXT:    br i1 [[EQ_1]], label %[[IF_THEN:.*]], label %[[ELSE:.*]]
190; COST:       [[IF_THEN]]:
191; COST-NEXT:    store i64 42, ptr [[PTR_IV]], align 1
192; COST-NEXT:    br label %[[LOOP_LATCH]]
193; COST:       [[ELSE]]:
194; COST-NEXT:    br i1 [[EQ_2]], label %[[IF_THEN]], label %[[LOOP_LATCH]]
195; COST:       [[LOOP_LATCH]]:
196; COST-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
197; COST-NEXT:    [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
198; COST-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]]
199; COST:       [[EXIT]]:
200; COST-NEXT:    ret void
201;
202; FORCED-LABEL: define void @switch_default_to_latch_common_dest_using_branches(
203; FORCED-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
204; FORCED-NEXT:  [[ENTRY:.*]]:
205; FORCED-NEXT:    [[START2:%.*]] = ptrtoint ptr [[START]] to i64
206; FORCED-NEXT:    [[END1:%.*]] = ptrtoint ptr [[END]] to i64
207; FORCED-NEXT:    [[TMP0:%.*]] = add i64 [[END1]], -8
208; FORCED-NEXT:    [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
209; FORCED-NEXT:    [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
210; FORCED-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
211; FORCED-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8
212; FORCED-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
213; FORCED:       [[VECTOR_PH]]:
214; FORCED-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 8
215; FORCED-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
216; FORCED-NEXT:    [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
217; FORCED-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
218; FORCED-NEXT:    br label %[[VECTOR_BODY:.*]]
219; FORCED:       [[VECTOR_BODY]]:
220; FORCED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
221; FORCED-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
222; FORCED-NEXT:    [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0
223; FORCED-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP5]]
224; FORCED-NEXT:    [[TMP7:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0
225; FORCED-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4
226; FORCED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 1
227; FORCED-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
228; FORCED-NEXT:    [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
229; FORCED-NEXT:    [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 -12)
230; FORCED-NEXT:    [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
231; FORCED-NEXT:    [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 13)
232; FORCED-NEXT:    [[TMP13:%.*]] = xor <4 x i1> [[TMP9]], splat (i1 true)
233; FORCED-NEXT:    [[TMP14:%.*]] = xor <4 x i1> [[TMP10]], splat (i1 true)
234; FORCED-NEXT:    [[TMP15:%.*]] = select <4 x i1> [[TMP13]], <4 x i1> [[TMP11]], <4 x i1> zeroinitializer
235; FORCED-NEXT:    [[TMP16:%.*]] = select <4 x i1> [[TMP14]], <4 x i1> [[TMP12]], <4 x i1> zeroinitializer
236; FORCED-NEXT:    [[TMP17:%.*]] = or <4 x i1> [[TMP15]], [[TMP9]]
237; FORCED-NEXT:    [[TMP18:%.*]] = or <4 x i1> [[TMP16]], [[TMP10]]
238; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP7]], i32 1, <4 x i1> [[TMP17]])
239; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP8]], i32 1, <4 x i1> [[TMP18]])
240; FORCED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
241; FORCED-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
242; FORCED-NEXT:    br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
243; FORCED:       [[MIDDLE_BLOCK]]:
244; FORCED-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
245; FORCED-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
246; FORCED:       [[SCALAR_PH]]:
247; FORCED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
248; FORCED-NEXT:    br label %[[LOOP_HEADER:.*]]
249; FORCED:       [[LOOP_HEADER]]:
250; FORCED-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
251; FORCED-NEXT:    [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
252; FORCED-NEXT:    [[EQ_1:%.*]] = icmp eq i64 [[L]], -12
253; FORCED-NEXT:    [[EQ_2:%.*]] = icmp eq i64 [[L]], 13
254; FORCED-NEXT:    br i1 [[EQ_1]], label %[[IF_THEN:.*]], label %[[ELSE:.*]]
255; FORCED:       [[IF_THEN]]:
256; FORCED-NEXT:    store i64 42, ptr [[PTR_IV]], align 1
257; FORCED-NEXT:    br label %[[LOOP_LATCH]]
258; FORCED:       [[ELSE]]:
259; FORCED-NEXT:    br i1 [[EQ_2]], label %[[IF_THEN]], label %[[LOOP_LATCH]]
260; FORCED:       [[LOOP_LATCH]]:
261; FORCED-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
262; FORCED-NEXT:    [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
263; FORCED-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]]
264; FORCED:       [[EXIT]]:
265; FORCED-NEXT:    ret void
266;
267entry:
268  br label %loop.header
269
; Both compares are computed up front in the header; only %eq.1 is consumed
; here, %eq.2 is consumed by the branch in %else.
270loop.header:
271  %ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
272  %l = load i64, ptr %ptr.iv, align 1
273  %eq.1 = icmp eq i64 %l, -12
274  %eq.2 = icmp eq i64 %l, 13
275  br i1 %eq.1, label %if.then, label %else
276
; Reached from the header (value == -12) and from %else (value == 13).
277if.then:
278  store i64 42, ptr %ptr.iv, align 1
279  br label %loop.latch
280
; Second test in the chain; a miss here skips the store entirely.
281else:
282  br i1 %eq.2, label %if.then, label %loop.latch
283
; Advance by one i64 and exit once the incremented pointer equals %end.
284loop.latch:
285  %ptr.iv.next = getelementptr inbounds i64, ptr %ptr.iv, i64 1
286  %ec = icmp eq ptr %ptr.iv.next, %end
287  br i1 %ec, label %exit, label %loop.header
288
289exit:
290  ret void
291}
292
; Every switch destination is a distinct block that stores a different
; constant to the loaded address: -12 -> 42, 13 -> 0, 0 -> 1, default -> 2.
; COST checks: the loop is expected to stay scalar (no vector blocks appear).
; FORCED checks: one masked store per destination and per <4 x i64> part; the
; default mask is the negation of the OR of the three case compares.
293; TODO: Instead of using masked stores, the store can be sunk, executed
294; unconditionally and fed by selects.
295define void @switch_all_dests_distinct(ptr %start, ptr %end) {
296; COST-LABEL: define void @switch_all_dests_distinct(
297; COST-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
298; COST-NEXT:  [[ENTRY:.*]]:
299; COST-NEXT:    br label %[[LOOP_HEADER:.*]]
300; COST:       [[LOOP_HEADER]]:
301; COST-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
302; COST-NEXT:    [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
303; COST-NEXT:    switch i64 [[L]], label %[[DEFAULT:.*]] [
304; COST-NEXT:      i64 -12, label %[[IF_THEN_1:.*]]
305; COST-NEXT:      i64 13, label %[[IF_THEN_2:.*]]
306; COST-NEXT:      i64 0, label %[[IF_THEN_3:.*]]
307; COST-NEXT:    ]
308; COST:       [[IF_THEN_1]]:
309; COST-NEXT:    store i64 42, ptr [[PTR_IV]], align 1
310; COST-NEXT:    br label %[[LOOP_LATCH]]
311; COST:       [[IF_THEN_2]]:
312; COST-NEXT:    store i64 0, ptr [[PTR_IV]], align 1
313; COST-NEXT:    br label %[[LOOP_LATCH]]
314; COST:       [[IF_THEN_3]]:
315; COST-NEXT:    store i64 1, ptr [[PTR_IV]], align 1
316; COST-NEXT:    br label %[[LOOP_LATCH]]
317; COST:       [[DEFAULT]]:
318; COST-NEXT:    store i64 2, ptr [[PTR_IV]], align 1
319; COST-NEXT:    br label %[[LOOP_LATCH]]
320; COST:       [[LOOP_LATCH]]:
321; COST-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
322; COST-NEXT:    [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
323; COST-NEXT:    br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]]
324; COST:       [[EXIT]]:
325; COST-NEXT:    ret void
326;
327; FORCED-LABEL: define void @switch_all_dests_distinct(
328; FORCED-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
329; FORCED-NEXT:  [[ENTRY:.*]]:
330; FORCED-NEXT:    [[START2:%.*]] = ptrtoint ptr [[START]] to i64
331; FORCED-NEXT:    [[END1:%.*]] = ptrtoint ptr [[END]] to i64
332; FORCED-NEXT:    [[TMP0:%.*]] = add i64 [[END1]], -8
333; FORCED-NEXT:    [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
334; FORCED-NEXT:    [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
335; FORCED-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
336; FORCED-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8
337; FORCED-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
338; FORCED:       [[VECTOR_PH]]:
339; FORCED-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 8
340; FORCED-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
341; FORCED-NEXT:    [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
342; FORCED-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
343; FORCED-NEXT:    br label %[[VECTOR_BODY:.*]]
344; FORCED:       [[VECTOR_BODY]]:
345; FORCED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
346; FORCED-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
347; FORCED-NEXT:    [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0
348; FORCED-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP5]]
349; FORCED-NEXT:    [[TMP7:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0
350; FORCED-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4
351; FORCED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 1
352; FORCED-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
353; FORCED-NEXT:    [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
354; FORCED-NEXT:    [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 -12)
355; FORCED-NEXT:    [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
356; FORCED-NEXT:    [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 13)
357; FORCED-NEXT:    [[TMP13:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], zeroinitializer
358; FORCED-NEXT:    [[TMP14:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], zeroinitializer
359; FORCED-NEXT:    [[TMP15:%.*]] = or <4 x i1> [[TMP9]], [[TMP11]]
360; FORCED-NEXT:    [[TMP16:%.*]] = or <4 x i1> [[TMP10]], [[TMP12]]
361; FORCED-NEXT:    [[TMP17:%.*]] = or <4 x i1> [[TMP15]], [[TMP13]]
362; FORCED-NEXT:    [[TMP18:%.*]] = or <4 x i1> [[TMP16]], [[TMP14]]
363; FORCED-NEXT:    [[TMP19:%.*]] = xor <4 x i1> [[TMP17]], splat (i1 true)
364; FORCED-NEXT:    [[TMP20:%.*]] = xor <4 x i1> [[TMP18]], splat (i1 true)
365; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 1), ptr [[TMP7]], i32 1, <4 x i1> [[TMP13]])
366; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 1), ptr [[TMP8]], i32 1, <4 x i1> [[TMP14]])
367; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP7]], i32 1, <4 x i1> [[TMP11]])
368; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP8]], i32 1, <4 x i1> [[TMP12]])
369; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP7]], i32 1, <4 x i1> [[TMP9]])
370; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP8]], i32 1, <4 x i1> [[TMP10]])
371; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[TMP7]], i32 1, <4 x i1> [[TMP19]])
372; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[TMP8]], i32 1, <4 x i1> [[TMP20]])
373; FORCED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
374; FORCED-NEXT:    [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
375; FORCED-NEXT:    br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
376; FORCED:       [[MIDDLE_BLOCK]]:
377; FORCED-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
378; FORCED-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
379; FORCED:       [[SCALAR_PH]]:
380; FORCED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
381; FORCED-NEXT:    br label %[[LOOP_HEADER:.*]]
382; FORCED:       [[LOOP_HEADER]]:
383; FORCED-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
384; FORCED-NEXT:    [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
385; FORCED-NEXT:    switch i64 [[L]], label %[[DEFAULT:.*]] [
386; FORCED-NEXT:      i64 -12, label %[[IF_THEN_1:.*]]
387; FORCED-NEXT:      i64 13, label %[[IF_THEN_2:.*]]
388; FORCED-NEXT:      i64 0, label %[[IF_THEN_3:.*]]
389; FORCED-NEXT:    ]
390; FORCED:       [[IF_THEN_1]]:
391; FORCED-NEXT:    store i64 42, ptr [[PTR_IV]], align 1
392; FORCED-NEXT:    br label %[[LOOP_LATCH]]
393; FORCED:       [[IF_THEN_2]]:
394; FORCED-NEXT:    store i64 0, ptr [[PTR_IV]], align 1
395; FORCED-NEXT:    br label %[[LOOP_LATCH]]
396; FORCED:       [[IF_THEN_3]]:
397; FORCED-NEXT:    store i64 1, ptr [[PTR_IV]], align 1
398; FORCED-NEXT:    br label %[[LOOP_LATCH]]
399; FORCED:       [[DEFAULT]]:
400; FORCED-NEXT:    store i64 2, ptr [[PTR_IV]], align 1
401; FORCED-NEXT:    br label %[[LOOP_LATCH]]
402; FORCED:       [[LOOP_LATCH]]:
403; FORCED-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
404; FORCED-NEXT:    [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
405; FORCED-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP7:![0-9]+]]
406; FORCED:       [[EXIT]]:
407; FORCED-NEXT:    ret void
408;
409entry:
410  br label %loop.header
411
; Four-way dispatch on the loaded value; unlike the common-destination tests
; in this file, no two edges share a successor and the default edge also
; reaches a storing block.
412loop.header:
413  %ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
414  %l = load i64, ptr %ptr.iv, align 1
415  switch i64 %l, label %default [
416  i64 -12, label %if.then.1
417  i64 13, label %if.then.2
418  i64 0, label %if.then.3
419  ]
420
; Case -12.
421if.then.1:
422  store i64 42, ptr %ptr.iv, align 1
423  br label %loop.latch
424
; Case 13.
425if.then.2:
426  store i64 0, ptr %ptr.iv, align 1
427  br label %loop.latch
428
; Case 0.
429if.then.3:
430  store i64 1, ptr %ptr.iv, align 1
431  br label %loop.latch
432
; Any value not matched by a case.
433default:
434  store i64 2, ptr %ptr.iv, align 1
435  br label %loop.latch
436
; Advance by one i64 and exit once the incremented pointer equals %end.
437loop.latch:
438  %ptr.iv.next = getelementptr inbounds i64, ptr %ptr.iv, i64 1
439  %ec = icmp eq ptr %ptr.iv.next, %end
440  br i1 %ec, label %exit, label %loop.header
441
442exit:
443  ret void
444}
445
446define void @switch_all_dests_distinct_variant_using_branches(ptr %start, ptr %end) {
447; COST-LABEL: define void @switch_all_dests_distinct_variant_using_branches(
448; COST-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
449; COST-NEXT:  [[ENTRY:.*]]:
450; COST-NEXT:    [[START2:%.*]] = ptrtoint ptr [[START]] to i64
451; COST-NEXT:    [[END1:%.*]] = ptrtoint ptr [[END]] to i64
452; COST-NEXT:    [[TMP0:%.*]] = add i64 [[END1]], -8
453; COST-NEXT:    [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
454; COST-NEXT:    [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
455; COST-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
456; COST-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4
457; COST-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
458; COST:       [[VECTOR_PH]]:
459; COST-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4
460; COST-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
461; COST-NEXT:    [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
462; COST-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
463; COST-NEXT:    br label %[[VECTOR_BODY:.*]]
464; COST:       [[VECTOR_BODY]]:
465; COST-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
466; COST-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
467; COST-NEXT:    [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0
468; COST-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP5]]
469; COST-NEXT:    [[TMP6:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0
470; COST-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP6]], align 1
471; COST-NEXT:    [[TMP7:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
472; COST-NEXT:    [[TMP8:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
473; COST-NEXT:    [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], zeroinitializer
474; COST-NEXT:    [[TMP10:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true)
475; COST-NEXT:    [[TMP11:%.*]] = xor <4 x i1> [[TMP8]], splat (i1 true)
476; COST-NEXT:    [[TMP12:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP11]], <4 x i1> zeroinitializer
477; COST-NEXT:    [[TMP13:%.*]] = select <4 x i1> [[TMP12]], <4 x i1> [[TMP9]], <4 x i1> zeroinitializer
478; COST-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 1), ptr [[TMP6]], i32 1, <4 x i1> [[TMP13]])
479; COST-NEXT:    [[TMP14:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP8]], <4 x i1> zeroinitializer
480; COST-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP6]], i32 1, <4 x i1> [[TMP14]])
481; COST-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP6]], i32 1, <4 x i1> [[TMP7]])
482; COST-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
483; COST-NEXT:    [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
484; COST-NEXT:    br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
485; COST:       [[MIDDLE_BLOCK]]:
486; COST-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
487; COST-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
488; COST:       [[SCALAR_PH]]:
489; COST-NEXT:    [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
490; COST-NEXT:    br label %[[LOOP_HEADER:.*]]
491; COST:       [[LOOP_HEADER]]:
492; COST-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
493; COST-NEXT:    [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
494; COST-NEXT:    [[EQ_0:%.*]] = icmp eq i64 [[L]], -12
495; COST-NEXT:    [[EQ_1:%.*]] = icmp eq i64 [[L]], 13
496; COST-NEXT:    [[EQ_2:%.*]] = icmp eq i64 [[L]], 0
497; COST-NEXT:    br i1 [[EQ_0]], label %[[IF_THEN_1:.*]], label %[[ELSE_1:.*]]
498; COST:       [[ELSE_1]]:
499; COST-NEXT:    br i1 [[EQ_1]], label %[[IF_THEN_2:.*]], label %[[ELSE_2:.*]]
500; COST:       [[ELSE_2]]:
501; COST-NEXT:    br i1 [[EQ_2]], label %[[IF_THEN_3:.*]], label %[[LOOP_LATCH]]
502; COST:       [[IF_THEN_1]]:
503; COST-NEXT:    store i64 42, ptr [[PTR_IV]], align 1
504; COST-NEXT:    br label %[[LOOP_LATCH]]
505; COST:       [[IF_THEN_2]]:
506; COST-NEXT:    store i64 0, ptr [[PTR_IV]], align 1
507; COST-NEXT:    br label %[[LOOP_LATCH]]
508; COST:       [[IF_THEN_3]]:
509; COST-NEXT:    store i64 1, ptr [[PTR_IV]], align 1
510; COST-NEXT:    br label %[[LOOP_LATCH]]
511; COST:       [[DEFAULT:.*:]]
512; COST-NEXT:    store i64 2, ptr poison, align 1
513; COST-NEXT:    unreachable
514; COST:       [[LOOP_LATCH]]:
515; COST-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
516; COST-NEXT:    [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
517; COST-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP7:![0-9]+]]
518; COST:       [[EXIT]]:
519; COST-NEXT:    ret void
520;
521; FORCED-LABEL: define void @switch_all_dests_distinct_variant_using_branches(
522; FORCED-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
523; FORCED-NEXT:  [[ENTRY:.*]]:
524; FORCED-NEXT:    [[START2:%.*]] = ptrtoint ptr [[START]] to i64
525; FORCED-NEXT:    [[END1:%.*]] = ptrtoint ptr [[END]] to i64
526; FORCED-NEXT:    [[TMP0:%.*]] = add i64 [[END1]], -8
527; FORCED-NEXT:    [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
528; FORCED-NEXT:    [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
529; FORCED-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
530; FORCED-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8
531; FORCED-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
532; FORCED:       [[VECTOR_PH]]:
533; FORCED-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 8
534; FORCED-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
535; FORCED-NEXT:    [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
536; FORCED-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
537; FORCED-NEXT:    br label %[[VECTOR_BODY:.*]]
538; FORCED:       [[VECTOR_BODY]]:
539; FORCED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
540; FORCED-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
541; FORCED-NEXT:    [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0
542; FORCED-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP5]]
543; FORCED-NEXT:    [[TMP7:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0
544; FORCED-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4
545; FORCED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 1
546; FORCED-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
547; FORCED-NEXT:    [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
548; FORCED-NEXT:    [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 -12)
549; FORCED-NEXT:    [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
550; FORCED-NEXT:    [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 13)
551; FORCED-NEXT:    [[TMP13:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], zeroinitializer
552; FORCED-NEXT:    [[TMP14:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], zeroinitializer
553; FORCED-NEXT:    [[TMP15:%.*]] = xor <4 x i1> [[TMP9]], splat (i1 true)
554; FORCED-NEXT:    [[TMP16:%.*]] = xor <4 x i1> [[TMP10]], splat (i1 true)
555; FORCED-NEXT:    [[TMP17:%.*]] = xor <4 x i1> [[TMP11]], splat (i1 true)
556; FORCED-NEXT:    [[TMP18:%.*]] = xor <4 x i1> [[TMP12]], splat (i1 true)
557; FORCED-NEXT:    [[TMP19:%.*]] = select <4 x i1> [[TMP15]], <4 x i1> [[TMP17]], <4 x i1> zeroinitializer
558; FORCED-NEXT:    [[TMP20:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> [[TMP18]], <4 x i1> zeroinitializer
559; FORCED-NEXT:    [[TMP21:%.*]] = select <4 x i1> [[TMP19]], <4 x i1> [[TMP13]], <4 x i1> zeroinitializer
560; FORCED-NEXT:    [[TMP22:%.*]] = select <4 x i1> [[TMP20]], <4 x i1> [[TMP14]], <4 x i1> zeroinitializer
561; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 1), ptr [[TMP7]], i32 1, <4 x i1> [[TMP21]])
562; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 1), ptr [[TMP8]], i32 1, <4 x i1> [[TMP22]])
563; FORCED-NEXT:    [[TMP23:%.*]] = select <4 x i1> [[TMP15]], <4 x i1> [[TMP11]], <4 x i1> zeroinitializer
564; FORCED-NEXT:    [[TMP24:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> [[TMP12]], <4 x i1> zeroinitializer
565; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP7]], i32 1, <4 x i1> [[TMP23]])
566; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP8]], i32 1, <4 x i1> [[TMP24]])
567; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP7]], i32 1, <4 x i1> [[TMP9]])
568; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP8]], i32 1, <4 x i1> [[TMP10]])
569; FORCED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
570; FORCED-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
571; FORCED-NEXT:    br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
572; FORCED:       [[MIDDLE_BLOCK]]:
573; FORCED-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
574; FORCED-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
575; FORCED:       [[SCALAR_PH]]:
576; FORCED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
577; FORCED-NEXT:    br label %[[LOOP_HEADER:.*]]
578; FORCED:       [[LOOP_HEADER]]:
579; FORCED-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
580; FORCED-NEXT:    [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
581; FORCED-NEXT:    [[EQ_0:%.*]] = icmp eq i64 [[L]], -12
582; FORCED-NEXT:    [[EQ_1:%.*]] = icmp eq i64 [[L]], 13
583; FORCED-NEXT:    [[EQ_2:%.*]] = icmp eq i64 [[L]], 0
584; FORCED-NEXT:    br i1 [[EQ_0]], label %[[IF_THEN_1:.*]], label %[[ELSE_1:.*]]
585; FORCED:       [[ELSE_1]]:
586; FORCED-NEXT:    br i1 [[EQ_1]], label %[[IF_THEN_2:.*]], label %[[ELSE_2:.*]]
587; FORCED:       [[ELSE_2]]:
588; FORCED-NEXT:    br i1 [[EQ_2]], label %[[IF_THEN_3:.*]], label %[[LOOP_LATCH]]
589; FORCED:       [[IF_THEN_1]]:
590; FORCED-NEXT:    store i64 42, ptr [[PTR_IV]], align 1
591; FORCED-NEXT:    br label %[[LOOP_LATCH]]
592; FORCED:       [[IF_THEN_2]]:
593; FORCED-NEXT:    store i64 0, ptr [[PTR_IV]], align 1
594; FORCED-NEXT:    br label %[[LOOP_LATCH]]
595; FORCED:       [[IF_THEN_3]]:
596; FORCED-NEXT:    store i64 1, ptr [[PTR_IV]], align 1
597; FORCED-NEXT:    br label %[[LOOP_LATCH]]
598; FORCED:       [[DEFAULT:.*:]]
599; FORCED-NEXT:    store i64 2, ptr poison, align 1
600; FORCED-NEXT:    unreachable
601; FORCED:       [[LOOP_LATCH]]:
602; FORCED-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
603; FORCED-NEXT:    [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
604; FORCED-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP9:![0-9]+]]
605; FORCED:       [[EXIT]]:
606; FORCED-NEXT:    ret void
607;
608entry:
609  br label %loop.header
610
611loop.header:
612  %ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
613  %l = load i64, ptr %ptr.iv, align 1
614  %eq.0 = icmp eq i64 %l, -12
615  %eq.1 = icmp eq i64 %l, 13
616  %eq.2 = icmp eq i64 %l, 0
617  br i1 %eq.0, label %if.then.1, label %else.1
618
619else.1:
620  br i1 %eq.1, label %if.then.2, label %else.2
621
622else.2:
623  br i1 %eq.2, label %if.then.3, label %loop.latch
624
625if.then.1:
626  store i64 42, ptr %ptr.iv, align 1
627  br label %loop.latch
628
629if.then.2:
630  store i64 0, ptr %ptr.iv, align 1
631  br label %loop.latch
632
633if.then.3:
634  store i64 1, ptr %ptr.iv, align 1
635  br label %loop.latch
636
637default:
638  store i64 2, ptr %ptr.iv, align 1
639  br label %loop.latch
640
641loop.latch:
642  %ptr.iv.next = getelementptr inbounds i64, ptr %ptr.iv, i64 1
643  %ec = icmp eq ptr %ptr.iv.next, %end
644  br i1 %ec, label %exit, label %loop.header
645
646exit:
647  ret void
648}
649
650
651
; Test: a loop whose body is a switch in which several case values share a
; destination. The loop walks i64 elements starting at %start and stops once
; the advanced pointer equals %end.
;   -12 and 0      -> %if.then.1 (stores 42)
;   13, 14 and 15  -> %if.then.2 (stores 0)
;   anything else  -> %default   (stores 2)
; The checks under the COST prefix contain no vector blocks, i.e. the loop is
; expected to stay scalar in that configuration. The checks under the FORCED
; prefix expect a <4 x i64> vector body with two parts, where the mask of each
; shared destination is the `or` of its case compares and the mask of the
; default destination is the negation of the `or` of all case masks.
652define void @switch_multiple_common_dests(ptr %start, ptr %end) {
653; COST-LABEL: define void @switch_multiple_common_dests(
654; COST-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
655; COST-NEXT:  [[ENTRY:.*]]:
656; COST-NEXT:    br label %[[LOOP_HEADER:.*]]
657; COST:       [[LOOP_HEADER]]:
658; COST-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
659; COST-NEXT:    [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
660; COST-NEXT:    switch i64 [[L]], label %[[DEFAULT:.*]] [
661; COST-NEXT:      i64 -12, label %[[IF_THEN_1:.*]]
662; COST-NEXT:      i64 0, label %[[IF_THEN_1]]
663; COST-NEXT:      i64 13, label %[[IF_THEN_2:.*]]
664; COST-NEXT:      i64 14, label %[[IF_THEN_2]]
665; COST-NEXT:      i64 15, label %[[IF_THEN_2]]
666; COST-NEXT:    ]
667; COST:       [[IF_THEN_1]]:
668; COST-NEXT:    store i64 42, ptr [[PTR_IV]], align 1
669; COST-NEXT:    br label %[[LOOP_LATCH]]
670; COST:       [[IF_THEN_2]]:
671; COST-NEXT:    store i64 0, ptr [[PTR_IV]], align 1
672; COST-NEXT:    br label %[[LOOP_LATCH]]
673; COST:       [[DEFAULT]]:
674; COST-NEXT:    store i64 2, ptr [[PTR_IV]], align 1
675; COST-NEXT:    br label %[[LOOP_LATCH]]
676; COST:       [[LOOP_LATCH]]:
677; COST-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
678; COST-NEXT:    [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
679; COST-NEXT:    br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]]
680; COST:       [[EXIT]]:
681; COST-NEXT:    ret void
682;
683; FORCED-LABEL: define void @switch_multiple_common_dests(
684; FORCED-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
685; FORCED-NEXT:  [[ENTRY:.*]]:
686; FORCED-NEXT:    [[START2:%.*]] = ptrtoint ptr [[START]] to i64
687; FORCED-NEXT:    [[END1:%.*]] = ptrtoint ptr [[END]] to i64
688; FORCED-NEXT:    [[TMP0:%.*]] = add i64 [[END1]], -8
689; FORCED-NEXT:    [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
690; FORCED-NEXT:    [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
691; FORCED-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
692; FORCED-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8
693; FORCED-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
694; FORCED:       [[VECTOR_PH]]:
695; FORCED-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 8
696; FORCED-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
697; FORCED-NEXT:    [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
698; FORCED-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
699; FORCED-NEXT:    br label %[[VECTOR_BODY:.*]]
700; FORCED:       [[VECTOR_BODY]]:
701; FORCED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
702; FORCED-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
703; FORCED-NEXT:    [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0
704; FORCED-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP5]]
705; FORCED-NEXT:    [[TMP7:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0
706; FORCED-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4
707; FORCED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 1
708; FORCED-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
709; FORCED-NEXT:    [[TMP23:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
710; FORCED-NEXT:    [[TMP24:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 -12)
711; FORCED-NEXT:    [[TMP25:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], zeroinitializer
712; FORCED-NEXT:    [[TMP26:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], zeroinitializer
713; FORCED-NEXT:    [[TMP13:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
714; FORCED-NEXT:    [[TMP14:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 13)
715; FORCED-NEXT:    [[TMP17:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 14)
716; FORCED-NEXT:    [[TMP18:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 14)
717; FORCED-NEXT:    [[TMP15:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 15)
718; FORCED-NEXT:    [[TMP16:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 15)
719; FORCED-NEXT:    [[TMP27:%.*]] = or <4 x i1> [[TMP23]], [[TMP25]]
720; FORCED-NEXT:    [[TMP28:%.*]] = or <4 x i1> [[TMP24]], [[TMP26]]
721; FORCED-NEXT:    [[TMP21:%.*]] = or <4 x i1> [[TMP13]], [[TMP17]]
722; FORCED-NEXT:    [[TMP22:%.*]] = or <4 x i1> [[TMP14]], [[TMP18]]
723; FORCED-NEXT:    [[TMP35:%.*]] = or <4 x i1> [[TMP21]], [[TMP15]]
724; FORCED-NEXT:    [[TMP36:%.*]] = or <4 x i1> [[TMP22]], [[TMP16]]
725; FORCED-NEXT:    [[TMP37:%.*]] = or <4 x i1> [[TMP27]], [[TMP35]]
726; FORCED-NEXT:    [[TMP38:%.*]] = or <4 x i1> [[TMP28]], [[TMP36]]
727; FORCED-NEXT:    [[TMP39:%.*]] = xor <4 x i1> [[TMP37]], splat (i1 true)
728; FORCED-NEXT:    [[TMP40:%.*]] = xor <4 x i1> [[TMP38]], splat (i1 true)
729; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP7]], i32 1, <4 x i1> [[TMP35]])
730; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP8]], i32 1, <4 x i1> [[TMP36]])
731; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP7]], i32 1, <4 x i1> [[TMP27]])
732; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP8]], i32 1, <4 x i1> [[TMP28]])
733; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[TMP7]], i32 1, <4 x i1> [[TMP39]])
734; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[TMP8]], i32 1, <4 x i1> [[TMP40]])
735; FORCED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
736; FORCED-NEXT:    [[TMP41:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
737; FORCED-NEXT:    br i1 [[TMP41]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
738; FORCED:       [[MIDDLE_BLOCK]]:
739; FORCED-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
740; FORCED-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
741; FORCED:       [[SCALAR_PH]]:
742; FORCED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
743; FORCED-NEXT:    br label %[[LOOP_HEADER:.*]]
744; FORCED:       [[LOOP_HEADER]]:
745; FORCED-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
746; FORCED-NEXT:    [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
747; FORCED-NEXT:    switch i64 [[L]], label %[[DEFAULT:.*]] [
748; FORCED-NEXT:      i64 -12, label %[[IF_THEN_1:.*]]
749; FORCED-NEXT:      i64 0, label %[[IF_THEN_1]]
750; FORCED-NEXT:      i64 13, label %[[IF_THEN_2:.*]]
751; FORCED-NEXT:      i64 14, label %[[IF_THEN_2]]
752; FORCED-NEXT:      i64 15, label %[[IF_THEN_2]]
753; FORCED-NEXT:    ]
754; FORCED:       [[IF_THEN_1]]:
755; FORCED-NEXT:    store i64 42, ptr [[PTR_IV]], align 1
756; FORCED-NEXT:    br label %[[LOOP_LATCH]]
757; FORCED:       [[IF_THEN_2]]:
758; FORCED-NEXT:    store i64 0, ptr [[PTR_IV]], align 1
759; FORCED-NEXT:    br label %[[LOOP_LATCH]]
760; FORCED:       [[DEFAULT]]:
761; FORCED-NEXT:    store i64 2, ptr [[PTR_IV]], align 1
762; FORCED-NEXT:    br label %[[LOOP_LATCH]]
763; FORCED:       [[LOOP_LATCH]]:
764; FORCED-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
765; FORCED-NEXT:    [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
766; FORCED-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP11:![0-9]+]]
767; FORCED:       [[EXIT]]:
768; FORCED-NEXT:    ret void
769;
770entry:
771  br label %loop.header
772
773loop.header:
774  %ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
775  %l = load i64, ptr %ptr.iv, align 1
  ; Five case values fan in to only two destinations; every other value of
  ; %l takes the default edge.
776  switch i64 %l, label %default [
777  i64 -12, label %if.then.1
778  i64 0, label %if.then.1
779  i64 13, label %if.then.2
780  i64 14, label %if.then.2
781  i64 15, label %if.then.2
782  ]
783
784if.then.1:
785  store i64 42, ptr %ptr.iv, align 1
786  br label %loop.latch
787
788if.then.2:
789  store i64 0, ptr %ptr.iv, align 1
790  br label %loop.latch
791
792default:
793  store i64 2, ptr %ptr.iv, align 1
794  br label %loop.latch
795
796loop.latch:
  ; Advance by one i64 element and leave the loop once the advanced pointer
  ; equals %end.
797  %ptr.iv.next = getelementptr inbounds i64, ptr %ptr.iv, i64 1
798  %ec = icmp eq ptr %ptr.iv.next, %end
799  br i1 %ec, label %exit, label %loop.header
800
801exit:
802  ret void
803}
804
; Test: a switch in which an explicit case (value 0) targets the same block as
; the switch's default edge. The loop walks i64 elements starting at %start and
; stops once the advanced pointer equals %end.
;   -12            -> %if.then.1 (stores 42)
;   13             -> %if.then.2 (stores 0)
;   0 and default  -> %default   (stores 2)
; The checks under the COST prefix contain no vector blocks, i.e. the loop is
; expected to stay scalar in that configuration. The checks under the FORCED
; prefix expect a <4 x i64> vector body with two parts that compares only
; against -12 and 13; the mask for the store of 2 is the negation of the `or`
; of those two compares, with no separate compare against 0.
805define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
806; COST-LABEL: define void @switch4_default_common_dest_with_case(
807; COST-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
808; COST-NEXT:  [[ENTRY:.*]]:
809; COST-NEXT:    br label %[[LOOP_HEADER:.*]]
810; COST:       [[LOOP_HEADER]]:
811; COST-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
812; COST-NEXT:    [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
813; COST-NEXT:    switch i64 [[L]], label %[[DEFAULT:.*]] [
814; COST-NEXT:      i64 -12, label %[[IF_THEN_1:.*]]
815; COST-NEXT:      i64 13, label %[[IF_THEN_2:.*]]
816; COST-NEXT:      i64 0, label %[[DEFAULT]]
817; COST-NEXT:    ]
818; COST:       [[IF_THEN_1]]:
819; COST-NEXT:    store i64 42, ptr [[PTR_IV]], align 1
820; COST-NEXT:    br label %[[LOOP_LATCH]]
821; COST:       [[IF_THEN_2]]:
822; COST-NEXT:    store i64 0, ptr [[PTR_IV]], align 1
823; COST-NEXT:    br label %[[LOOP_LATCH]]
824; COST:       [[DEFAULT]]:
825; COST-NEXT:    store i64 2, ptr [[PTR_IV]], align 1
826; COST-NEXT:    br label %[[LOOP_LATCH]]
827; COST:       [[LOOP_LATCH]]:
828; COST-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
829; COST-NEXT:    [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
830; COST-NEXT:    br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]]
831; COST:       [[EXIT]]:
832; COST-NEXT:    ret void
833;
834; FORCED-LABEL: define void @switch4_default_common_dest_with_case(
835; FORCED-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
836; FORCED-NEXT:  [[ENTRY:.*]]:
837; FORCED-NEXT:    [[START2:%.*]] = ptrtoint ptr [[START]] to i64
838; FORCED-NEXT:    [[END1:%.*]] = ptrtoint ptr [[END]] to i64
839; FORCED-NEXT:    [[TMP0:%.*]] = add i64 [[END1]], -8
840; FORCED-NEXT:    [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
841; FORCED-NEXT:    [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
842; FORCED-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
843; FORCED-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8
844; FORCED-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
845; FORCED:       [[VECTOR_PH]]:
846; FORCED-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 8
847; FORCED-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
848; FORCED-NEXT:    [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
849; FORCED-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
850; FORCED-NEXT:    br label %[[VECTOR_BODY:.*]]
851; FORCED:       [[VECTOR_BODY]]:
852; FORCED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
853; FORCED-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
854; FORCED-NEXT:    [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0
855; FORCED-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP5]]
856; FORCED-NEXT:    [[TMP7:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0
857; FORCED-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4
858; FORCED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 1
859; FORCED-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
860; FORCED-NEXT:    [[TMP15:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
861; FORCED-NEXT:    [[TMP16:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 -12)
862; FORCED-NEXT:    [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
863; FORCED-NEXT:    [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 13)
864; FORCED-NEXT:    [[TMP17:%.*]] = or <4 x i1> [[TMP15]], [[TMP11]]
865; FORCED-NEXT:    [[TMP18:%.*]] = or <4 x i1> [[TMP16]], [[TMP12]]
866; FORCED-NEXT:    [[TMP20:%.*]] = xor <4 x i1> [[TMP17]], splat (i1 true)
867; FORCED-NEXT:    [[TMP21:%.*]] = xor <4 x i1> [[TMP18]], splat (i1 true)
868; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP7]], i32 1, <4 x i1> [[TMP11]])
869; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP8]], i32 1, <4 x i1> [[TMP12]])
870; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP7]], i32 1, <4 x i1> [[TMP15]])
871; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP8]], i32 1, <4 x i1> [[TMP16]])
872; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[TMP7]], i32 1, <4 x i1> [[TMP20]])
873; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[TMP8]], i32 1, <4 x i1> [[TMP21]])
874; FORCED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
875; FORCED-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
876; FORCED-NEXT:    br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
877; FORCED:       [[MIDDLE_BLOCK]]:
878; FORCED-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
879; FORCED-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
880; FORCED:       [[SCALAR_PH]]:
881; FORCED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
882; FORCED-NEXT:    br label %[[LOOP_HEADER:.*]]
883; FORCED:       [[LOOP_HEADER]]:
884; FORCED-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
885; FORCED-NEXT:    [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
886; FORCED-NEXT:    switch i64 [[L]], label %[[DEFAULT:.*]] [
887; FORCED-NEXT:      i64 -12, label %[[IF_THEN_1:.*]]
888; FORCED-NEXT:      i64 13, label %[[IF_THEN_2:.*]]
889; FORCED-NEXT:      i64 0, label %[[DEFAULT]]
890; FORCED-NEXT:    ]
891; FORCED:       [[IF_THEN_1]]:
892; FORCED-NEXT:    store i64 42, ptr [[PTR_IV]], align 1
893; FORCED-NEXT:    br label %[[LOOP_LATCH]]
894; FORCED:       [[IF_THEN_2]]:
895; FORCED-NEXT:    store i64 0, ptr [[PTR_IV]], align 1
896; FORCED-NEXT:    br label %[[LOOP_LATCH]]
897; FORCED:       [[DEFAULT]]:
898; FORCED-NEXT:    store i64 2, ptr [[PTR_IV]], align 1
899; FORCED-NEXT:    br label %[[LOOP_LATCH]]
900; FORCED:       [[LOOP_LATCH]]:
901; FORCED-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
902; FORCED-NEXT:    [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
903; FORCED-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP13:![0-9]+]]
904; FORCED:       [[EXIT]]:
905; FORCED-NEXT:    ret void
906;
907entry:
908  br label %loop.header
909
910loop.header:
911  %ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
912  %l = load i64, ptr %ptr.iv, align 1
  ; The `i64 0` case names %default as its destination, so that block is
  ; reached both through an explicit case and through the default edge.
913  switch i64 %l, label %default [
914  i64 -12, label %if.then.1
915  i64 13, label %if.then.2
916  i64 0, label %default
917  ]
918
919if.then.1:
920  store i64 42, ptr %ptr.iv, align 1
921  br label %loop.latch
922
923if.then.2:
924  store i64 0, ptr %ptr.iv, align 1
925  br label %loop.latch
926
927default:
928  store i64 2, ptr %ptr.iv, align 1
929  br label %loop.latch
930
931loop.latch:
  ; Advance by one i64 element and leave the loop once the advanced pointer
  ; equals %end.
932  %ptr.iv.next = getelementptr inbounds i64, ptr %ptr.iv, i64 1
933  %ec = icmp eq ptr %ptr.iv.next, %end
934  br i1 %ec, label %exit, label %loop.header
935
936exit:
937  ret void
938}
939
; Test: the same switch as in @switch4_default_common_dest_with_case (case 0
; shares %default with the default edge), but nested under a conditional
; branch. The switch only executes when the loaded value %l is unsigned
; less-than-or-equal to the argument %x; otherwise control goes straight to
; the latch and nothing is stored.
; The checks under both prefixes expect a vector body operating on <4 x i64>
; (one part under the COST prefix, two parts under the FORCED prefix). In the
; expected output each case mask is a `select` of the guard compare with the
; case compare, and the mask for the store of 2 is a `select` of the guard
; with the negation of the `or` of the two case masks.
940define void @switch_under_br_default_common_dest_with_case(ptr %start, ptr %end, i64 %x) {
941; COST-LABEL: define void @switch_under_br_default_common_dest_with_case(
942; COST-SAME: ptr [[START:%.*]], ptr [[END:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
943; COST-NEXT:  [[ENTRY:.*]]:
944; COST-NEXT:    [[START2:%.*]] = ptrtoint ptr [[START]] to i64
945; COST-NEXT:    [[END1:%.*]] = ptrtoint ptr [[END]] to i64
946; COST-NEXT:    [[TMP0:%.*]] = add i64 [[END1]], -8
947; COST-NEXT:    [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
948; COST-NEXT:    [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
949; COST-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
950; COST-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4
951; COST-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
952; COST:       [[VECTOR_PH]]:
953; COST-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4
954; COST-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
955; COST-NEXT:    [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
956; COST-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
957; COST-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[X]], i64 0
958; COST-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
959; COST-NEXT:    br label %[[VECTOR_BODY:.*]]
960; COST:       [[VECTOR_BODY]]:
961; COST-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
962; COST-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
963; COST-NEXT:    [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0
964; COST-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP5]]
965; COST-NEXT:    [[TMP6:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0
966; COST-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP6]], align 1
967; COST-NEXT:    [[TMP7:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
968; COST-NEXT:    [[TMP8:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
969; COST-NEXT:    [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
970; COST-NEXT:    [[TMP10:%.*]] = select <4 x i1> [[TMP7]], <4 x i1> [[TMP8]], <4 x i1> zeroinitializer
971; COST-NEXT:    [[TMP11:%.*]] = select <4 x i1> [[TMP7]], <4 x i1> [[TMP9]], <4 x i1> zeroinitializer
972; COST-NEXT:    [[TMP12:%.*]] = or <4 x i1> [[TMP10]], [[TMP11]]
973; COST-NEXT:    [[TMP13:%.*]] = xor <4 x i1> [[TMP12]], splat (i1 true)
974; COST-NEXT:    [[TMP14:%.*]] = select <4 x i1> [[TMP7]], <4 x i1> [[TMP13]], <4 x i1> zeroinitializer
975; COST-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP6]], i32 1, <4 x i1> [[TMP11]])
976; COST-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP6]], i32 1, <4 x i1> [[TMP10]])
977; COST-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[TMP6]], i32 1, <4 x i1> [[TMP14]])
978; COST-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
979; COST-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
980; COST-NEXT:    br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
981; COST:       [[MIDDLE_BLOCK]]:
982; COST-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
983; COST-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
984; COST:       [[SCALAR_PH]]:
985; COST-NEXT:    [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
986; COST-NEXT:    br label %[[LOOP_HEADER:.*]]
987; COST:       [[LOOP_HEADER]]:
988; COST-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
989; COST-NEXT:    [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
990; COST-NEXT:    [[C:%.*]] = icmp ule i64 [[L]], [[X]]
991; COST-NEXT:    br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
992; COST:       [[THEN]]:
993; COST-NEXT:    switch i64 [[L]], label %[[DEFAULT:.*]] [
994; COST-NEXT:      i64 -12, label %[[IF_THEN_1:.*]]
995; COST-NEXT:      i64 13, label %[[IF_THEN_2:.*]]
996; COST-NEXT:      i64 0, label %[[DEFAULT]]
997; COST-NEXT:    ]
998; COST:       [[IF_THEN_1]]:
999; COST-NEXT:    store i64 42, ptr [[PTR_IV]], align 1
1000; COST-NEXT:    br label %[[LOOP_LATCH]]
1001; COST:       [[IF_THEN_2]]:
1002; COST-NEXT:    store i64 0, ptr [[PTR_IV]], align 1
1003; COST-NEXT:    br label %[[LOOP_LATCH]]
1004; COST:       [[DEFAULT]]:
1005; COST-NEXT:    store i64 2, ptr [[PTR_IV]], align 1
1006; COST-NEXT:    br label %[[LOOP_LATCH]]
1007; COST:       [[LOOP_LATCH]]:
1008; COST-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
1009; COST-NEXT:    [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
1010; COST-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP9:![0-9]+]]
1011; COST:       [[EXIT]]:
1012; COST-NEXT:    ret void
1013;
1014; FORCED-LABEL: define void @switch_under_br_default_common_dest_with_case(
1015; FORCED-SAME: ptr [[START:%.*]], ptr [[END:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
1016; FORCED-NEXT:  [[ENTRY:.*]]:
1017; FORCED-NEXT:    [[START2:%.*]] = ptrtoint ptr [[START]] to i64
1018; FORCED-NEXT:    [[END1:%.*]] = ptrtoint ptr [[END]] to i64
1019; FORCED-NEXT:    [[TMP0:%.*]] = add i64 [[END1]], -8
1020; FORCED-NEXT:    [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
1021; FORCED-NEXT:    [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
1022; FORCED-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
1023; FORCED-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8
1024; FORCED-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
1025; FORCED:       [[VECTOR_PH]]:
1026; FORCED-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 8
1027; FORCED-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
1028; FORCED-NEXT:    [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
1029; FORCED-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
1030; FORCED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[X]], i64 0
1031; FORCED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
1032; FORCED-NEXT:    br label %[[VECTOR_BODY:.*]]
1033; FORCED:       [[VECTOR_BODY]]:
1034; FORCED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
1035; FORCED-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
1036; FORCED-NEXT:    [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0
1037; FORCED-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP5]]
1038; FORCED-NEXT:    [[TMP7:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0
1039; FORCED-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4
1040; FORCED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 1
1041; FORCED-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
1042; FORCED-NEXT:    [[TMP9:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
1043; FORCED-NEXT:    [[TMP10:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD4]], [[BROADCAST_SPLAT]]
1044; FORCED-NEXT:    [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
1045; FORCED-NEXT:    [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 -12)
1046; FORCED-NEXT:    [[TMP13:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
1047; FORCED-NEXT:    [[TMP14:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 13)
1048; FORCED-NEXT:    [[TMP15:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP11]], <4 x i1> zeroinitializer
1049; FORCED-NEXT:    [[TMP16:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP12]], <4 x i1> zeroinitializer
1050; FORCED-NEXT:    [[TMP25:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP13]], <4 x i1> zeroinitializer
1051; FORCED-NEXT:    [[TMP26:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP14]], <4 x i1> zeroinitializer
1052; FORCED-NEXT:    [[TMP27:%.*]] = or <4 x i1> [[TMP15]], [[TMP25]]
1053; FORCED-NEXT:    [[TMP20:%.*]] = or <4 x i1> [[TMP16]], [[TMP26]]
1054; FORCED-NEXT:    [[TMP21:%.*]] = xor <4 x i1> [[TMP27]], splat (i1 true)
1055; FORCED-NEXT:    [[TMP22:%.*]] = xor <4 x i1> [[TMP20]], splat (i1 true)
1056; FORCED-NEXT:    [[TMP23:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP21]], <4 x i1> zeroinitializer
1057; FORCED-NEXT:    [[TMP24:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP22]], <4 x i1> zeroinitializer
1058; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP7]], i32 1, <4 x i1> [[TMP25]])
1059; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP8]], i32 1, <4 x i1> [[TMP26]])
1060; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP7]], i32 1, <4 x i1> [[TMP15]])
1061; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP8]], i32 1, <4 x i1> [[TMP16]])
1062; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[TMP7]], i32 1, <4 x i1> [[TMP23]])
1063; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[TMP8]], i32 1, <4 x i1> [[TMP24]])
1064; FORCED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
1065; FORCED-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1066; FORCED-NEXT:    br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
1067; FORCED:       [[MIDDLE_BLOCK]]:
1068; FORCED-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
1069; FORCED-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
1070; FORCED:       [[SCALAR_PH]]:
1071; FORCED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
1072; FORCED-NEXT:    br label %[[LOOP_HEADER:.*]]
1073; FORCED:       [[LOOP_HEADER]]:
1074; FORCED-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
1075; FORCED-NEXT:    [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
1076; FORCED-NEXT:    [[C:%.*]] = icmp ule i64 [[L]], [[X]]
1077; FORCED-NEXT:    br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
1078; FORCED:       [[THEN]]:
1079; FORCED-NEXT:    switch i64 [[L]], label %[[DEFAULT:.*]] [
1080; FORCED-NEXT:      i64 -12, label %[[IF_THEN_1:.*]]
1081; FORCED-NEXT:      i64 13, label %[[IF_THEN_2:.*]]
1082; FORCED-NEXT:      i64 0, label %[[DEFAULT]]
1083; FORCED-NEXT:    ]
1084; FORCED:       [[IF_THEN_1]]:
1085; FORCED-NEXT:    store i64 42, ptr [[PTR_IV]], align 1
1086; FORCED-NEXT:    br label %[[LOOP_LATCH]]
1087; FORCED:       [[IF_THEN_2]]:
1088; FORCED-NEXT:    store i64 0, ptr [[PTR_IV]], align 1
1089; FORCED-NEXT:    br label %[[LOOP_LATCH]]
1090; FORCED:       [[DEFAULT]]:
1091; FORCED-NEXT:    store i64 2, ptr [[PTR_IV]], align 1
1092; FORCED-NEXT:    br label %[[LOOP_LATCH]]
1093; FORCED:       [[LOOP_LATCH]]:
1094; FORCED-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
1095; FORCED-NEXT:    [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
1096; FORCED-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP15:![0-9]+]]
1097; FORCED:       [[EXIT]]:
1098; FORCED-NEXT:    ret void
1099;
1100entry:
1101  br label %loop.header
1102
1103loop.header:
1104  %ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
1105  %l = load i64, ptr %ptr.iv, align 1
  ; Guard: only elements with %l <= %x (unsigned) reach the switch; the rest
  ; skip directly to the latch.
1106  %c = icmp ule i64 %l, %x
1107  br i1 %c, label %then, label %loop.latch
1108
1109then:
  ; The `i64 0` case names %default as its destination, so that block is
  ; reached both through an explicit case and through the default edge.
1110  switch i64 %l, label %default [
1111  i64 -12, label %if.then.1
1112  i64 13, label %if.then.2
1113  i64 0, label %default
1114  ]
1115
1116if.then.1:
1117  store i64 42, ptr %ptr.iv, align 1
1118  br label %loop.latch
1119
1120if.then.2:
1121  store i64 0, ptr %ptr.iv, align 1
1122  br label %loop.latch
1123
1124default:
1125  store i64 2, ptr %ptr.iv, align 1
1126  br label %loop.latch
1127
1128loop.latch:
  ; Advance by one i64 element and leave the loop once the advanced pointer
  ; equals %end.
1129  %ptr.iv.next = getelementptr inbounds i64, ptr %ptr.iv, i64 1
1130  %ec = icmp eq ptr %ptr.iv.next, %end
1131  br i1 %ec, label %exit, label %loop.header
1132
1133exit:
1134  ret void
1135}
1136
; Loop over i64 elements from %start up to %end with a switch in the header.
; The -12 case block (%if.then.1) ends in a conditional branch on %l ule %x:
; it goes either to %then, which stores 42 and then branches to %default, or
; to %if.then.2, which is also the direct destination of the 13 case.
; %default is reached from the switch default, from the explicit `i64 0`
; case, and from %then.
; The checks under the COST prefix expect the loop to stay scalar. The checks
; under the FORCED prefix expect <4 x i64> vector code with two parts per
; vector iteration (index step 8) and one pair of masked stores for each of
; the stored values 0, 42 and 2.
1137define void @br_under_switch_default_common_dest_with_case(ptr %start, ptr %end, i64 %x) {
1138; COST-LABEL: define void @br_under_switch_default_common_dest_with_case(
1139; COST-SAME: ptr [[START:%.*]], ptr [[END:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
1140; COST-NEXT:  [[ENTRY:.*]]:
1141; COST-NEXT:    br label %[[LOOP_HEADER:.*]]
1142; COST:       [[LOOP_HEADER]]:
1143; COST-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
1144; COST-NEXT:    [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
1145; COST-NEXT:    switch i64 [[L]], label %[[DEFAULT:.*]] [
1146; COST-NEXT:      i64 -12, label %[[IF_THEN_1:.*]]
1147; COST-NEXT:      i64 13, label %[[IF_THEN_2:.*]]
1148; COST-NEXT:      i64 0, label %[[DEFAULT]]
1149; COST-NEXT:    ]
1150; COST:       [[IF_THEN_1]]:
1151; COST-NEXT:    [[C:%.*]] = icmp ule i64 [[L]], [[X]]
1152; COST-NEXT:    br i1 [[C]], label %[[THEN:.*]], label %[[IF_THEN_2]]
1153; COST:       [[THEN]]:
1154; COST-NEXT:    store i64 42, ptr [[PTR_IV]], align 1
1155; COST-NEXT:    br label %[[DEFAULT]]
1156; COST:       [[IF_THEN_2]]:
1157; COST-NEXT:    store i64 0, ptr [[PTR_IV]], align 1
1158; COST-NEXT:    br label %[[LOOP_LATCH]]
1159; COST:       [[DEFAULT]]:
1160; COST-NEXT:    store i64 2, ptr [[PTR_IV]], align 1
1161; COST-NEXT:    br label %[[LOOP_LATCH]]
1162; COST:       [[LOOP_LATCH]]:
1163; COST-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
1164; COST-NEXT:    [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
1165; COST-NEXT:    br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]]
1166; COST:       [[EXIT]]:
1167; COST-NEXT:    ret void
1168;
1169; FORCED-LABEL: define void @br_under_switch_default_common_dest_with_case(
1170; FORCED-SAME: ptr [[START:%.*]], ptr [[END:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
1171; FORCED-NEXT:  [[ENTRY:.*]]:
1172; FORCED-NEXT:    [[START2:%.*]] = ptrtoint ptr [[START]] to i64
1173; FORCED-NEXT:    [[END1:%.*]] = ptrtoint ptr [[END]] to i64
1174; FORCED-NEXT:    [[TMP0:%.*]] = add i64 [[END1]], -8
1175; FORCED-NEXT:    [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
1176; FORCED-NEXT:    [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
1177; FORCED-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
1178; FORCED-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8
1179; FORCED-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
1180; FORCED:       [[VECTOR_PH]]:
1181; FORCED-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 8
1182; FORCED-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
1183; FORCED-NEXT:    [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
1184; FORCED-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
1185; FORCED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[X]], i64 0
1186; FORCED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
1187; FORCED-NEXT:    br label %[[VECTOR_BODY:.*]]
1188; FORCED:       [[VECTOR_BODY]]:
1189; FORCED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
1190; FORCED-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
1191; FORCED-NEXT:    [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0
1192; FORCED-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP5]]
1193; FORCED-NEXT:    [[TMP7:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0
1194; FORCED-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4
1195; FORCED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 1
1196; FORCED-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
1197; FORCED-NEXT:    [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
1198; FORCED-NEXT:    [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 -12)
1199; FORCED-NEXT:    [[TMP25:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
1200; FORCED-NEXT:    [[TMP26:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 13)
1201; FORCED-NEXT:    [[TMP13:%.*]] = or <4 x i1> [[TMP9]], [[TMP25]]
1202; FORCED-NEXT:    [[TMP14:%.*]] = or <4 x i1> [[TMP10]], [[TMP26]]
1203; FORCED-NEXT:    [[TMP15:%.*]] = xor <4 x i1> [[TMP13]], splat (i1 true)
1204; FORCED-NEXT:    [[TMP16:%.*]] = xor <4 x i1> [[TMP14]], splat (i1 true)
1205; FORCED-NEXT:    [[TMP17:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
1206; FORCED-NEXT:    [[TMP18:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD4]], [[BROADCAST_SPLAT]]
1207; FORCED-NEXT:    [[TMP19:%.*]] = xor <4 x i1> [[TMP17]], splat (i1 true)
1208; FORCED-NEXT:    [[TMP20:%.*]] = xor <4 x i1> [[TMP18]], splat (i1 true)
1209; FORCED-NEXT:    [[TMP27:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP19]], <4 x i1> zeroinitializer
1210; FORCED-NEXT:    [[TMP28:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP20]], <4 x i1> zeroinitializer
1211; FORCED-NEXT:    [[TMP29:%.*]] = or <4 x i1> [[TMP27]], [[TMP25]]
1212; FORCED-NEXT:    [[TMP30:%.*]] = or <4 x i1> [[TMP28]], [[TMP26]]
1213; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP7]], i32 1, <4 x i1> [[TMP29]])
1214; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP8]], i32 1, <4 x i1> [[TMP30]])
1215; FORCED-NEXT:    [[TMP32:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP17]], <4 x i1> zeroinitializer
1216; FORCED-NEXT:    [[TMP33:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP18]], <4 x i1> zeroinitializer
1217; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP7]], i32 1, <4 x i1> [[TMP32]])
1218; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP8]], i32 1, <4 x i1> [[TMP33]])
1219; FORCED-NEXT:    [[TMP36:%.*]] = or <4 x i1> [[TMP32]], [[TMP15]]
1220; FORCED-NEXT:    [[TMP37:%.*]] = or <4 x i1> [[TMP33]], [[TMP16]]
1221; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[TMP7]], i32 1, <4 x i1> [[TMP36]])
1222; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[TMP8]], i32 1, <4 x i1> [[TMP37]])
1223; FORCED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
1224; FORCED-NEXT:    [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1225; FORCED-NEXT:    br i1 [[TMP31]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
1226; FORCED:       [[MIDDLE_BLOCK]]:
1227; FORCED-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
1228; FORCED-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
1229; FORCED:       [[SCALAR_PH]]:
1230; FORCED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
1231; FORCED-NEXT:    br label %[[LOOP_HEADER:.*]]
1232; FORCED:       [[LOOP_HEADER]]:
1233; FORCED-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
1234; FORCED-NEXT:    [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
1235; FORCED-NEXT:    switch i64 [[L]], label %[[DEFAULT:.*]] [
1236; FORCED-NEXT:      i64 -12, label %[[IF_THEN_1:.*]]
1237; FORCED-NEXT:      i64 13, label %[[IF_THEN_2:.*]]
1238; FORCED-NEXT:      i64 0, label %[[DEFAULT]]
1239; FORCED-NEXT:    ]
1240; FORCED:       [[IF_THEN_1]]:
1241; FORCED-NEXT:    [[C:%.*]] = icmp ule i64 [[L]], [[X]]
1242; FORCED-NEXT:    br i1 [[C]], label %[[THEN:.*]], label %[[IF_THEN_2]]
1243; FORCED:       [[THEN]]:
1244; FORCED-NEXT:    store i64 42, ptr [[PTR_IV]], align 1
1245; FORCED-NEXT:    br label %[[DEFAULT]]
1246; FORCED:       [[IF_THEN_2]]:
1247; FORCED-NEXT:    store i64 0, ptr [[PTR_IV]], align 1
1248; FORCED-NEXT:    br label %[[LOOP_LATCH]]
1249; FORCED:       [[DEFAULT]]:
1250; FORCED-NEXT:    store i64 2, ptr [[PTR_IV]], align 1
1251; FORCED-NEXT:    br label %[[LOOP_LATCH]]
1252; FORCED:       [[LOOP_LATCH]]:
1253; FORCED-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
1254; FORCED-NEXT:    [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
1255; FORCED-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP17:![0-9]+]]
1256; FORCED:       [[EXIT]]:
1257; FORCED-NEXT:    ret void
1258;
1259entry:
1260  br label %loop.header
1261
1262loop.header:
1263  %ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
1264  %l = load i64, ptr %ptr.iv, align 1
  ; -12 -> %if.then.1, 13 -> %if.then.2; 0 and every other value -> %default.
1265  switch i64 %l, label %default [
1266  i64 -12, label %if.then.1
1267  i64 13, label %if.then.2
1268  i64 0, label %default
1269  ]
1270
1271if.then.1:
  ; Second-level condition under the -12 case; the false edge shares
  ; %if.then.2 with the 13 case of the switch.
1272  %c = icmp ule i64 %l, %x
1273  br i1 %c, label %then, label %if.then.2
1274
1275then:
  ; Stores 42 and continues to %default, which stores 2 to the same address.
1276  store i64 42, ptr %ptr.iv, align 1
1277  br label %default
1278
1279if.then.2:
1280  store i64 0, ptr %ptr.iv, align 1
1281  br label %loop.latch
1282
1283default:
1284  store i64 2, ptr %ptr.iv, align 1
1285  br label %loop.latch
1286
1287loop.latch:
  ; Advance by one i64 element; exit once the next pointer equals %end.
1288  %ptr.iv.next = getelementptr inbounds i64, ptr %ptr.iv, i64 1
1289  %ec = icmp eq ptr %ptr.iv.next, %end
1290  br i1 %ec, label %exit, label %loop.header
1291
1292exit:
1293  ret void
1294}
1295
; Loop over i64 elements from %start up to %end with a switch whose nine
; cases (1, 3, 11, 99, 213, 238, 513, 791, 899) all target %if.then, which
; stores 42; every other value goes straight to the latch.
; The checks under the COST prefix expect the loop to stay scalar. The checks
; under the FORCED prefix expect <4 x i64> vector code with two parts per
; vector iteration (index step 8): one icmp eq per case value and part, or-ed
; together into the mask of a single masked store of 42 per part.
1296define void @large_number_of_cases(ptr %start, ptr %end) {
1297; COST-LABEL: define void @large_number_of_cases(
1298; COST-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
1299; COST-NEXT:  [[ENTRY:.*]]:
1300; COST-NEXT:    br label %[[LOOP_HEADER:.*]]
1301; COST:       [[LOOP_HEADER]]:
1302; COST-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
1303; COST-NEXT:    [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
1304; COST-NEXT:    switch i64 [[L]], label %[[LOOP_LATCH]] [
1305; COST-NEXT:      i64 1, label %[[IF_THEN:.*]]
1306; COST-NEXT:      i64 3, label %[[IF_THEN]]
1307; COST-NEXT:      i64 11, label %[[IF_THEN]]
1308; COST-NEXT:      i64 99, label %[[IF_THEN]]
1309; COST-NEXT:      i64 213, label %[[IF_THEN]]
1310; COST-NEXT:      i64 238, label %[[IF_THEN]]
1311; COST-NEXT:      i64 513, label %[[IF_THEN]]
1312; COST-NEXT:      i64 791, label %[[IF_THEN]]
1313; COST-NEXT:      i64 899, label %[[IF_THEN]]
1314; COST-NEXT:    ]
1315; COST:       [[IF_THEN]]:
1316; COST-NEXT:    store i64 42, ptr [[PTR_IV]], align 1
1317; COST-NEXT:    br label %[[LOOP_LATCH]]
1318; COST:       [[LOOP_LATCH]]:
1319; COST-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
1320; COST-NEXT:    [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
1321; COST-NEXT:    br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]]
1322; COST:       [[EXIT]]:
1323; COST-NEXT:    ret void
1324;
1325; FORCED-LABEL: define void @large_number_of_cases(
1326; FORCED-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
1327; FORCED-NEXT:  [[ENTRY:.*]]:
1328; FORCED-NEXT:    [[START2:%.*]] = ptrtoint ptr [[START]] to i64
1329; FORCED-NEXT:    [[END1:%.*]] = ptrtoint ptr [[END]] to i64
1330; FORCED-NEXT:    [[TMP0:%.*]] = add i64 [[END1]], -8
1331; FORCED-NEXT:    [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
1332; FORCED-NEXT:    [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
1333; FORCED-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
1334; FORCED-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8
1335; FORCED-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
1336; FORCED:       [[VECTOR_PH]]:
1337; FORCED-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 8
1338; FORCED-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
1339; FORCED-NEXT:    [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
1340; FORCED-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
1341; FORCED-NEXT:    br label %[[VECTOR_BODY:.*]]
1342; FORCED:       [[VECTOR_BODY]]:
1343; FORCED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
1344; FORCED-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
1345; FORCED-NEXT:    [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0
1346; FORCED-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP5]]
1347; FORCED-NEXT:    [[TMP7:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0
1348; FORCED-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4
1349; FORCED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 1
1350; FORCED-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
1351; FORCED-NEXT:    [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 1)
1352; FORCED-NEXT:    [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 1)
1353; FORCED-NEXT:    [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 3)
1354; FORCED-NEXT:    [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 3)
1355; FORCED-NEXT:    [[TMP13:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 11)
1356; FORCED-NEXT:    [[TMP14:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 11)
1357; FORCED-NEXT:    [[TMP15:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 99)
1358; FORCED-NEXT:    [[TMP16:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 99)
1359; FORCED-NEXT:    [[TMP17:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 213)
1360; FORCED-NEXT:    [[TMP18:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 213)
1361; FORCED-NEXT:    [[TMP19:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 238)
1362; FORCED-NEXT:    [[TMP20:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 238)
1363; FORCED-NEXT:    [[TMP21:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 513)
1364; FORCED-NEXT:    [[TMP22:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 513)
1365; FORCED-NEXT:    [[TMP23:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 791)
1366; FORCED-NEXT:    [[TMP24:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 791)
1367; FORCED-NEXT:    [[TMP25:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 899)
1368; FORCED-NEXT:    [[TMP26:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 899)
1369; FORCED-NEXT:    [[TMP27:%.*]] = or <4 x i1> [[TMP9]], [[TMP11]]
1370; FORCED-NEXT:    [[TMP28:%.*]] = or <4 x i1> [[TMP10]], [[TMP12]]
1371; FORCED-NEXT:    [[TMP29:%.*]] = or <4 x i1> [[TMP27]], [[TMP13]]
1372; FORCED-NEXT:    [[TMP30:%.*]] = or <4 x i1> [[TMP28]], [[TMP14]]
1373; FORCED-NEXT:    [[TMP31:%.*]] = or <4 x i1> [[TMP29]], [[TMP15]]
1374; FORCED-NEXT:    [[TMP32:%.*]] = or <4 x i1> [[TMP30]], [[TMP16]]
1375; FORCED-NEXT:    [[TMP33:%.*]] = or <4 x i1> [[TMP31]], [[TMP17]]
1376; FORCED-NEXT:    [[TMP34:%.*]] = or <4 x i1> [[TMP32]], [[TMP18]]
1377; FORCED-NEXT:    [[TMP35:%.*]] = or <4 x i1> [[TMP33]], [[TMP19]]
1378; FORCED-NEXT:    [[TMP36:%.*]] = or <4 x i1> [[TMP34]], [[TMP20]]
1379; FORCED-NEXT:    [[TMP37:%.*]] = or <4 x i1> [[TMP35]], [[TMP21]]
1380; FORCED-NEXT:    [[TMP38:%.*]] = or <4 x i1> [[TMP36]], [[TMP22]]
1381; FORCED-NEXT:    [[TMP39:%.*]] = or <4 x i1> [[TMP37]], [[TMP23]]
1382; FORCED-NEXT:    [[TMP40:%.*]] = or <4 x i1> [[TMP38]], [[TMP24]]
1383; FORCED-NEXT:    [[TMP57:%.*]] = or <4 x i1> [[TMP39]], [[TMP25]]
1384; FORCED-NEXT:    [[TMP58:%.*]] = or <4 x i1> [[TMP40]], [[TMP26]]
1385; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP7]], i32 1, <4 x i1> [[TMP57]])
1386; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP8]], i32 1, <4 x i1> [[TMP58]])
1387; FORCED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
1388; FORCED-NEXT:    [[TMP59:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1389; FORCED-NEXT:    br i1 [[TMP59]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
1390; FORCED:       [[MIDDLE_BLOCK]]:
1391; FORCED-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
1392; FORCED-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
1393; FORCED:       [[SCALAR_PH]]:
1394; FORCED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
1395; FORCED-NEXT:    br label %[[LOOP_HEADER:.*]]
1396; FORCED:       [[LOOP_HEADER]]:
1397; FORCED-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
1398; FORCED-NEXT:    [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
1399; FORCED-NEXT:    switch i64 [[L]], label %[[LOOP_LATCH]] [
1400; FORCED-NEXT:      i64 1, label %[[IF_THEN:.*]]
1401; FORCED-NEXT:      i64 3, label %[[IF_THEN]]
1402; FORCED-NEXT:      i64 11, label %[[IF_THEN]]
1403; FORCED-NEXT:      i64 99, label %[[IF_THEN]]
1404; FORCED-NEXT:      i64 213, label %[[IF_THEN]]
1405; FORCED-NEXT:      i64 238, label %[[IF_THEN]]
1406; FORCED-NEXT:      i64 513, label %[[IF_THEN]]
1407; FORCED-NEXT:      i64 791, label %[[IF_THEN]]
1408; FORCED-NEXT:      i64 899, label %[[IF_THEN]]
1409; FORCED-NEXT:    ]
1410; FORCED:       [[IF_THEN]]:
1411; FORCED-NEXT:    store i64 42, ptr [[PTR_IV]], align 1
1412; FORCED-NEXT:    br label %[[LOOP_LATCH]]
1413; FORCED:       [[LOOP_LATCH]]:
1414; FORCED-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
1415; FORCED-NEXT:    [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
1416; FORCED-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP19:![0-9]+]]
1417; FORCED:       [[EXIT]]:
1418; FORCED-NEXT:    ret void
1419;
1420entry:
1421  br label %loop.header
1422
1423loop.header:
1424  %ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
1425  %l = load i64, ptr %ptr.iv, align 1
  ; All nine cases share %if.then; the default edge skips the store.
1426  switch i64 %l, label %loop.latch [
1427  i64 1, label %if.then
1428  i64 3, label %if.then
1429  i64 11, label %if.then
1430  i64 99, label %if.then
1431  i64 213, label %if.then
1432  i64 238, label %if.then
1433  i64 513, label %if.then
1434  i64 791, label %if.then
1435  i64 899, label %if.then
1436  ]
1437
1438if.then:
1439  store i64 42, ptr %ptr.iv, align 1
1440  br label %loop.latch
1441
1442loop.latch:
  ; Advance by one i64 element; exit once the next pointer equals %end.
1443  %ptr.iv.next = getelementptr inbounds i64, ptr %ptr.iv, i64 1
1444  %ec = icmp eq ptr %ptr.iv.next, %end
1445  br i1 %ec, label %exit, label %loop.header
1446
1447exit:
1448  ret void
1449}
1450
1451;.
1452; COST: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
1453; COST: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
1454; COST: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
1455; COST: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
1456; COST: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
1457; COST: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
1458; COST: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
1459; COST: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
1460; COST: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
1461; COST: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
1462;.
1463; FORCED: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
1464; FORCED: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
1465; FORCED: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
1466; FORCED: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
1467; FORCED: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
1468; FORCED: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
1469; FORCED: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
1470; FORCED: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
1471; FORCED: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
1472; FORCED: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
1473; FORCED: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]}
1474; FORCED: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]}
1475; FORCED: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]}
1476; FORCED: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]}
1477; FORCED: [[LOOP14]] = distinct !{[[LOOP14]], [[META1]], [[META2]]}
1478; FORCED: [[LOOP15]] = distinct !{[[LOOP15]], [[META2]], [[META1]]}
1479; FORCED: [[LOOP16]] = distinct !{[[LOOP16]], [[META1]], [[META2]]}
1480; FORCED: [[LOOP17]] = distinct !{[[LOOP17]], [[META2]], [[META1]]}
1481; FORCED: [[LOOP18]] = distinct !{[[LOOP18]], [[META1]], [[META2]]}
1482; FORCED: [[LOOP19]] = distinct !{[[LOOP19]], [[META2]], [[META1]]}
1483;.
1484