xref: /llvm-project/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll (revision 1de3dc7d23dd6b856efad3a3a04f2396328726d7)
1; RUN: opt %s -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S | FileCheck %s
2
3; Make sure that integer poison-generating flags (i.e., nuw/nsw, exact and inbounds)
4; are dropped from instructions in blocks that need predication and are linearized
5; and masked after vectorization. We only drop flags from scalar instructions that
6; contribute to the address computation of a masked vector load/store. After
7; linearizing the control flow and removing their guarding condition, these
8; instructions could generate a poison value which would be used as base address of
9; the masked vector load/store (see PR52111). For gather/scatter cases,
10; poison-generating flags can be preserved since poison addresses in the vector GEP
11; reaching the gather/scatter instruction will be masked-out by the gather/scatter
12; instruction itself and won't be used.
13; We need AVX512 target features for the loop to be vectorized with masks instead of
14; predicates.
15
; Module-level target configuration: the data layout string and the x86-64
; Linux triple used by every function in this file.
16target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
17target triple = "x86_64-pc-linux-gnu"
18
19; Drop poison-generating flags from 'sub' and 'getelementptr' feeding a masked load.
20; Test for PR52111.
; The load in %if.then is skipped when %iv == 0. The expected vector body builds
; the load mask by negating the widened 'icmp eq', and the scalar 'sub' and
; 'getelementptr' that form the masked load's base address must appear without
; nuw/nsw/inbounds.
21define void @drop_scalar_nuw_nsw(ptr noalias nocapture readonly %input,
22                                 ptr %output) local_unnamed_addr #0 {
23; CHECK-LABEL: @drop_scalar_nuw_nsw(
24; CHECK:       vector.body:
25; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
26; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
27; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
28; CHECK:         [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
29; CHECK-NEXT:    [[TMP7:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
30; CHECK-NEXT:    [[TMP5:%.*]] = sub i64 [[TMP0]], 1
31; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr float, ptr [[INPUT:%.*]], i64 [[TMP5]]
32; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr float, ptr [[TMP6]], i32 0
33; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP8]], i32 4, <4 x i1> [[TMP7]], <4 x float> poison), !invariant.load !0
34entry:
35  br label %loop.header
36
37loop.header:
38  %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
39  %i23 = icmp eq i64 %iv, 0
40  br i1 %i23, label %if.end, label %if.then ; skip the load for %iv == 0
41
42if.then:
43  %i27 = sub nuw nsw i64 %iv, 1 ; would wrap if it ran for %iv == 0, which the branch above excludes
44  %i29 = getelementptr inbounds float, ptr %input, i64 %i27
45  %i30 = load float, ptr %i29, align 4, !invariant.load !0
46  br label %if.end
47
48if.end:
; Stores the loaded value, or 0.0 when the load was skipped.
49  %i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ]
50  %i35 = getelementptr inbounds float, ptr %output, i64 %iv
51  store float %i34, ptr %i35, align 4
52  %iv.inc = add nuw nsw i64 %iv, 1
53  %exitcond = icmp eq i64 %iv.inc, 4
54  br i1 %exitcond, label %loop.exit, label %loop.header
55
56loop.exit:
57  ret void
58}
59
60; Variant with getelementptr nusw.
; Same loop shape as the 'inbounds' test, but the GEPs carry 'nusw'. The
; expected 'sub' and 'getelementptr' feeding the masked load again appear
; without any flags.
61define void @drop_scalar_gep_nusw(ptr noalias nocapture readonly %input,
62                                  ptr %output) local_unnamed_addr #0 {
63; CHECK-LABEL: @drop_scalar_gep_nusw(
64; CHECK:       vector.body:
65; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
66; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
67; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
68; CHECK:         [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
69; CHECK-NEXT:    [[TMP7:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
70; CHECK-NEXT:    [[TMP5:%.*]] = sub i64 [[TMP0]], 1
71; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr float, ptr [[INPUT:%.*]], i64 [[TMP5]]
72; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr float, ptr [[TMP6]], i32 0
73; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP8]], i32 4, <4 x i1> [[TMP7]], <4 x float> poison), !invariant.load !0
74entry:
75  br label %loop.header
76
77loop.header:
78  %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
79  %i23 = icmp eq i64 %iv, 0
80  br i1 %i23, label %if.end, label %if.then ; skip the load for %iv == 0
81
82if.then:
83  %i27 = sub nuw nsw i64 %iv, 1
84  %i29 = getelementptr nusw float, ptr %input, i64 %i27 ; 'nusw' is the flag under test
85  %i30 = load float, ptr %i29, align 4, !invariant.load !0
86  br label %if.end
87
88if.end:
89  %i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ]
; The store address GEP also uses 'nusw'; the expectations above stop at the
; masked load and do not cover it.
90  %i35 = getelementptr nusw float, ptr %output, i64 %iv
91  store float %i34, ptr %i35, align 4
92  %iv.inc = add nuw nsw i64 %iv, 1
93  %exitcond = icmp eq i64 %iv.inc, 4
94  br i1 %exitcond, label %loop.exit, label %loop.header
95
96loop.exit:
97  ret void
98}
99
100; Variant with getelementptr nuw.
; Same loop shape as the 'inbounds' test, but the GEPs carry 'nuw'. The
; expected 'sub' and 'getelementptr' feeding the masked load again appear
; without any flags.
101define void @drop_scalar_gep_nuw(ptr noalias nocapture readonly %input,
102                                 ptr %output) local_unnamed_addr #0 {
103; CHECK-LABEL: @drop_scalar_gep_nuw(
104; CHECK:       vector.body:
105; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
106; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
107; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
108; CHECK:         [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
109; CHECK-NEXT:    [[TMP7:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
110; CHECK-NEXT:    [[TMP5:%.*]] = sub i64 [[TMP0]], 1
111; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr float, ptr [[INPUT:%.*]], i64 [[TMP5]]
112; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr float, ptr [[TMP6]], i32 0
113; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP8]], i32 4, <4 x i1> [[TMP7]], <4 x float> poison), !invariant.load !0
114entry:
115  br label %loop.header
116
117loop.header:
118  %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
119  %i23 = icmp eq i64 %iv, 0
120  br i1 %i23, label %if.end, label %if.then ; skip the load for %iv == 0
121
122if.then:
123  %i27 = sub nuw nsw i64 %iv, 1
124  %i29 = getelementptr nuw float, ptr %input, i64 %i27 ; 'nuw' is the flag under test
125  %i30 = load float, ptr %i29, align 4, !invariant.load !0
126  br label %if.end
127
128if.end:
129  %i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ]
; The store address GEP also uses 'nuw'; the expectations above stop at the
; masked load and do not cover it.
130  %i35 = getelementptr nuw float, ptr %output, i64 %iv
131  store float %i34, ptr %i35, align 4
132  %iv.inc = add nuw nsw i64 %iv, 1
133  %exitcond = icmp eq i64 %iv.inc, 4
134  br i1 %exitcond, label %loop.exit, label %loop.header
135
136loop.exit:
137  ret void
138}
139
140; Drop poison-generating flags from 'sub' and 'getelementptr' feeding a masked load.
141; In this case, 'sub' and 'getelementptr' are not guarded by the predicate.
; Here the address computation lives in %loop.header (executed on every
; iteration) and only the load itself is conditional. The expected output places
; the scalar 'sub'/'getelementptr' before the mask computation.
; NOTE(review): in the input below neither the 'sub' nor the 'getelementptr'
; carries a poison-generating flag, so the flag-free expected output would be
; produced even without any dropping - confirm whether flags were meant to be
; present on these two instructions.
142define void @drop_nonpred_scalar_nuw_nsw(ptr noalias nocapture readonly %input,
143                                         ptr %output) local_unnamed_addr #0 {
144; CHECK-LABEL: @drop_nonpred_scalar_nuw_nsw(
145; CHECK:       vector.body:
146; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
147; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
148; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
149; CHECK:         [[TMP5:%.*]] = sub i64 [[TMP0]], 1
150; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr float, ptr [[INPUT:%.*]], i64 [[TMP5]]
151; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
152; CHECK-NEXT:    [[TMP7:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
153; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr float, ptr [[TMP6]], i32 0
154; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP8]], i32 4, <4 x i1> [[TMP7]], <4 x float> poison), !invariant.load !0
155entry:
156  br label %loop.header
157
158loop.header:
159  %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
; Address computation is unconditional; only the load in %if.then is guarded.
160  %i27 = sub i64 %iv, 1
161  %i29 = getelementptr float, ptr %input, i64 %i27
162  %i23 = icmp eq i64 %iv, 0
163  br i1 %i23, label %if.end, label %if.then
164
165if.then:
166  %i30 = load float, ptr %i29, align 4, !invariant.load !0
167  br label %if.end
168
169if.end:
170  %i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ]
171  %i35 = getelementptr inbounds float, ptr %output, i64 %iv
172  store float %i34, ptr %i35, align 4
173  %iv.inc = add nuw nsw i64 %iv, 1
174  %exitcond = icmp eq i64 %iv.inc, 4
175  br i1 %exitcond, label %loop.exit, label %loop.header
176
177loop.exit:
178  ret void
179}
180
181; Preserve poison-generating flags from vector 'sub', 'mul' and 'getelementptr' feeding a masked gather.
; The expected vector body keeps nuw/nsw on the widened 'sub' and 'mul' and
; 'inbounds' on the vector GEP, and uses a masked gather for the load.
; (Presumably the access becomes a gather because the index is scaled by 2 and
; is therefore not consecutive - verify against the cost model if this matters.)
182define void @preserve_vector_nuw_nsw(ptr noalias nocapture readonly %input,
183                                     ptr %output) local_unnamed_addr #0 {
184; CHECK-LABEL: @preserve_vector_nuw_nsw(
185; CHECK:       vector.body:
186; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
187; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
188; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
189; CHECK:         [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
190; CHECK-NEXT:    [[TMP8:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
191; CHECK-NEXT:    [[TMP5:%.*]] = sub nuw nsw <4 x i64> [[VEC_IND]], splat (i64 1)
192; CHECK-NEXT:    [[TMP6:%.*]] = mul nuw nsw <4 x i64> [[TMP5]], splat (i64 2)
193; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, ptr [[INPUT:%.*]], <4 x i64> [[TMP6]]
194; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> [[TMP7]], i32 4, <4 x i1> [[TMP8]], <4 x float> poison), !invariant.load !0
195entry:
196  br label %loop.header
197
198loop.header:
199  %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
200  %i23 = icmp eq i64 %iv, 0
201  br i1 %i23, label %if.end, label %if.then ; skip the load for %iv == 0
202
203if.then:
204  %i27 = sub nuw nsw i64 %iv, 1
205  %i28 = mul nuw nsw i64 %i27, 2 ; index is scaled by 2 before addressing
206  %i29 = getelementptr inbounds float, ptr %input, i64 %i28
207  %i30 = load float, ptr %i29, align 4, !invariant.load !0
208  br label %if.end
209
210if.end:
211  %i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ]
212  %i35 = getelementptr inbounds float, ptr %output, i64 %iv
213  store float %i34, ptr %i35, align 4
214  %iv.inc = add nuw nsw i64 %iv, 1
215  %exitcond = icmp eq i64 %iv.inc, 4
216  br i1 %exitcond, label %loop.exit, label %loop.header
217
218loop.exit:
219  ret void
220}
221
222; Drop poison-generating flags from vector 'sub' and 'gep' feeding a masked load.
; The 'sub'/'getelementptr' are computed unconditionally in %loop.header and
; the resulting pointer is both stored to %ptrs and used by the conditional
; load. The expected output widens them to vector form without nuw/nsw/inbounds
; and takes lane 0 of the vector GEP as the masked load's base address. The GEP
; into %ptrs is expected to keep 'inbounds'.
223define void @drop_vector_nuw_nsw(ptr noalias nocapture readonly %input,
224                                 ptr %output, ptr noalias %ptrs) local_unnamed_addr #0 {
225; CHECK-LABEL: @drop_vector_nuw_nsw(
226; CHECK:       vector.body:
227; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
228; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
229; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
230; CHECK:         [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
231; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[PTRS:%.*]], i64 [[TMP0]]
232; CHECK-NEXT:    [[TMP6:%.*]] = sub <4 x i64> [[VEC_IND]], splat (i64 1)
233; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr float, ptr [[INPUT:%.*]], <4 x i64> [[TMP6]]
234; CHECK:         [[TMP10:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
235; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <4 x ptr> [[TMP7]], i32 0
236; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr float, ptr [[TMP11]], i32 0
237; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP12]], i32 4, <4 x i1> [[TMP10]], <4 x float> poison), !invariant.load !0
238entry:
239  br label %loop.header
240
241loop.header:
242  %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
243  %i23 = icmp eq i64 %iv, 0
244  %gep = getelementptr inbounds ptr, ptr %ptrs, i64 %iv
245  %i27 = sub nuw nsw i64 %iv, 1
246  %i29 = getelementptr inbounds float, ptr %input, i64 %i27
247  store ptr %i29, ptr %gep ; the pointer value itself is stored, in addition to being loaded from below
248  br i1 %i23, label %if.end, label %if.then
249
250if.then:
251  %i30 = load float, ptr %i29, align 4, !invariant.load !0
252  br label %if.end
253
254if.end:
255  %i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ]
256  %i35 = getelementptr inbounds float, ptr %output, i64 %iv
257  store float %i34, ptr %i35, align 4
258  %iv.inc = add nuw nsw i64 %iv, 1
259  %exitcond = icmp eq i64 %iv.inc, 4
260  br i1 %exitcond, label %loop.exit, label %loop.header
261
262loop.exit:
263  ret void
264}
265
266; Preserve poison-generating flags from 'sub', which is not contributing to any address computation
267; of any masked load/store/gather/scatter.
; The conditional 'sub' result is only a stored value (merged through a phi).
; The expected output keeps nuw/nsw on the widened 'sub', blends it with zero
; via a select, and writes it with a plain (unmasked) vector store.
268define void @preserve_nuw_nsw_no_addr(ptr %output) local_unnamed_addr #0 {
269; CHECK-LABEL: @preserve_nuw_nsw_no_addr(
270; CHECK:       vector.body:
271; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
272; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
273; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
274; CHECK:         [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
275; CHECK-NEXT:    [[TMP6:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
276; CHECK-NEXT:    [[TMP5:%.*]] = sub nuw nsw <4 x i64> [[VEC_IND]], splat (i64 1)
277; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[TMP5]], <4 x i64> zeroinitializer
278; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[OUTPUT:%.*]], i64 [[TMP0]]
279; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0
280; CHECK-NEXT:    store <4 x i64> [[PREDPHI]], ptr [[TMP8]], align 4
281entry:
282  br label %loop.header
283
284loop.header:
285  %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
286  %i23 = icmp eq i64 %iv, 0
287  br i1 %i23, label %if.end, label %if.then
288
289if.then:
290  %i27 = sub nuw nsw i64 %iv, 1 ; used only as data, never as an address
291  br label %if.end
292
293if.end:
294  %i34 = phi i64 [ 0, %loop.header ], [ %i27, %if.then ]
295  %i35 = getelementptr inbounds i64, ptr %output, i64 %iv
296  store i64 %i34, ptr %i35, align 4
297  %iv.inc = add nuw nsw i64 %iv, 1
298  %exitcond = icmp eq i64 %iv.inc, 4
299  br i1 %exitcond, label %loop.exit, label %loop.header
300
301loop.exit:
302  ret void
303}
304
305; Drop poison-generating flags from 'sdiv' and 'getelementptr' feeding a masked load.
; The load is skipped when %iv is non-zero and even (%i10 true). The expected
; output keeps the scalar 'sdiv' and 'getelementptr' feeding the masked load,
; but without 'exact' and 'inbounds'.
306define void @drop_scalar_exact(ptr noalias nocapture readonly %input,
307                               ptr %output) local_unnamed_addr #0 {
308; CHECK-LABEL: @drop_scalar_exact(
309; CHECK:       vector.body:
310; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
311; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
312; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
313; CHECK:         [[TMP4:%.*]] = icmp ne <4 x i64> [[VEC_IND]], zeroinitializer
314; CHECK-NEXT:    [[TMP5:%.*]] = and <4 x i64> [[VEC_IND]], splat (i64 1)
315; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq <4 x i64> [[TMP5]], zeroinitializer
316; CHECK-NEXT:    [[TMP7:%.*]] = and <4 x i1> [[TMP4]], [[TMP6]]
317; CHECK-NEXT:    [[TMP10:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true)
318; CHECK-NEXT:    [[TMP8:%.*]] = sdiv i64 [[TMP0]], 1
319; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr float, ptr [[INPUT:%.*]], i64 [[TMP8]]
320; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr float, ptr [[TMP9]], i32 0
321; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP11]], i32 4, <4 x i1> [[TMP10]], <4 x float> poison), !invariant.load !0
322entry:
323  br label %loop.header
324
325loop.header:
326  %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
; %i10 = (%iv != 0) && (%iv is even); when true the load is skipped.
327  %i7 = icmp ne i64 %iv, 0
328  %i8 = and i64 %iv, 1
329  %i9 = icmp eq i64 %i8, 0
330  %i10 = and i1 %i7, %i9
331  br i1 %i10, label %if.end, label %if.then
332
333if.then:
334  %i26 = sdiv exact i64 %iv, 1 ; 'exact' is the flag under test
335  %i29 = getelementptr inbounds float, ptr %input, i64 %i26
336  %i30 = load float, ptr %i29, align 4, !invariant.load !0
337  br label %if.end
338
339if.end:
340  %i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ]
341  %i35 = getelementptr inbounds float, ptr %output, i64 %iv
342  store float %i34, ptr %i35, align 4
343  %iv.inc = add nuw nsw i64 %iv, 1
344  %exitcond = icmp eq i64 %iv.inc, 4
345  br i1 %exitcond, label %loop.exit, label %loop.header
346
347loop.exit:
348  ret void
349}
350
; Drop the 'nneg' flag from a 'zext' feeding the address of a masked load.
; The load in %then only runs when the truncated induction variable is zero.
; The expected vector body contains a widened 'zext' without 'nneg' and GEPs
; without flags feeding the masked load, while the expected scalar remainder
; loop still contains the original 'zext nneg'. The expectations cover the
; whole function, including the scalar loop.
351define void @drop_zext_nneg(ptr noalias %p, ptr noalias %p1) #0 {
352; CHECK-LABEL: define void @drop_zext_nneg(
353; CHECK-SAME: ptr noalias [[P:%.*]], ptr noalias [[P1:%.*]]) #[[ATTR0:[0-9]+]] {
354; CHECK-NEXT:  entry:
355; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
356; CHECK:       vector.ph:
357; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
358; CHECK:       vector.body:
359; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
360; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
361; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq <4 x i32> [[VEC_IND]], zeroinitializer
362; CHECK-NEXT:    [[TMP1:%.*]] = zext <4 x i32> [[VEC_IND]] to <4 x i64>
363; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i64> [[TMP1]], i32 0
364; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr double, ptr [[P]], i64 [[TMP2]]
365; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr double, ptr [[TMP3]], i32 0
366; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP4]], i32 8, <4 x i1> [[TMP0]], <4 x double> poison)
367; CHECK-NEXT:    [[TMP5:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true)
368; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP5]], <4 x double> zeroinitializer, <4 x double> [[WIDE_MASKED_LOAD]]
369; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x double> [[PREDPHI]], i32 3
370; CHECK-NEXT:    store double [[TMP6]], ptr [[P1]], align 8
371; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
372; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
373; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
374; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
375; CHECK:       middle.block:
376; CHECK-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
377; CHECK:       scalar.ph:
378; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
379; CHECK-NEXT:    br label [[BODY:%.*]]
380; CHECK:       body:
381; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[NEXT:%.*]], [[ELSE:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
382; CHECK-NEXT:    [[TMP8:%.*]] = trunc i64 [[IV]] to i32
383; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 [[TMP8]], 0
384; CHECK-NEXT:    br i1 [[C]], label [[THEN:%.*]], label [[ELSE]]
385; CHECK:       then:
386; CHECK-NEXT:    [[ZEXT:%.*]] = zext nneg i32 [[TMP8]] to i64
387; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr double, ptr [[P]], i64 [[ZEXT]]
388; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr double, ptr [[P]], i64 [[ZEXT]]
389; CHECK-NEXT:    [[TMP9:%.*]] = load double, ptr [[IDX2]], align 8
390; CHECK-NEXT:    br label [[ELSE]]
391; CHECK:       else:
392; CHECK-NEXT:    [[PHI:%.*]] = phi double [ [[TMP9]], [[THEN]] ], [ 0.000000e+00, [[BODY]] ]
393; CHECK-NEXT:    store double [[PHI]], ptr [[P1]], align 8
394; CHECK-NEXT:    [[NEXT]] = add i64 [[IV]], 1
395; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 [[NEXT]], 1024
396; CHECK-NEXT:    br i1 [[CMP]], label [[EXIT]], label [[BODY]], !llvm.loop [[LOOP18:![0-9]+]]
397; CHECK:       exit:
398; CHECK-NEXT:    ret void
399;
400entry:
401  br label %body
402
403body:
404  %iv = phi i64 [ %next, %else ], [ 0, %entry ]
405  %0 = trunc i64 %iv to i32
406  %c = icmp eq i32 %0, 0
407  br i1 %c, label %then, label %else ; the load only runs when the truncated %iv is 0
408
409then:
410  %zext = zext nneg i32 %0 to i64 ; 'nneg' is the flag under test
411  %idx1 = getelementptr double, ptr %p, i64 %zext ; not used by any other instruction in this function
412  %idx2 = getelementptr double, ptr %p, i64 %zext
413  %1 = load double, ptr %idx2, align 8
414  br label %else
415
416else:
417  %phi = phi double [ %1, %then ], [ 0.000000e+00, %body ]
418  store double %phi, ptr %p1, align 8 ; every iteration stores to the same address %p1
419  %next = add i64 %iv, 1
420  %cmp = icmp eq i64 %next, 1024
421  br i1 %cmp, label %exit, label %body
422
423exit:
424  ret void
425}
426
427; Preserve poison-generating flags from 'sdiv' and 'getelementptr' feeding a masked gather.
; Same guard as the scalar 'exact' test (load skipped when %iv is non-zero and
; even), but the index is %iv / 2. The expected output widens the 'sdiv' with
; 'exact' kept, keeps 'inbounds' on the vector GEP, and loads through a masked
; gather.
428define void @preserve_vector_exact_no_addr(ptr noalias nocapture readonly %input,
429                                           ptr %output) local_unnamed_addr #0 {
430; CHECK-LABEL: @preserve_vector_exact_no_addr(
431; CHECK:       vector.body:
432; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
433; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
434; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
435; CHECK:         [[TMP4:%.*]] = icmp ne <4 x i64> [[VEC_IND]], zeroinitializer
436; CHECK-NEXT:    [[TMP5:%.*]] = and <4 x i64> [[VEC_IND]], splat (i64 1)
437; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq <4 x i64> [[TMP5]], zeroinitializer
438; CHECK-NEXT:    [[TMP7:%.*]] = and <4 x i1> [[TMP4]], [[TMP6]]
439; CHECK-NEXT:    [[TMP10:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true)
440; CHECK-NEXT:    [[TMP8:%.*]] = sdiv exact <4 x i64> [[VEC_IND]], splat (i64 2)
441; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, ptr [[INPUT:%.*]], <4 x i64> [[TMP8]]
442; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> [[TMP9]], i32 4, <4 x i1> [[TMP10]], <4 x float> poison), !invariant.load !0
443;
444entry:
445  br label %loop.header
446
447loop.header:
448  %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
; %i10 = (%iv != 0) && (%iv is even); when true the load is skipped.
449  %i7 = icmp ne i64 %iv, 0
450  %i8 = and i64 %iv, 1
451  %i9 = icmp eq i64 %i8, 0
452  %i10 = and i1 %i7, %i9
453  br i1 %i10, label %if.end, label %if.then
454
455if.then:
456  %i26 = sdiv exact i64 %iv, 2 ; 'exact' is expected to survive on the widened form
457  %i29 = getelementptr inbounds float, ptr %input, i64 %i26
458  %i30 = load float, ptr %i29, align 4, !invariant.load !0
459  br label %if.end
460
461if.end:
462  %i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ]
463  %i35 = getelementptr inbounds float, ptr %output, i64 %iv
464  store float %i34, ptr %i35, align 4
465  %iv.inc = add nuw nsw i64 %iv, 1
466  %exitcond = icmp eq i64 %iv.inc, 4
467  br i1 %exitcond, label %loop.exit, label %loop.header
468
469loop.exit:
470  ret void
471}
472
473; Preserve poison-generating flags from 'sdiv', which is not contributing to any address computation
474; of any masked load/store/gather/scatter.
; The conditional 'sdiv exact' result is only a stored value (merged through a
; phi). The expected output keeps 'exact' on the widened 'sdiv', blends it with
; zero via a select, and writes it with a plain (unmasked) vector store.
475define void @preserve_exact_no_addr(ptr %output) local_unnamed_addr #0 {
476; CHECK-LABEL: @preserve_exact_no_addr(
477; CHECK:       vector.body:
478; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
479; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
480; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
481; CHECK:         [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
482; CHECK-NEXT:    [[TMP6:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
483; CHECK-NEXT:    [[TMP5:%.*]] = sdiv exact <4 x i64> [[VEC_IND]], splat (i64 2)
484; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[TMP5]], <4 x i64> zeroinitializer
485; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[OUTPUT:%.*]], i64 [[TMP0]]
486; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0
487; CHECK-NEXT:    store <4 x i64> [[PREDPHI]], ptr [[TMP8]], align 4
488entry:
489  br label %loop.header
490
491loop.header:
492  %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
493  %i23 = icmp eq i64 %iv, 0
494  br i1 %i23, label %if.end, label %if.then
495
496if.then:
497  %i27 = sdiv exact i64 %iv, 2 ; used only as data, never as an address
498  br label %if.end
499
500if.end:
501  %i34 = phi i64 [ 0, %loop.header ], [ %i27, %if.then ]
502  %i35 = getelementptr inbounds i64, ptr %output, i64 %iv
503  store i64 %i34, ptr %i35, align 4
504  %iv.inc = add nuw nsw i64 %iv, 1
505  %exitcond = icmp eq i64 %iv.inc, 4
506  br i1 %exitcond, label %loop.exit, label %loop.header
507
508loop.exit:
509  ret void
510}
511
512; Make sure we don't vectorize a loop with a phi feeding a poison value to
513; a masked load/gather.
; The expectations match the original scalar loop unchanged: 'entry' branches
; straight to 'loop.header' and no vector blocks appear.
; Note that the function signature is split: the 'define' line holds the first
; parameter, the expectation comments follow, and the parameter list is closed
; on the line just before 'entry:'.
514define void @dont_vectorize_poison_phi(ptr noalias nocapture readonly %input,
515; CHECK-LABEL: @dont_vectorize_poison_phi(
516; CHECK-NEXT:  entry:
517; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
518; CHECK:       loop.header:
519; CHECK-NEXT:    [[POISON:%.*]] = phi i64 [ poison, [[ENTRY:%.*]] ], [ [[IV_INC:%.*]], [[IF_END:%.*]] ]
520; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IV_INC]], [[IF_END]] ]
521; CHECK-NEXT:    [[I23:%.*]] = icmp eq i64 [[IV]], 0
522; CHECK-NEXT:    br i1 [[I23]], label [[IF_END]], label [[IF_THEN:%.*]]
523; CHECK:       if.then:
524; CHECK-NEXT:    [[I29:%.*]] = getelementptr inbounds float, ptr [[INPUT:%.*]], i64 [[POISON]]
525; CHECK-NEXT:    [[I30:%.*]] = load float, ptr [[I29]], align 4, !invariant.load !0
526; CHECK-NEXT:    br label [[IF_END]]
527; CHECK:       if.end:
528; CHECK-NEXT:    [[I34:%.*]] = phi float [ 0.000000e+00, [[LOOP_HEADER]] ], [ [[I30]], [[IF_THEN]] ]
529; CHECK-NEXT:    [[I35:%.*]] = getelementptr inbounds float, ptr [[OUTPUT:%.*]], i64 [[IV]]
530; CHECK-NEXT:    store float [[I34]], ptr [[I35]], align 4
531; CHECK-NEXT:    [[IV_INC]] = add nuw nsw i64 [[IV]], 1
532; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[IV_INC]], 4
533; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP_EXIT:%.*]], label [[LOOP_HEADER]]
534; CHECK:       loop.exit:
535; CHECK-NEXT:    ret void
536;
; Remainder of the parameter list started on the 'define' line.
537  ptr %output) local_unnamed_addr #0 {
538entry:
539  br label %loop.header
540
541loop.header:
542  %poison = phi i64 [ poison, %entry ], [ %iv.inc, %if.end ] ; poison on the first iteration, %iv.inc afterwards
543  %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
544  %i23 = icmp eq i64 %iv, 0
545  br i1 %i23, label %if.end, label %if.then ; the first iteration (%iv == 0) skips the load
546
547if.then:
548  %i29 = getelementptr inbounds float, ptr %input, i64 %poison ; address is derived from the phi above
549  %i30 = load float, ptr %i29, align 4, !invariant.load !0
550  br label %if.end
551
552if.end:
553  %i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ]
554  %i35 = getelementptr inbounds float, ptr %output, i64 %iv
555  store float %i34, ptr %i35, align 4
556  %iv.inc = add nuw nsw i64 %iv, 1
557  %exitcond = icmp eq i64 %iv.inc, 4
558  br i1 %exitcond, label %loop.exit, label %loop.header
559
560loop.exit:
561  ret void
562}
563
; External 5-byte array loaded from by the pr70590 test below.
564@c = external global [5 x i8]
565
566; Test case for https://github.com/llvm/llvm-project/issues/70590.
567; Note that the then block has UB, but I could not find any other way to
568; construct a suitable test case.
; The UB mentioned above is the 'srem i64 3, 0' in %then. This function does
; not reference attribute group #0. The expected output replicates the 'srem'
; per lane in separate pred.srem.if blocks guarded by the extracted mask bits,
; performs a plain (unmasked) wide load from @c, and blends the loaded value
; with zero via a select before a wide store to %dst.
569define void @pr70590_recipe_without_underlying_instr(i64 %n, ptr noalias %dst) {
570; CHECK-LABEL: @pr70590_recipe_without_underlying_instr(
571; CHECK:       vector.body:
572; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.+]] ], [ [[INDEX_NEXT:%.*]], [[PRED_SREM_CONTINUE6:%.*]] ]
573; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_SREM_CONTINUE6]] ]
574; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
575; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <4 x i64> [[VEC_IND]],
576; CHECK-NEXT:    [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], splat (i1 true)
577; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0
578; CHECK-NEXT:    br i1 [[TMP3]], label [[PRED_SREM_IF:%.*]], label [[PRED_SREM_CONTINUE:%.*]]
579; CHECK:       pred.srem.if:
580; CHECK-NEXT:    [[TMP4:%.*]] = srem i64 3, 0
581; CHECK-NEXT:    br label [[PRED_SREM_CONTINUE]]
582; CHECK:       pred.srem.continue:
583; CHECK-NEXT:    [[TMP5:%.*]] = phi i64 [ poison, %vector.body ], [ [[TMP4]], [[PRED_SREM_IF]] ]
584; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1
585; CHECK-NEXT:    br i1 [[TMP6]], label [[PRED_SREM_IF1:%.*]], label [[PRED_SREM_CONTINUE2:%.*]]
586; CHECK:       pred.srem.if1:
587; CHECK-NEXT:    [[TMP7:%.*]] = srem i64 3, 0
588; CHECK-NEXT:    br label [[PRED_SREM_CONTINUE2]]
589; CHECK:       pred.srem.continue2:
590; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2
591; CHECK-NEXT:    br i1 [[TMP9]], label [[PRED_SREM_IF3:%.*]], label [[PRED_SREM_CONTINUE4:%.*]]
592; CHECK:       pred.srem.if3:
593; CHECK-NEXT:    [[TMP10:%.*]] = srem i64 3, 0
594; CHECK-NEXT:    br label [[PRED_SREM_CONTINUE4]]
595; CHECK:       pred.srem.continue4:
596; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3
597; CHECK-NEXT:    br i1 [[TMP12]], label [[PRED_SREM_IF5:%.*]], label [[PRED_SREM_CONTINUE6]]
598; CHECK:       pred.srem.if5:
599; CHECK-NEXT:    [[TMP13:%.*]] = srem i64 3, 0
600; CHECK-NEXT:    br label [[PRED_SREM_CONTINUE6]]
601; CHECK:       pred.srem.continue6:
602; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[TMP5]], -3
603; CHECK-NEXT:    [[TMP16:%.*]] = add i64 [[TMP0]], [[TMP15]]
604; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr [5 x i8], ptr @c, i64 0, i64 [[TMP16]]
605; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr i8, ptr [[TMP17]], i32 0
606; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP18]], align 1
607; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP2]], <4 x i8> [[WIDE_LOAD]], <4 x i8> zeroinitializer
608; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr i8, ptr %dst, i64 [[TMP0]]
609; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr i8, ptr [[TMP19]], i32 0
610; CHECK-NEXT:    store <4 x i8> [[PREDPHI]], ptr [[TMP20]], align 4
611; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
612; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
613; CHECK-NEXT:    br i1 true, label %middle.block, label %vector.body
614; CHECK:       middle.block:
615
616entry:
617  br label %loop.header
618
619loop.header:
620  %iv = phi i64 [ 0, %entry ], [ %inc, %loop.latch ]
621  %cmp = icmp eq i64 %iv, %n
622  br i1 %cmp, label %loop.latch, label %then ; %then is skipped only when %iv == %n
623
624then:
625  %rem = srem i64 3, 0 ; remainder by zero: the UB the comment above the function refers to
626  %add3 = add i64 %rem, -3
627  %add5 = add i64 %iv, %add3
628  %gep = getelementptr [5 x i8], ptr @c, i64 0, i64 %add5
629  %l = load i8, ptr %gep, align 1
630  br label %loop.latch
631
632loop.latch:
633  %sr = phi i8 [ 0, %loop.header ], [ %l , %then ]
634  %gep.dst = getelementptr i8, ptr %dst, i64 %iv
635  store i8 %sr, ptr %gep.dst, align 4
636  %inc = add i64 %iv, 1
637  %exitcond.not = icmp eq i64 %inc, 4
638  br i1 %exitcond.not, label %exit, label %loop.header
639
640exit:
641  ret void
642}
643
644; %B.gep.0 and pointers based on it can preserve inbounds, as the inbounds
645; version is used unconditionally in the store in the latch.
646; FIXME: at the moment, inbounds is dropped from both the GEP feeding the vector load and store
; NOTE(review): the expectations below show 'inbounds' missing on the GEP of %B
; and on the GEP feeding the load, but still present on the GEP feeding the
; store, so the FIXME wording may be out of date - confirm.
; %B.gep.0 is computed in %loop.body and used by both the conditional load in
; %else and the unconditional store in %loop.latch.
647define void @Bgep_inbounds_unconditionally_due_to_store(ptr noalias %B, ptr readonly %C) #0 {
648; CHECK-LABEL: define void @Bgep_inbounds_unconditionally_due_to_store(
649; CHECK:       vector.body:
650; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
651; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
652; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr %C, i64 [[TMP0]]
653; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
654; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
655; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 20)
656; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr float, ptr %B, i64 [[TMP0]]
657; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr float, ptr [[TMP4]], i32 0
658; CHECK-NEXT:    [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP5]], align 4
659; CHECK-NEXT:    [[TMP6:%.*]] = fadd <4 x float> [[WIDE_LOAD2]], splat (float 2.000000e+00)
660; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP3]], <4 x float> splat (float 3.300000e+01), <4 x float> [[TMP6]]
661; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i32 0
662; CHECK-NEXT:    store <4 x float> [[PREDPHI]], ptr [[TMP8]], align 4
663; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
664; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
665; CHECK-NEXT:    br i1 [[TMP9]], label %middle.block, label %vector.body
666
667entry:
668  br label %loop.body
669
670loop.body:
671  %iv1 = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
672  %C.gep = getelementptr inbounds i32, ptr %C, i64 %iv1
673  %C.lv = load i32, ptr %C.gep, align 4
674  %cmp = icmp eq i32 %C.lv, 20
675  %B.gep.0 = getelementptr inbounds float, ptr %B, i64 %iv1 ; shared by the load in %else and the store in %loop.latch
676  br i1 %cmp, label %loop.latch, label %else
677
678else:
679  %B.lv = load float, ptr %B.gep.0, align 4 ; conditional use: only when C[iv1] != 20
680  %add = fadd float %B.lv, 2.0
681  br label %loop.latch
682
683loop.latch:
684  %add.sink = phi float [ %add, %else ], [ 33.0, %loop.body ]
685  store float %add.sink, ptr %B.gep.0, align 4 ; unconditional use on every iteration
686  %iv.next = add nuw nsw i64 %iv1, 1
687  %exitcond.not = icmp eq i64 %iv.next, 10000
688  br i1 %exitcond.not, label %exit, label %loop.body
689
690exit:
691  ret void
692}
693
; Attribute group #0: enables the AVX512 target features on every function
; that references it (the file header explains they are needed for masked
; vectorization).
694attributes #0 = { noinline nounwind uwtable "target-features"="+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl" }
695
; Empty metadata node referenced by the !invariant.load annotations on the loads in this file.
696!0 = !{}
697