; RUN: opt %s -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S | FileCheck %s

; Make sure that integer poison-generating flags (i.e., nuw/nsw, exact and inbounds)
; are dropped from instructions in blocks that need predication and are linearized
; and masked after vectorization. We only drop flags from scalar instructions that
; contribute to the address computation of a masked vector load/store. After
; linearizing the control flow and removing their guarding condition, these
; instructions could generate a poison value which would be used as base address of
; the masked vector load/store (see PR52111). For gather/scatter cases,
; poison-generating flags can be preserved since poison addresses in the vector GEP
; reaching the gather/scatter instruction will be masked-out by the gather/scatter
; instruction itself and won't be used.
; We need AVX512 target features for the loop to be vectorized with masks instead of
; predicates.

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"

; Drop poison-generating flags from 'sub' and 'getelementptr' feeding a masked load.
; Test for PR52111.
define void @drop_scalar_nuw_nsw(ptr noalias nocapture readonly %input,
                                 ptr %output) local_unnamed_addr #0 {
; CHECK-LABEL: @drop_scalar_nuw_nsw(
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK:         [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
; CHECK-NEXT:    [[TMP7:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
; CHECK-NEXT:    [[TMP5:%.*]] = sub i64 [[TMP0]], 1
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr float, ptr [[INPUT:%.*]], i64 [[TMP5]]
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr float, ptr [[TMP6]], i32 0
; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP8]], i32 4, <4 x i1> [[TMP7]], <4 x float> poison), !invariant.load !0
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
  %i23 = icmp eq i64 %iv, 0
  br i1 %i23, label %if.end, label %if.then

if.then:
  %i27 = sub nuw nsw i64 %iv, 1
  %i29 = getelementptr inbounds float, ptr %input, i64 %i27
  %i30 = load float, ptr %i29, align 4, !invariant.load !0
  br label %if.end

if.end:
  %i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ]
  %i35 = getelementptr inbounds float, ptr %output, i64 %iv
  store float %i34, ptr %i35, align 4
  %iv.inc = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.inc, 4
  br i1 %exitcond, label %loop.exit, label %loop.header

loop.exit:
  ret void
}

; Variant with getelementptr nusw.
define void @drop_scalar_gep_nusw(ptr noalias nocapture readonly %input,
                                  ptr %output) local_unnamed_addr #0 {
; CHECK-LABEL: @drop_scalar_gep_nusw(
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK:         [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
; CHECK-NEXT:    [[TMP7:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
; CHECK-NEXT:    [[TMP5:%.*]] = sub i64 [[TMP0]], 1
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr float, ptr [[INPUT:%.*]], i64 [[TMP5]]
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr float, ptr [[TMP6]], i32 0
; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP8]], i32 4, <4 x i1> [[TMP7]], <4 x float> poison), !invariant.load !0
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
  %i23 = icmp eq i64 %iv, 0
  br i1 %i23, label %if.end, label %if.then

if.then:
  %i27 = sub nuw nsw i64 %iv, 1
  %i29 = getelementptr nusw float, ptr %input, i64 %i27
  %i30 = load float, ptr %i29, align 4, !invariant.load !0
  br label %if.end

if.end:
  %i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ]
  %i35 = getelementptr nusw float, ptr %output, i64 %iv
  store float %i34, ptr %i35, align 4
  %iv.inc = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.inc, 4
  br i1 %exitcond, label %loop.exit, label %loop.header

loop.exit:
  ret void
}

; Variant with getelementptr nuw.
define void @drop_scalar_gep_nuw(ptr noalias nocapture readonly %input,
                                 ptr %output) local_unnamed_addr #0 {
; CHECK-LABEL: @drop_scalar_gep_nuw(
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK:         [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
; CHECK-NEXT:    [[TMP7:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
; CHECK-NEXT:    [[TMP5:%.*]] = sub i64 [[TMP0]], 1
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr float, ptr [[INPUT:%.*]], i64 [[TMP5]]
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr float, ptr [[TMP6]], i32 0
; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP8]], i32 4, <4 x i1> [[TMP7]], <4 x float> poison), !invariant.load !0
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
  %i23 = icmp eq i64 %iv, 0
  br i1 %i23, label %if.end, label %if.then

if.then:
  %i27 = sub nuw nsw i64 %iv, 1
  %i29 = getelementptr nuw float, ptr %input, i64 %i27
  %i30 = load float, ptr %i29, align 4, !invariant.load !0
  br label %if.end

if.end:
  %i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ]
  %i35 = getelementptr nuw float, ptr %output, i64 %iv
  store float %i34, ptr %i35, align 4
  %iv.inc = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.inc, 4
  br i1 %exitcond, label %loop.exit, label %loop.header

loop.exit:
  ret void
}

; Drop poison-generating flags from 'sub' and 'getelementptr' feeding a masked load.
; In this case, 'sub' and 'getelementptr' are not guarded by the predicate.
define void @drop_nonpred_scalar_nuw_nsw(ptr noalias nocapture readonly %input,
                                         ptr %output) local_unnamed_addr #0 {
; CHECK-LABEL: @drop_nonpred_scalar_nuw_nsw(
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK:         [[TMP5:%.*]] = sub i64 [[TMP0]], 1
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr float, ptr [[INPUT:%.*]], i64 [[TMP5]]
; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
; CHECK-NEXT:    [[TMP7:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr float, ptr [[TMP6]], i32 0
; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP8]], i32 4, <4 x i1> [[TMP7]], <4 x float> poison), !invariant.load !0
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
  %i27 = sub i64 %iv, 1
  %i29 = getelementptr float, ptr %input, i64 %i27
  %i23 = icmp eq i64 %iv, 0
  br i1 %i23, label %if.end, label %if.then

if.then:
  %i30 = load float, ptr %i29, align 4, !invariant.load !0
  br label %if.end

if.end:
  %i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ]
  %i35 = getelementptr inbounds float, ptr %output, i64 %iv
  store float %i34, ptr %i35, align 4
  %iv.inc = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.inc, 4
  br i1 %exitcond, label %loop.exit, label %loop.header

loop.exit:
  ret void
}

; Preserve poison-generating flags from vector 'sub', 'mul' and 'getelementptr' feeding a masked gather.
define void @preserve_vector_nuw_nsw(ptr noalias nocapture readonly %input,
                                     ptr %output) local_unnamed_addr #0 {
; CHECK-LABEL: @preserve_vector_nuw_nsw(
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK:         [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
; CHECK-NEXT:    [[TMP8:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
; CHECK-NEXT:    [[TMP5:%.*]] = sub nuw nsw <4 x i64> [[VEC_IND]], splat (i64 1)
; CHECK-NEXT:    [[TMP6:%.*]] = mul nuw nsw <4 x i64> [[TMP5]], splat (i64 2)
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, ptr [[INPUT:%.*]], <4 x i64> [[TMP6]]
; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> [[TMP7]], i32 4, <4 x i1> [[TMP8]], <4 x float> poison), !invariant.load !0
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
  %i23 = icmp eq i64 %iv, 0
  br i1 %i23, label %if.end, label %if.then

if.then:
  %i27 = sub nuw nsw i64 %iv, 1
  %i28 = mul nuw nsw i64 %i27, 2
  %i29 = getelementptr inbounds float, ptr %input, i64 %i28
  %i30 = load float, ptr %i29, align 4, !invariant.load !0
  br label %if.end

if.end:
  %i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ]
  %i35 = getelementptr inbounds float, ptr %output, i64 %iv
  store float %i34, ptr %i35, align 4
  %iv.inc = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.inc, 4
  br i1 %exitcond, label %loop.exit, label %loop.header

loop.exit:
  ret void
}

; Drop poison-generating flags from vector 'sub' and 'gep' feeding a masked load.
define void @drop_vector_nuw_nsw(ptr noalias nocapture readonly %input,
                                 ptr %output, ptr noalias %ptrs) local_unnamed_addr #0 {
; CHECK-LABEL: @drop_vector_nuw_nsw(
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK:         [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[PTRS:%.*]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP6:%.*]] = sub <4 x i64> [[VEC_IND]], splat (i64 1)
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr float, ptr [[INPUT:%.*]], <4 x i64> [[TMP6]]
; CHECK:         [[TMP10:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <4 x ptr> [[TMP7]], i32 0
; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr float, ptr [[TMP11]], i32 0
; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP12]], i32 4, <4 x i1> [[TMP10]], <4 x float> poison), !invariant.load !0
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
  %i23 = icmp eq i64 %iv, 0
  %gep = getelementptr inbounds ptr, ptr %ptrs, i64 %iv
  %i27 = sub nuw nsw i64 %iv, 1
  %i29 = getelementptr inbounds float, ptr %input, i64 %i27
  store ptr %i29, ptr %gep
  br i1 %i23, label %if.end, label %if.then

if.then:
  %i30 = load float, ptr %i29, align 4, !invariant.load !0
  br label %if.end

if.end:
  %i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ]
  %i35 = getelementptr inbounds float, ptr %output, i64 %iv
  store float %i34, ptr %i35, align 4
  %iv.inc = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.inc, 4
  br i1 %exitcond, label %loop.exit, label %loop.header

loop.exit:
  ret void
}

; Preserve poison-generating flags from 'sub', which is not contributing to any address computation
; of any masked load/store/gather/scatter.
define void @preserve_nuw_nsw_no_addr(ptr %output) local_unnamed_addr #0 {
; CHECK-LABEL: @preserve_nuw_nsw_no_addr(
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK:         [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
; CHECK-NEXT:    [[TMP6:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
; CHECK-NEXT:    [[TMP5:%.*]] = sub nuw nsw <4 x i64> [[VEC_IND]], splat (i64 1)
; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[TMP5]], <4 x i64> zeroinitializer
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[OUTPUT:%.*]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0
; CHECK-NEXT:    store <4 x i64> [[PREDPHI]], ptr [[TMP8]], align 4
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
  %i23 = icmp eq i64 %iv, 0
  br i1 %i23, label %if.end, label %if.then

if.then:
  %i27 = sub nuw nsw i64 %iv, 1
  br label %if.end

if.end:
  %i34 = phi i64 [ 0, %loop.header ], [ %i27, %if.then ]
  %i35 = getelementptr inbounds i64, ptr %output, i64 %iv
  store i64 %i34, ptr %i35, align 4
  %iv.inc = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.inc, 4
  br i1 %exitcond, label %loop.exit, label %loop.header

loop.exit:
  ret void
}

; Drop poison-generating flags from 'sdiv' and 'getelementptr' feeding a masked load.
define void @drop_scalar_exact(ptr noalias nocapture readonly %input,
                               ptr %output) local_unnamed_addr #0 {
; CHECK-LABEL: @drop_scalar_exact(
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK:         [[TMP4:%.*]] = icmp ne <4 x i64> [[VEC_IND]], zeroinitializer
; CHECK-NEXT:    [[TMP5:%.*]] = and <4 x i64> [[VEC_IND]], splat (i64 1)
; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq <4 x i64> [[TMP5]], zeroinitializer
; CHECK-NEXT:    [[TMP7:%.*]] = and <4 x i1> [[TMP4]], [[TMP6]]
; CHECK-NEXT:    [[TMP10:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true)
; CHECK-NEXT:    [[TMP8:%.*]] = sdiv i64 [[TMP0]], 1
; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr float, ptr [[INPUT:%.*]], i64 [[TMP8]]
; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr float, ptr [[TMP9]], i32 0
; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP11]], i32 4, <4 x i1> [[TMP10]], <4 x float> poison), !invariant.load !0
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
  %i7 = icmp ne i64 %iv, 0
  %i8 = and i64 %iv, 1
  %i9 = icmp eq i64 %i8, 0
  %i10 = and i1 %i7, %i9
  br i1 %i10, label %if.end, label %if.then

if.then:
  %i26 = sdiv exact i64 %iv, 1
  %i29 = getelementptr inbounds float, ptr %input, i64 %i26
  %i30 = load float, ptr %i29, align 4, !invariant.load !0
  br label %if.end

if.end:
  %i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ]
  %i35 = getelementptr inbounds float, ptr %output, i64 %iv
  store float %i34, ptr %i35, align 4
  %iv.inc = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.inc, 4
  br i1 %exitcond, label %loop.exit, label %loop.header

loop.exit:
  ret void
}

define void @drop_zext_nneg(ptr noalias %p, ptr noalias %p1) #0 {
; CHECK-LABEL: define void @drop_zext_nneg(
; CHECK-SAME: ptr noalias [[P:%.*]], ptr noalias [[P1:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq <4 x i32> [[VEC_IND]], zeroinitializer
; CHECK-NEXT:    [[TMP1:%.*]] = zext <4 x i32> [[VEC_IND]] to <4 x i64>
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i64> [[TMP1]], i32 0
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr double, ptr [[P]], i64 [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr double, ptr [[TMP3]], i32 0
; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP4]], i32 8, <4 x i1> [[TMP0]], <4 x double> poison)
; CHECK-NEXT:    [[TMP5:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true)
; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP5]], <4 x double> zeroinitializer, <4 x double> [[WIDE_MASKED_LOAD]]
; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x double> [[PREDPHI]], i32 3
; CHECK-NEXT:    store double [[TMP6]], ptr [[P1]], align 8
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[BODY:%.*]]
; CHECK:       body:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[NEXT:%.*]], [[ELSE:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT:    [[TMP8:%.*]] = trunc i64 [[IV]] to i32
; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 [[TMP8]], 0
; CHECK-NEXT:    br i1 [[C]], label [[THEN:%.*]], label [[ELSE]]
; CHECK:       then:
; CHECK-NEXT:    [[ZEXT:%.*]] = zext nneg i32 [[TMP8]] to i64
; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr double, ptr [[P]], i64 [[ZEXT]]
; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr double, ptr [[P]], i64 [[ZEXT]]
; CHECK-NEXT:    [[TMP9:%.*]] = load double, ptr [[IDX2]], align 8
; CHECK-NEXT:    br label [[ELSE]]
; CHECK:       else:
; CHECK-NEXT:    [[PHI:%.*]] = phi double [ [[TMP9]], [[THEN]] ], [ 0.000000e+00, [[BODY]] ]
; CHECK-NEXT:    store double [[PHI]], ptr [[P1]], align 8
; CHECK-NEXT:    [[NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 [[NEXT]], 1024
; CHECK-NEXT:    br i1 [[CMP]], label [[EXIT]], label [[BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
entry:
  br label %body

body:
  %iv = phi i64 [ %next, %else ], [ 0, %entry ]
  %0 = trunc i64 %iv to i32
  %c = icmp eq i32 %0, 0
  br i1 %c, label %then, label %else

then:
  %zext = zext nneg i32 %0 to i64
  %idx1 = getelementptr double, ptr %p, i64 %zext
  %idx2 = getelementptr double, ptr %p, i64 %zext
  %1 = load double, ptr %idx2, align 8
  br label %else

else:
  %phi = phi double [ %1, %then ], [ 0.000000e+00, %body ]
  store double %phi, ptr %p1, align 8
  %next = add i64 %iv, 1
  %cmp = icmp eq i64 %next, 1024
  br i1 %cmp, label %exit, label %body

exit:
  ret void
}

; Preserve poison-generating flags from 'sdiv' and 'getelementptr' feeding a masked gather.
define void @preserve_vector_exact_no_addr(ptr noalias nocapture readonly %input,
                                           ptr %output) local_unnamed_addr #0 {
; CHECK-LABEL: @preserve_vector_exact_no_addr(
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK:         [[TMP4:%.*]] = icmp ne <4 x i64> [[VEC_IND]], zeroinitializer
; CHECK-NEXT:    [[TMP5:%.*]] = and <4 x i64> [[VEC_IND]], splat (i64 1)
; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq <4 x i64> [[TMP5]], zeroinitializer
; CHECK-NEXT:    [[TMP7:%.*]] = and <4 x i1> [[TMP4]], [[TMP6]]
; CHECK-NEXT:    [[TMP10:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true)
; CHECK-NEXT:    [[TMP8:%.*]] = sdiv exact <4 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, ptr [[INPUT:%.*]], <4 x i64> [[TMP8]]
; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> [[TMP9]], i32 4, <4 x i1> [[TMP10]], <4 x float> poison), !invariant.load !0
;
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
  %i7 = icmp ne i64 %iv, 0
  %i8 = and i64 %iv, 1
  %i9 = icmp eq i64 %i8, 0
  %i10 = and i1 %i7, %i9
  br i1 %i10, label %if.end, label %if.then

if.then:
  %i26 = sdiv exact i64 %iv, 2
  %i29 = getelementptr inbounds float, ptr %input, i64 %i26
  %i30 = load float, ptr %i29, align 4, !invariant.load !0
  br label %if.end

if.end:
  %i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ]
  %i35 = getelementptr inbounds float, ptr %output, i64 %iv
  store float %i34, ptr %i35, align 4
  %iv.inc = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.inc, 4
  br i1 %exitcond, label %loop.exit, label %loop.header

loop.exit:
  ret void
}

; Preserve poison-generating flags from 'sdiv', which is not contributing to any address computation
; of any masked load/store/gather/scatter.
define void @preserve_exact_no_addr(ptr %output) local_unnamed_addr #0 {
; CHECK-LABEL: @preserve_exact_no_addr(
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK:         [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
; CHECK-NEXT:    [[TMP6:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
; CHECK-NEXT:    [[TMP5:%.*]] = sdiv exact <4 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[TMP5]], <4 x i64> zeroinitializer
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[OUTPUT:%.*]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0
; CHECK-NEXT:    store <4 x i64> [[PREDPHI]], ptr [[TMP8]], align 4
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
  %i23 = icmp eq i64 %iv, 0
  br i1 %i23, label %if.end, label %if.then

if.then:
  %i27 = sdiv exact i64 %iv, 2
  br label %if.end

if.end:
  %i34 = phi i64 [ 0, %loop.header ], [ %i27, %if.then ]
  %i35 = getelementptr inbounds i64, ptr %output, i64 %iv
  store i64 %i34, ptr %i35, align 4
  %iv.inc = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.inc, 4
  br i1 %exitcond, label %loop.exit, label %loop.header

loop.exit:
  ret void
}

; Make sure we don't vectorize a loop with a phi feeding a poison value to
; a masked load/gather.
define void @dont_vectorize_poison_phi(ptr noalias nocapture readonly %input,
; CHECK-LABEL: @dont_vectorize_poison_phi(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
; CHECK:       loop.header:
; CHECK-NEXT:    [[POISON:%.*]] = phi i64 [ poison, [[ENTRY:%.*]] ], [ [[IV_INC:%.*]], [[IF_END:%.*]] ]
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IV_INC]], [[IF_END]] ]
; CHECK-NEXT:    [[I23:%.*]] = icmp eq i64 [[IV]], 0
; CHECK-NEXT:    br i1 [[I23]], label [[IF_END]], label [[IF_THEN:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    [[I29:%.*]] = getelementptr inbounds float, ptr [[INPUT:%.*]], i64 [[POISON]]
; CHECK-NEXT:    [[I30:%.*]] = load float, ptr [[I29]], align 4, !invariant.load !0
; CHECK-NEXT:    br label [[IF_END]]
; CHECK:       if.end:
; CHECK-NEXT:    [[I34:%.*]] = phi float [ 0.000000e+00, [[LOOP_HEADER]] ], [ [[I30]], [[IF_THEN]] ]
; CHECK-NEXT:    [[I35:%.*]] = getelementptr inbounds float, ptr [[OUTPUT:%.*]], i64 [[IV]]
; CHECK-NEXT:    store float [[I34]], ptr [[I35]], align 4
; CHECK-NEXT:    [[IV_INC]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[IV_INC]], 4
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP_EXIT:%.*]], label [[LOOP_HEADER]]
; CHECK:       loop.exit:
; CHECK-NEXT:    ret void
;
                                       ptr %output) local_unnamed_addr #0 {
entry:
  br label %loop.header

loop.header:
  %poison = phi i64 [ poison, %entry ], [ %iv.inc, %if.end ]
  %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
  %i23 = icmp eq i64 %iv, 0
  br i1 %i23, label %if.end, label %if.then

if.then:
  %i29 = getelementptr inbounds float, ptr %input, i64 %poison
  %i30 = load float, ptr %i29, align 4, !invariant.load !0
  br label %if.end

if.end:
  %i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ]
  %i35 = getelementptr inbounds float, ptr %output, i64 %iv
  store float %i34, ptr %i35, align 4
  %iv.inc = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.inc, 4
  br i1 %exitcond, label %loop.exit, label %loop.header

loop.exit:
  ret void
}

@c = external global [5 x i8]

; Test case for https://github.com/llvm/llvm-project/issues/70590.
; Note that the then block has UB, but I could not find any other way to
; construct a suitable test case.
define void @pr70590_recipe_without_underlying_instr(i64 %n, ptr noalias %dst) {
; CHECK-LABEL: @pr70590_recipe_without_underlying_instr(
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.+]] ], [ [[INDEX_NEXT:%.*]], [[PRED_SREM_CONTINUE6:%.*]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_SREM_CONTINUE6]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <4 x i64> [[VEC_IND]],
; CHECK-NEXT:    [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], splat (i1 true)
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0
; CHECK-NEXT:    br i1 [[TMP3]], label [[PRED_SREM_IF:%.*]], label [[PRED_SREM_CONTINUE:%.*]]
; CHECK:       pred.srem.if:
; CHECK-NEXT:    [[TMP4:%.*]] = srem i64 3, 0
; CHECK-NEXT:    br label [[PRED_SREM_CONTINUE]]
; CHECK:       pred.srem.continue:
; CHECK-NEXT:    [[TMP5:%.*]] = phi i64 [ poison, %vector.body ], [ [[TMP4]], [[PRED_SREM_IF]] ]
; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1
; CHECK-NEXT:    br i1 [[TMP6]], label [[PRED_SREM_IF1:%.*]], label [[PRED_SREM_CONTINUE2:%.*]]
; CHECK:       pred.srem.if1:
; CHECK-NEXT:    [[TMP7:%.*]] = srem i64 3, 0
; CHECK-NEXT:    br label [[PRED_SREM_CONTINUE2]]
; CHECK:       pred.srem.continue2:
; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2
; CHECK-NEXT:    br i1 [[TMP9]], label [[PRED_SREM_IF3:%.*]], label [[PRED_SREM_CONTINUE4:%.*]]
; CHECK:       pred.srem.if3:
; CHECK-NEXT:    [[TMP10:%.*]] = srem i64 3, 0
; CHECK-NEXT:    br label [[PRED_SREM_CONTINUE4]]
; CHECK:       pred.srem.continue4:
; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3
; CHECK-NEXT:    br i1 [[TMP12]], label [[PRED_SREM_IF5:%.*]], label [[PRED_SREM_CONTINUE6]]
; CHECK:       pred.srem.if5:
; CHECK-NEXT:    [[TMP13:%.*]] = srem i64 3, 0
; CHECK-NEXT:    br label [[PRED_SREM_CONTINUE6]]
; CHECK:       pred.srem.continue6:
; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[TMP5]], -3
; CHECK-NEXT:    [[TMP16:%.*]] = add i64 [[TMP0]], [[TMP15]]
; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr [5 x i8], ptr @c, i64 0, i64 [[TMP16]]
; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr i8, ptr [[TMP17]], i32 0
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP18]], align 1
; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP2]], <4 x i8> [[WIDE_LOAD]], <4 x i8> zeroinitializer
; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr i8, ptr %dst, i64 [[TMP0]]
; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr i8, ptr [[TMP19]], i32 0
; CHECK-NEXT:    store <4 x i8> [[PREDPHI]], ptr [[TMP20]], align 4
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    br i1 true, label %middle.block, label %vector.body
; CHECK:       middle.block:

entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ 0, %entry ], [ %inc, %loop.latch ]
  %cmp = icmp eq i64 %iv, %n
  br i1 %cmp, label %loop.latch, label %then

then:
  %rem = srem i64 3, 0
  %add3 = add i64 %rem, -3
  %add5 = add i64 %iv, %add3
  %gep = getelementptr [5 x i8], ptr @c, i64 0, i64 %add5
  %l = load i8, ptr %gep, align 1
  br label %loop.latch

loop.latch:
  %sr = phi i8 [ 0, %loop.header ], [ %l , %then ]
  %gep.dst = getelementptr i8, ptr %dst, i64 %iv
  store i8 %sr, ptr %gep.dst, align 4
  %inc = add i64 %iv, 1
  %exitcond.not = icmp eq i64 %inc, 4
  br i1 %exitcond.not, label %exit, label %loop.header

exit:
  ret void
}

; %B.gep.0 and pointers based on it can preserve inbounds, as the inbounds
; version is used unconditionally in the store in the latch.
; FIXME: at the moment, inbounds is dropped from both the GEP feeding the vector load and store
define void @Bgep_inbounds_unconditionally_due_to_store(ptr noalias %B, ptr readonly %C) #0 {
; CHECK-LABEL: define void @Bgep_inbounds_unconditionally_due_to_store(
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr %C, i64 [[TMP0]]
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 20)
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr float, ptr %B, i64 [[TMP0]]
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr float, ptr [[TMP4]], i32 0
; CHECK-NEXT:    [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP5]], align 4
; CHECK-NEXT:    [[TMP6:%.*]] = fadd <4 x float> [[WIDE_LOAD2]], splat (float 2.000000e+00)
; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP3]], <4 x float> splat (float 3.300000e+01), <4 x float> [[TMP6]]
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i32 0
; CHECK-NEXT:    store <4 x float> [[PREDPHI]], ptr [[TMP8]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
; CHECK-NEXT:    br i1 [[TMP9]], label %middle.block, label %vector.body

entry:
  br label %loop.body

loop.body:
  %iv1 = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
  %C.gep = getelementptr inbounds i32, ptr %C, i64 %iv1
  %C.lv = load i32, ptr %C.gep, align 4
  %cmp = icmp eq i32 %C.lv, 20
  %B.gep.0 = getelementptr inbounds float, ptr %B, i64 %iv1
  br i1 %cmp, label %loop.latch, label %else

else:
  %B.lv = load float, ptr %B.gep.0, align 4
  %add = fadd float %B.lv, 2.0
  br label %loop.latch

loop.latch:
  %add.sink = phi float [ %add, %else ], [ 33.0, %loop.body ]
  store float %add.sink, ptr %B.gep.0, align 4
  %iv.next = add nuw nsw i64 %iv1, 1
  %exitcond.not = icmp eq i64 %iv.next, 10000
  br i1 %exitcond.not, label %exit, label %loop.body

exit:
  ret void
}

attributes #0 = { noinline nounwind uwtable "target-features"="+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl" }

!0 = !{}