; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; REQUIRES: asserts

; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -debug-only=loop-vectorize -S < %s 2>&1 | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"

;; Given the choice between a masked and unmasked variant for the same VF (4)
;; where no mask is required, make sure we choose the unmasked variant.

; CHECK-LABEL: LV: Checking a loop in 'test_v4_v4m'
; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' {
; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<1024> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: Successor(s): vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT:   vector.body:
; CHECK-NEXT:     EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK-NEXT:     vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT:     CLONE ir<%gep> = getelementptr ir<%b>, vp<[[STEPS]]>
; CHECK-NEXT:     vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep>
; CHECK-NEXT:     WIDEN ir<%load> = load vp<[[VEC_PTR]]>
; CHECK-NEXT:     REPLICATE ir<%call> = call @foo(ir<%load>)
; CHECK-NEXT:     CLONE ir<%arrayidx> = getelementptr inbounds ir<%a>, vp<[[STEPS]]>
; CHECK-NEXT:     vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%arrayidx>
; CHECK-NEXT:     WIDEN store vp<[[VEC_PTR2]]>, ir<%call>
; CHECK-NEXT:     EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
; CHECK-NEXT:     EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
; CHECK-NEXT:   No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT:   EMIT vp<[[CMP:%.+]]> = icmp eq ir<1024>, vp<[[VTC]]>
; CHECK-NEXT:   EMIT branch-on-cond vp<[[CMP]]>
; CHECK-NEXT: Successor(s): ir-bb<for.cond.cleanup>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT:   EMIT vp<[[RESUME:%.+]]> = resume-phi vp<[[VTC]]>, ir<0>
; CHECK-NEXT: Successor(s): ir-bb<for.body>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.body>:
; CHECK-NEXT:   IR %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] (extra operand: vp<[[RESUME]]> from scalar.ph)
; CHECK:        IR %exitcond = icmp eq i64 %indvars.iv.next, 1024
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.cond.cleanup>:
; CHECK-NEXT: No successors
; CHECK-NEXT: }

; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<1024> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: Successor(s): vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT:   vector.body:
; CHECK-NEXT:     EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK-NEXT:     vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT:     CLONE ir<%gep> = getelementptr ir<%b>, vp<[[STEPS]]>
; CHECK-NEXT:     vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep>
; CHECK-NEXT:     WIDEN ir<%load> = load vp<[[VEC_PTR]]>
; CHECK-NEXT:     WIDEN-CALL ir<%call> = call @foo(ir<%load>) (using library function: foo_vector_fixed4_nomask)
; CHECK-NEXT:     CLONE ir<%arrayidx> = getelementptr inbounds ir<%a>, vp<[[STEPS]]>
; CHECK-NEXT:     vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%arrayidx>
; CHECK-NEXT:     WIDEN store vp<[[VEC_PTR2]]>, ir<%call>
; CHECK-NEXT:     EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
; CHECK-NEXT:     EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
; CHECK-NEXT:   No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT:   EMIT vp<[[CMP:%.+]]> = icmp eq ir<1024>, vp<[[VTC]]>
; CHECK-NEXT:   EMIT branch-on-cond vp<[[CMP]]>
; CHECK-NEXT: Successor(s): ir-bb<for.cond.cleanup>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT:   EMIT vp<[[RESUME:%.+]]> = resume-phi vp<[[VTC]]>, ir<0>
; CHECK-NEXT: Successor(s): ir-bb<for.body>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.body>:
; CHECK-NEXT:   IR %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] (extra operand: vp<[[RESUME]]> from scalar.ph)
; CHECK:        IR %exitcond = icmp eq i64 %indvars.iv.next, 1024
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.cond.cleanup>:
; CHECK-NEXT: No successors
; CHECK-NEXT: }

;; If we have a masked variant at one VF and an unmasked variant at a different
;; VF, ensure we create appropriate recipes (including a synthesized all-true
;; mask for the masked variant).

; CHECK-LABEL: LV: Checking a loop in 'test_v2_v4m'
; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' {
; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<1024> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: Successor(s): vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT:   vector.body:
; CHECK-NEXT:     EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK-NEXT:     vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT:     CLONE ir<%gep> = getelementptr ir<%b>, vp<[[STEPS]]>
; CHECK-NEXT:     vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep>
; CHECK-NEXT:     WIDEN ir<%load> = load vp<[[VEC_PTR]]>
; CHECK-NEXT:     WIDEN-CALL ir<%call> = call @foo(ir<%load>) (using library function: foo_vector_fixed2_nomask)
; CHECK-NEXT:     CLONE ir<%arrayidx> = getelementptr inbounds ir<%a>, vp<[[STEPS]]>
; CHECK-NEXT:     vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%arrayidx>
; CHECK-NEXT:     WIDEN store vp<[[VEC_PTR2]]>, ir<%call>
; CHECK-NEXT:     EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
; CHECK-NEXT:     EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
; CHECK-NEXT:   No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT:   EMIT vp<[[CMP:%.+]]> = icmp eq ir<1024>, vp<[[VTC]]>
; CHECK-NEXT:   EMIT branch-on-cond vp<[[CMP]]>
; CHECK-NEXT: Successor(s): ir-bb<for.cond.cleanup>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT:   EMIT vp<[[RESUME:%.+]]> = resume-phi vp<[[VTC]]>, ir<0>
; CHECK-NEXT: Successor(s): ir-bb<for.body>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.body>:
; CHECK-NEXT:   IR %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] (extra operand: vp<[[RESUME]]> from scalar.ph)
; CHECK:        IR %exitcond = icmp eq i64 %indvars.iv.next, 1024
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.cond.cleanup>:
; CHECK-NEXT: No successors
; CHECK-NEXT: }

; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<1024> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: Successor(s): vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT:   vector.body:
; CHECK-NEXT:     EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK-NEXT:     vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT:     CLONE ir<%gep> = getelementptr ir<%b>, vp<[[STEPS]]>
; CHECK-NEXT:     vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep>
; CHECK-NEXT:     WIDEN ir<%load> = load vp<[[VEC_PTR]]>
; CHECK-NEXT:     WIDEN-CALL ir<%call> = call @foo(ir<%load>, ir<true>) (using library function: foo_vector_fixed4_mask)
; CHECK-NEXT:     CLONE ir<%arrayidx> = getelementptr inbounds ir<%a>, vp<[[STEPS]]>
; CHECK-NEXT:     vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%arrayidx>
; CHECK-NEXT:     WIDEN store vp<[[VEC_PTR2]]>, ir<%call>
; CHECK-NEXT:     EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
; CHECK-NEXT:     EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
; CHECK-NEXT:   No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT:   EMIT vp<[[CMP:%.+]]> = icmp eq ir<1024>, vp<[[VTC]]>
; CHECK-NEXT:   EMIT branch-on-cond vp<[[CMP]]>
; CHECK-NEXT: Successor(s): ir-bb<for.cond.cleanup>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT:   EMIT vp<[[RESUME:%.+]]> = resume-phi vp<[[VTC]]>, ir<0>
; CHECK-NEXT: Successor(s): ir-bb<for.body>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.body>:
; CHECK-NEXT:   IR %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] (extra operand: vp<[[RESUME]]> from scalar.ph)
; CHECK:        IR %exitcond = icmp eq i64 %indvars.iv.next, 1024
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.cond.cleanup>:
; CHECK-NEXT: No successors
; CHECK-NEXT: }

;; If we have two variants at different VFs, neither of which is masked, we
;; still expect to see a different VPlan per VF.
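;; (For reference, attribute #2 at the end of this file gives @foo an unmasked
;; variant at each of VF=2 and VF=4, so each plan below should select the
;; variant matching its own VF.)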

; CHECK-LABEL: LV: Checking a loop in 'test_v2_v4'
; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' {
; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<1024> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: Successor(s): vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT:   vector.body:
; CHECK-NEXT:     EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK-NEXT:     vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT:     CLONE ir<%gep> = getelementptr ir<%b>, vp<[[STEPS]]>
; CHECK-NEXT:     vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep>
; CHECK-NEXT:     WIDEN ir<%load> = load vp<[[VEC_PTR]]>
; CHECK-NEXT:     WIDEN-CALL ir<%call> = call @foo(ir<%load>) (using library function: foo_vector_fixed2_nomask)
; CHECK-NEXT:     CLONE ir<%arrayidx> = getelementptr inbounds ir<%a>, vp<[[STEPS]]>
; CHECK-NEXT:     vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%arrayidx>
; CHECK-NEXT:     WIDEN store vp<[[VEC_PTR2]]>, ir<%call>
; CHECK-NEXT:     EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
; CHECK-NEXT:     EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
; CHECK-NEXT:   No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT:   EMIT vp<[[CMP:%.+]]> = icmp eq ir<1024>, vp<[[VTC]]>
; CHECK-NEXT:   EMIT branch-on-cond vp<[[CMP]]>
; CHECK-NEXT: Successor(s): ir-bb<for.cond.cleanup>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT:   EMIT vp<[[RESUME:%.+]]> = resume-phi vp<[[VTC]]>, ir<0>
; CHECK-NEXT: Successor(s): ir-bb<for.body>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.body>:
; CHECK-NEXT:   IR %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] (extra operand: vp<[[RESUME]]> from scalar.ph)
; CHECK:        IR %exitcond = icmp eq i64 %indvars.iv.next, 1024
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.cond.cleanup>:
; CHECK-NEXT: No successors
; CHECK-NEXT: }

; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<1024> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: Successor(s): vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT:   vector.body:
; CHECK-NEXT:     EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK-NEXT:     vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT:     CLONE ir<%gep> = getelementptr ir<%b>, vp<[[STEPS]]>
; CHECK-NEXT:     vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep>
; CHECK-NEXT:     WIDEN ir<%load> = load vp<[[VEC_PTR]]>
; CHECK-NEXT:     WIDEN-CALL ir<%call> = call @foo(ir<%load>) (using library function: foo_vector_fixed4_nomask)
; CHECK-NEXT:     CLONE ir<%arrayidx> = getelementptr inbounds ir<%a>, vp<[[STEPS]]>
; CHECK-NEXT:     vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%arrayidx>
; CHECK-NEXT:     WIDEN store vp<[[VEC_PTR2]]>, ir<%call>
; CHECK-NEXT:     EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
; CHECK-NEXT:     EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
; CHECK-NEXT:   No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT:   EMIT vp<[[CMP:%.+]]> = icmp eq ir<1024>, vp<[[VTC]]>
; CHECK-NEXT:   EMIT branch-on-cond vp<[[CMP]]>
; CHECK-NEXT: Successor(s): ir-bb<for.cond.cleanup>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT:   EMIT vp<[[RESUME:%.+]]> = resume-phi vp<[[VTC]]>, ir<0>
; CHECK-NEXT: Successor(s): ir-bb<for.body>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.body>:
; CHECK-NEXT:   IR %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] (extra operand: vp<[[RESUME]]> from scalar.ph)
; CHECK:        IR %exitcond = icmp eq i64 %indvars.iv.next, 1024
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.cond.cleanup>:
; CHECK-NEXT: No successors
; CHECK-NEXT: }

define void @test_v4_v4m(ptr noalias %a, ptr readonly %b) #3 {
; CHECK-LABEL: @test_v4_v4m(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i64, ptr [[TMP1]], i32 0
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i64> @foo_vector_fixed4_nomask(<4 x i64> [[WIDE_LOAD]])
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0
; CHECK-NEXT:    store <4 x i64> [[TMP3]], ptr [[TMP5]], align 8
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[LOAD:%.*]] = load i64, ptr [[GEP]], align 8
; CHECK-NEXT:    [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR1:[0-9]+]]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    store i64 [[CALL]], ptr [[ARRAYIDX]], align 8
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %gep = getelementptr i64, ptr %b, i64 %indvars.iv
  %load = load i64, ptr %gep
  %call = call i64 @foo(i64 %load) #0
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %indvars.iv
  store i64 %call, ptr %arrayidx
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.cond.cleanup, label %for.body

for.cond.cleanup:
  ret void
}

define void @test_v2_v4m(ptr noalias %a, ptr readonly %b) #3 {
; CHECK-LABEL: @test_v2_v4m(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i64, ptr [[TMP1]], i32 0
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i64> @foo_vector_fixed4_mask(<4 x i64> [[WIDE_LOAD]], <4 x i1> splat (i1 true))
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0
; CHECK-NEXT:    store <4 x i64> [[TMP3]], ptr [[TMP5]], align 8
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[LOAD:%.*]] = load i64, ptr [[GEP]], align 8
; CHECK-NEXT:    [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR2:[0-9]+]]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    store i64 [[CALL]], ptr [[ARRAYIDX]], align 8
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %gep = getelementptr i64, ptr %b, i64 %indvars.iv
  %load = load i64, ptr %gep
  %call = call i64 @foo(i64 %load) #1
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %indvars.iv
  store i64 %call, ptr %arrayidx
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.cond.cleanup, label %for.body

for.cond.cleanup:
  ret void
}

define void @test_v2_v4(ptr noalias %a, ptr readonly %b) #3 {
; CHECK-LABEL: @test_v2_v4(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i64, ptr [[TMP1]], i32 0
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i64> @foo_vector_fixed4_nomask(<4 x i64> [[WIDE_LOAD]])
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0
; CHECK-NEXT:    store <4 x i64> [[TMP3]], ptr [[TMP5]], align 8
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[LOAD:%.*]] = load i64, ptr [[GEP]], align 8
; CHECK-NEXT:    [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR3:[0-9]+]]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    store i64 [[CALL]], ptr [[ARRAYIDX]], align 8
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %gep = getelementptr i64, ptr %b, i64 %indvars.iv
  %load = load i64, ptr %gep
  %call = call i64 @foo(i64 %load) #2
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %indvars.iv
  store i64 %call, ptr %arrayidx
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.cond.cleanup, label %for.body

for.cond.cleanup:
  ret void
}

declare i64 @foo(i64)

;; Fixed-width vector variants of @foo.
declare <2 x i64> @foo_vector_fixed2_nomask(<2 x i64>)
declare <4 x i64> @foo_vector_fixed4_nomask(<4 x i64>)
declare <4 x i64> @foo_vector_fixed4_mask(<4 x i64>, <4 x i1>)

attributes #0 = { nounwind "vector-function-abi-variant"="_ZGV_LLVM_N4v_foo(foo_vector_fixed4_nomask),_ZGV_LLVM_M4v_foo(foo_vector_fixed4_mask)" }
attributes #1 = { nounwind "vector-function-abi-variant"="_ZGV_LLVM_N2v_foo(foo_vector_fixed2_nomask),_ZGV_LLVM_M4v_foo(foo_vector_fixed4_mask)" }
attributes #2 = { nounwind "vector-function-abi-variant"="_ZGV_LLVM_N2v_foo(foo_vector_fixed2_nomask),_ZGV_LLVM_N4v_foo(foo_vector_fixed4_nomask)" }
attributes #3 = { "target-features"="+sve" vscale_range(2,16) "no-trapping-math"="false" }
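
;; A rough breakdown of the VFABI mangled names used above, following the
;; _ZGV<isa><mask><vlen><params>_<scalar>(<vector>) scheme (LLVM's
;; VFABIDemangler is the authoritative reference). For example, for
;; _ZGV_LLVM_N4v_foo(foo_vector_fixed4_nomask):
;;   _ZGV                      - vector-function ABI prefix
;;   _LLVM_                    - ISA token (LLVM's internal vector ABI)
;;   N                         - unmasked variant ('M' means masked, which
;;                               appends a vector mask parameter)
;;   4                         - vectorization factor (VF)
;;   v                         - one plain vector parameter
;;   _foo                      - name of the scalar function
;;   (foo_vector_fixed4_nomask) - name of the vector variant to call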