; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=s390x-unknown-linux -mcpu=z13 -passes=loop-vectorize,dce,instcombine -force-vector-width=2 < %s | FileCheck %s
;
; Test that the loop vectorizer does not generate vector addresses that must
; then always be extracted.
;
; The opt invocation above forces a vector width of 2, so each iteration of the
; expected vector body covers two iterations of the original scalar loop (the
; expected index is advanced by 2).

; Check that the addresses for a scalarized memory access are not extracted
; from a vector register.
;
; The loop stores the constant 4 to every fourth i32 element of %A (element
; index = iv << 2). The expected vector body keeps this as two scalar stores,
; each addressed through its own scalar getelementptr; no vector of addresses
; is built and no extractelement appears.
define i32 @foo(ptr nocapture %A) {
; CHECK-LABEL: @foo(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP1:%.*]] = or disjoint i64 [[TMP0]], 4
; CHECK-NEXT: [[DOTIDX:%.*]] = shl nsw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[DOTIDX]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP1]]
; CHECK-NEXT: store i32 4, ptr [[TMP2]], align 4
; CHECK-NEXT: store i32 4, ptr [[TMP3]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: ret i32 poison
;

entry:
  br label %for.body

for.body:
  ; Induction variable: starts at 0 and is incremented by 1 each iteration.
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  ; Element index is iv * 4, so consecutive iterations touch i32 elements
  ; that are four elements apart.
  %0 = shl nsw i64 %indvars.iv, 2
  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %0
  store i32 4, ptr %arrayidx, align 4
  %indvars.iv.next = add i64 %indvars.iv, 1
  ; The exit test is done on the incremented induction variable truncated to
  ; i32; the loop is left once that value equals 10000.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 10000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ; No result is computed; the function just returns poison.
  ret i32 poison
}


; Check that a load of an address is scalarized.
;
; Each iteration loads a pointer from %PtrPtr[iv], dereferences it to read an
; i32, and stores that value to %A[iv]. The expected vector body loads both
; pointers and both i32 values with scalar loads, assembles the two values
; into a <2 x i32> with insertelement, and writes them with one vector store.
define i32 @foo1(ptr nocapture noalias %A, ptr nocapture %PtrPtr) {
; CHECK-LABEL: @foo1(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = or disjoint i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[PTRPTR:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[PTRPTR]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP1]], align 8
; CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP2]], align 8
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> poison, i32 [[TMP5]], i64 0
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP6]], i64 1
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: store <2 x i32> [[TMP8]], ptr [[TMP9]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: ret i32 poison
;

entry:
  br label %for.body

for.body:
  ; Induction variable: starts at 0 and is incremented by 1 each iteration.
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  ; First load fetches a pointer from the pointer array; the second load
  ; dereferences that pointer, so the loaded value is used as an address.
  %ptr = getelementptr inbounds ptr, ptr %PtrPtr, i64 %indvars.iv
  %el = load ptr, ptr %ptr
  %v = load i32, ptr %el
  ; The destination is indexed directly by the induction variable.
  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
  store i32 %v, ptr %arrayidx, align 4
  %indvars.iv.next = add i64 %indvars.iv, 1
  ; The exit test is done on the incremented induction variable truncated to
  ; i32; the loop is left once that value equals 10000.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 10000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ; No result is computed; the function just returns poison.
  ret i32 poison
}