; xref: /llvm-project/llvm/test/Transforms/LoopVectorize/loop-scalars.ll (revision 23c2f2e6b24d80d3472dca897feac10e9c34c51f)
; REQUIRES: asserts
; RUN: opt < %s -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -instcombine -debug-only=loop-vectorize -disable-output -print-after=instcombine -enable-new-pm=0 2>&1 | FileCheck %s
; RUN: opt < %s -aa-pipeline=basic-aa -passes=loop-vectorize,instcombine -force-vector-width=2 -force-vector-interleave=1 -debug-only=loop-vectorize -disable-output -print-after=instcombine 2>&1 | FileCheck %s

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
6
; The loop stores the pointer value &b[i] itself, so the GEP computing it
; must be kept as a widened (vector) GEP with a <2 x i64> index, feeding a
; <2 x i32*> store. The CHECK-NOT verifies the GEP is never classified as
; a scalar instruction by the vectorizer's cost/legality analysis.
; CHECK-LABEL: vector_gep
; CHECK-NOT:   LV: Found scalar instruction: %tmp0 = getelementptr inbounds i32, i32* %b, i64 %i
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %vector.ph ], [ [[VEC_IND_NEXT:%.*]], %vector.body ]
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, i32* %b, <2 x i64> [[VEC_IND]]
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32*, i32** %a, i64 [[INDEX]]
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32** [[TMP2]] to <2 x i32*>*
; CHECK-NEXT:    store <2 x i32*> [[TMP1]], <2 x i32*>* [[TMP3]], align 8
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
; CHECK:         br i1 {{.*}}, label %middle.block, label %vector.body
;
define void @vector_gep(i32** %a, i32 *%b, i64 %n) {
entry:
  br label %for.body

for.body:                                         ; a[i] = &b[i], unit-stride induction
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  %tmp0 = getelementptr inbounds i32, i32* %b, i64 %i
  %tmp1 = getelementptr inbounds i32*, i32** %a, i64 %i
  store i32* %tmp0, i32** %tmp1, align 8
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}
36
; Same pattern as @vector_gep, but the induction steps by 2, so the stores
; are non-consecutive and get scalarized. Both GEPs, the induction phi and
; its increment must therefore be reported as scalar instructions, and the
; vector body must contain two per-lane GEP/store pairs instead of a wide
; vector store.
; CHECK-LABEL: scalar_store
; CHECK:       LV: Found scalar instruction: %tmp1 = getelementptr inbounds i32*, i32** %a, i64 %i
; CHECK-NEXT:  LV: Found scalar instruction: %tmp0 = getelementptr inbounds i32, i32* %b, i64 %i
; CHECK-NEXT:  LV: Found scalar instruction: %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
; CHECK-NEXT:  LV: Found scalar instruction: %i.next = add nuw nsw i64 %i, 2
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1
; CHECK-NEXT:    [[TMP4:%.*]] = or i64 [[OFFSET_IDX]], 2
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* %b, i64 [[OFFSET_IDX]]
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* %b, i64 [[TMP4]]
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32*, i32** %a, i64 [[OFFSET_IDX]]
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32*, i32** %a, i64 [[TMP4]]
; CHECK-NEXT:    store i32* [[TMP5]], i32** [[TMP7]], align 8
; CHECK-NEXT:    store i32* [[TMP6]], i32** [[TMP8]], align 8
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK:         br i1 {{.*}}, label %middle.block, label %vector.body
;
define void @scalar_store(i32** %a, i32 *%b, i64 %n) {
entry:
  br label %for.body

for.body:                                         ; a[i] = &b[i], stride-2 induction
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  %tmp0 = getelementptr inbounds i32, i32* %b, i64 %i
  %tmp1 = getelementptr inbounds i32*, i32** %a, i64 %i
  store i32* %tmp0, i32** %tmp1, align 8
  %i.next = add nuw nsw i64 %i, 2
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}
71
; Variant of @scalar_store where the stored pointer is produced through a
; longer chain: a GEP, a bitcast, and a two-step address computation with a
; zero-offset GEP (%tmp2). The whole chain must be scalarized along with
; the stride-2 induction, producing per-lane GEPs, bitcasts and stores in
; the vector body.
; CHECK-LABEL: expansion
; CHECK:       LV: Found scalar instruction: %tmp3 = getelementptr inbounds i32*, i32** %tmp2, i64 %i
; CHECK-NEXT:  LV: Found scalar instruction: %tmp1 = bitcast i64* %tmp0 to i32*
; CHECK-NEXT:  LV: Found scalar instruction: %tmp2 = getelementptr inbounds i32*, i32** %a, i64 0
; CHECK-NEXT:  LV: Found scalar instruction: %tmp0 = getelementptr inbounds i64, i64* %b, i64 %i
; CHECK-NEXT:  LV: Found scalar instruction: %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
; CHECK-NEXT:  LV: Found scalar instruction: %i.next = add nuw nsw i64 %i, 2
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1
; CHECK-NEXT:    [[TMP4:%.*]] = or i64 [[OFFSET_IDX]], 2
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, i64* %b, i64 [[OFFSET_IDX]]
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, i64* %b, i64 [[TMP4]]
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32*, i32** %a, i64 [[OFFSET_IDX]]
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32*, i32** %a, i64 [[TMP4]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i32** [[TMP7]] to i64**
; CHECK-NEXT:    store i64* [[TMP5]], i64** [[TMP9]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i32** [[TMP8]] to i64**
; CHECK-NEXT:    store i64* [[TMP6]], i64** [[TMP10]], align 8
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK:         br i1 {{.*}}, label %middle.block, label %vector.body
;
define void @expansion(i32** %a, i64 *%b, i64 %n) {
entry:
  br label %for.body

for.body:                                         ; a[i] = (i32*)&b[i], stride-2 induction
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  %tmp0 = getelementptr inbounds i64, i64* %b, i64 %i
  %tmp1 = bitcast i64* %tmp0 to i32*
  %tmp2 = getelementptr inbounds i32*, i32** %a, i64 0
  %tmp3 = getelementptr inbounds i32*, i32** %tmp2, i64 %i
  store i32* %tmp1, i32** %tmp3, align 8
  %i.next = add nuw nsw i64 %i, 2
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}
112
; Here the pointer being stored through is loaded from memory rather than
; computed by a GEP/bitcast of the induction, so it must NOT be scalarized
; (CHECK-NOT): the vector body performs a wide <2 x i32*> load and then
; extracts each lane to feed the scalarized stores. Only the unit-stride
; induction and its increment are scalar.
; CHECK-LABEL: no_gep_or_bitcast
; CHECK-NOT:   LV: Found scalar instruction: %tmp1 = load i32*, i32** %tmp0, align 8
; CHECK:       LV: Found scalar instruction: %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
; CHECK-NEXT:  LV: Found scalar instruction: %i.next = add nuw nsw i64 %i, 1
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32*, i32** %a, i64 [[INDEX]]
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32** [[TMP1]] to <2 x i32*>*
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32*>, <2 x i32*>* [[TMP2]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i32*> [[WIDE_LOAD]], i32 0
; CHECK-NEXT:    store i32 0, i32* [[TMP3]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i32*> [[WIDE_LOAD]], i32 1
; CHECK-NEXT:    store i32 0, i32* [[TMP4]], align 8
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK:         br i1 {{.*}}, label %middle.block, label %vector.body
;
define void @no_gep_or_bitcast(i32** noalias %a, i64 %n) {
entry:
  br label %for.body

for.body:                                         ; *a[i] = 0, unit-stride induction
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  %tmp0 = getelementptr inbounds i32*, i32** %a, i64 %i
  %tmp1 = load i32*, i32** %tmp0, align 8
  store i32 0, i32* %tmp1, align 8
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}
145