xref: /llvm-project/llvm/test/Transforms/LoopVectorize/interleaved-accesses-masked-group.ll (revision eecb99c5f66c8491766628a2925587e20f3b1dbd)
1; REQUIRES: asserts
2; RUN: opt -S -passes=loop-vectorize -force-vector-width=8 -force-vector-interleave=1 -enable-interleaved-mem-accesses -debug-only=loop-vectorize,vectorutils -disable-output < %s 2>&1 | FileCheck %s -check-prefix=STRIDED_UNMASKED
3; RUN: opt -S -passes=loop-vectorize -force-vector-width=8 -force-vector-interleave=1 -enable-interleaved-mem-accesses -enable-masked-interleaved-mem-accesses -debug-only=loop-vectorize,vectorutils -disable-output < %s 2>&1 | FileCheck %s -check-prefix=STRIDED_MASKED
4
; Module data layout: little-endian ("e"), ELF name mangling ("m:e") and
; 32-bit pointers ("p:32:32"). The identical string appears again next to the
; target triple further down in this file.
5target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
6
7; We test here that the loop-vectorizer forms interleave-groups from
8; predicated memory accesses only if they are both in the same (predicated)
9; block (first scenario below).
10; If the accesses are not in the same predicated block, they are not combined
11; into a single interleave-group (scenarios 2,3 below).
12
13; Scenario 1: Check the case where it is legal to create masked interleave-
14; groups. Altogether two groups are created (one for loads and one for stores)
15; when masked-interleaved-accesses are enabled. When masked-interleaved-accesses
16; are disabled we do not create any interleave-group.
17;
18; void masked_strided1(const unsigned char* restrict p,
19;                     unsigned char* restrict q,
20;                     unsigned char guard) {
21; for(ix=0; ix < 1024; ++ix) {
22;     if (ix > guard) {
23;         char left = p[2*ix];
24;         char right = p[2*ix + 1];
25;         char max = max(left, right);
26;         q[2*ix] = max;
27;         q[2*ix+1] = 0 - max;
28;     }
29; }
30;}
31
32
33; STRIDED_UNMASKED: LV: Checking a loop in 'masked_strided1'
34; STRIDED_UNMASKED: LV: Analyzing interleaved accesses...
35; STRIDED_UNMASKED-NOT: LV: Creating an interleave group
36
37; STRIDED_MASKED: LV: Checking a loop in 'masked_strided1'
38; STRIDED_MASKED: LV: Analyzing interleaved accesses...
39; STRIDED_MASKED-NEXT: LV: Creating an interleave group with:  store i8 %{{.*}}, ptr %{{.*}}, align 1
40; STRIDED_MASKED-NEXT: LV: Inserted:  store i8  %{{.*}}, ptr %{{.*}}, align 1
41; STRIDED_MASKED-NEXT:     into the interleave group with  store i8 %{{.*}}, ptr %{{.*}}, align 1
42; STRIDED_MASKED-NEXT: LV: Creating an interleave group with:   %{{.*}} = load i8, ptr %{{.*}}, align 1
43; STRIDED_MASKED-NEXT: LV: Inserted:  %{{.*}} = load i8, ptr %{{.*}}, align 1
44; STRIDED_MASKED-NEXT:     into the interleave group with   %{{.*}} = load i8, ptr %{{.*}}, align 1
45
46; Scenario 2: Check the case where it is illegal to create a masked interleave-
47; group because one access is predicated and the other isn't (here the first
48; store is unconditional and the second store is predicated).
49; We therefore create a separate interleave-group with gaps for each of the
50; stores (if masked-interleaved-accesses are enabled).
51; If masked-interleaved-accesses is not enabled we create only one interleave
52; group of stores (for the non-predicated store) and it is later invalidated
53; due to gaps.
54;
55; void masked_strided2(const unsigned char* restrict p,
56;                     unsigned char* restrict q,
57;                     unsigned char guard) {
58; for(ix=0; ix < 1024; ++ix) {
59;     q[2*ix] = 1;
60;     if (ix > guard) {
61;         q[2*ix+1] = 2;
62;     }
63; }
64;}
65
66; STRIDED_UNMASKED: LV: Checking a loop in 'masked_strided2'
67; STRIDED_UNMASKED: LV: Analyzing interleaved accesses...
68; STRIDED_UNMASKED-NEXT: LV: Creating an interleave group with:  store i8 1, ptr %{{.*}}, align 1
69; STRIDED_UNMASKED-NEXT: LV: Invalidate candidate interleaved store group due to gaps.
70; STRIDED_UNMASKED-NOT: LV: Creating an interleave group
71
72; STRIDED_MASKED: LV: Checking a loop in 'masked_strided2'
73; STRIDED_MASKED: LV: Analyzing interleaved accesses...
74; STRIDED_MASKED-NEXT: LV: Creating an interleave group with:  store i8 2, ptr %{{.*}}, align 1
75; STRIDED_MASKED-NEXT: LV: Creating an interleave group with:  store i8 1, ptr %{{.*}}, align 1
76; STRIDED_MASKED-NOT: LV: Invalidate candidate interleaved store group due to gaps.
77
78
79; Scenario 3: Check the case where it is illegal to create a masked interleave-
80; group because the two accesses are in separate predicated blocks.
81; We therefore create a separate interleave-group with gaps for each of the accesses.
82; If masked-interleaved-accesses is not enabled we don't create any interleave
83; group because all accesses are predicated.
84;
85; void masked_strided3(const unsigned char* restrict p,
86;                     unsigned char* restrict q,
87;                     unsigned char guard1,
88;                     unsigned char guard2) {
89; for(ix=0; ix < 1024; ++ix) {
90;     if (ix > guard1) {
91;         q[2*ix] = 1;
92;     }
93;     if (ix > guard2) {
94;         q[2*ix+1] = 2;
95;     }
96; }
97;}
98
99
100; STRIDED_UNMASKED: LV: Checking a loop in 'masked_strided3'
101; STRIDED_UNMASKED: LV: Analyzing interleaved accesses...
102; STRIDED_UNMASKED-NOT: LV: Creating an interleave group
103
104; STRIDED_MASKED: LV: Checking a loop in 'masked_strided3'
105; STRIDED_MASKED: LV: Analyzing interleaved accesses...
106; STRIDED_MASKED-NEXT: LV: Creating an interleave group with:  store i8 2, ptr %{{.*}}, align 1
107; STRIDED_MASKED-NEXT: LV: Creating an interleave group with:  store i8 1, ptr %{{.*}}, align 1
108; STRIDED_MASKED-NOT: LV: Invalidate candidate interleaved store group due to gaps.
109
110
111; ModuleID = 'test.c'
; Module-level target description for the three test functions below:
; a 32-bit x86 Linux triple with a little-endian, 32-bit-pointer data layout.
112source_filename = "test.c"
113target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
114target triple = "i386-unknown-linux-gnu"
115
; IR for scenario 1. The loop counter starts at 0 and the loop exits once %inc
; reaches 1024. Both loads from %p and both stores to %q live in the single
; guarded block %if.then, which runs only when ix >u zext(%guard).
116define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i8 zeroext %guard) local_unnamed_addr #0 {
117entry:
  ; Widen the i8 guard once, outside the loop, for the i32 compare below.
118  %conv = zext i8 %guard to i32
119  br label %for.body
120
121for.body:
122  %ix.024 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
  ; Unsigned compare: iterations with ix <= guard skip straight to %for.inc.
123  %cmp1 = icmp ugt i32 %ix.024, %conv
124  br i1 %cmp1, label %if.then, label %for.inc
125
126if.then:
  ; Even index 2*ix, used for both p[] and q[].
127  %mul = shl nuw nsw i32 %ix.024, 1
128  %arrayidx = getelementptr inbounds i8, ptr %p, i32 %mul
129  %0 = load i8, ptr %arrayidx, align 1
  ; Odd index 2*ix + 1: bit 0 of %mul is zero after the shift, so the
  ; `or disjoint` with 1 is equivalent to an add.
130  %add = or disjoint i32 %mul, 1
131  %arrayidx4 = getelementptr inbounds i8, ptr %p, i32 %add
132  %1 = load i8, ptr %arrayidx4, align 1
  ; Signed maximum of the two loaded bytes (picks %1 when %0 < %1, else %0).
133  %cmp.i = icmp slt i8 %0, %1
134  %spec.select.i = select i1 %cmp.i, i8 %1, i8 %0
  ; q[2*ix] = max
135  %arrayidx6 = getelementptr inbounds i8, ptr %q, i32 %mul
136  store i8 %spec.select.i, ptr %arrayidx6, align 1
  ; q[2*ix + 1] = 0 - max
137  %sub = sub i8 0, %spec.select.i
138  %arrayidx11 = getelementptr inbounds i8, ptr %q, i32 %add
139  store i8 %sub, ptr %arrayidx11, align 1
140  br label %for.inc
141
142for.inc:
  ; Loop latch: advance ix and leave the loop when it reaches 1024.
143  %inc = add nuw nsw i32 %ix.024, 1
144  %exitcond = icmp eq i32 %inc, 1024
145  br i1 %exitcond, label %for.end, label %for.body
146
147for.end:
148  ret void
149}
150
151
; IR for scenario 2. The store of 1 to q[2*ix] is in %for.body and executes on
; every iteration; the store of 2 to q[2*ix + 1] is in %if.then and executes
; only when ix >u zext(%guard). %p is not referenced by the body.
152define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr noalias nocapture %q, i8 zeroext %guard) local_unnamed_addr #0 {
153entry:
  ; Widen the i8 guard once, outside the loop, for the i32 compare below.
154  %conv = zext i8 %guard to i32
155  br label %for.body
156
157for.body:
158  %ix.012 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
  ; Even index 2*ix; the store through it is unconditional.
159  %mul = shl nuw nsw i32 %ix.012, 1
160  %arrayidx = getelementptr inbounds i8, ptr %q, i32 %mul
161  store i8 1, ptr %arrayidx, align 1
  ; Only the second store is guarded by the unsigned compare.
162  %cmp1 = icmp ugt i32 %ix.012, %conv
163  br i1 %cmp1, label %if.then, label %for.inc
164
165if.then:
  ; Odd index 2*ix + 1: bit 0 of %mul is zero after the shift, so the
  ; `or disjoint` with 1 is equivalent to an add.
166  %add = or disjoint i32 %mul, 1
167  %arrayidx3 = getelementptr inbounds i8, ptr %q, i32 %add
168  store i8 2, ptr %arrayidx3, align 1
169  br label %for.inc
170
171for.inc:
  ; Loop latch: advance ix and leave the loop when it reaches 1024.
172  %inc = add nuw nsw i32 %ix.012, 1
173  %exitcond = icmp eq i32 %inc, 1024
174  br i1 %exitcond, label %for.end, label %for.body
175
176for.end:
177  ret void
178}
179
180
; IR for scenario 3. The two stores to %q sit in two different guarded blocks:
; %if.then (store of 1 to q[2*ix], taken when ix >u zext(%guard1)) and
; %if.then6 (store of 2 to q[2*ix + 1], taken when ix >u zext(%guard2)).
; %p is not referenced by the body.
181define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr noalias nocapture %q, i8 zeroext %guard1, i8 zeroext %guard2) local_unnamed_addr #0 {
182entry:
  ; Widen both i8 guards once, outside the loop, for the i32 compares below.
183  %conv = zext i8 %guard1 to i32
184  %conv3 = zext i8 %guard2 to i32
185  br label %for.body
186
187for.body:
188  %ix.018 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
  ; Even index 2*ix, computed before the first guard so both blocks can use it.
189  %mul = shl nuw nsw i32 %ix.018, 1
190  %cmp1 = icmp ugt i32 %ix.018, %conv
191  br i1 %cmp1, label %if.then, label %if.end
192
193if.then:
  ; First guarded store: q[2*ix] = 1.
194  %arrayidx = getelementptr inbounds i8, ptr %q, i32 %mul
195  store i8 1, ptr %arrayidx, align 1
196  br label %if.end
197
198if.end:
  ; Second, independent guard using %guard2.
199  %cmp4 = icmp ugt i32 %ix.018, %conv3
200  br i1 %cmp4, label %if.then6, label %for.inc
201
202if.then6:
  ; Second guarded store: q[2*ix + 1] = 2. Bit 0 of %mul is zero after the
  ; shift, so the `or disjoint` with 1 is equivalent to an add.
203  %add = or disjoint i32 %mul, 1
204  %arrayidx7 = getelementptr inbounds i8, ptr %q, i32 %add
205  store i8 2, ptr %arrayidx7, align 1
206  br label %for.inc
207
208for.inc:
  ; Loop latch: advance ix and leave the loop when it reaches 1024.
209  %inc = add nuw nsw i32 %ix.018, 1
210  %exitcond = icmp eq i32 %inc, 1024
211  br i1 %exitcond, label %for.end, label %for.body
212
213for.end:
214  ret void
215}
216
; Attribute group #0, referenced by all three functions above: pins the x86
; target feature set to fxsr, mmx, sse, sse2 and x87.
217attributes #0 = {  "target-features"="+fxsr,+mmx,+sse,+sse2,+x87"  }
218