; REQUIRES: asserts
; RUN: opt -S -passes=loop-vectorize -force-vector-width=8 -force-vector-interleave=1 -enable-interleaved-mem-accesses -debug-only=loop-vectorize,vectorutils -disable-output < %s 2>&1 | FileCheck %s -check-prefix=STRIDED_UNMASKED
; RUN: opt -S -passes=loop-vectorize -force-vector-width=8 -force-vector-interleave=1 -enable-interleaved-mem-accesses -enable-masked-interleaved-mem-accesses -debug-only=loop-vectorize,vectorutils -disable-output < %s 2>&1 | FileCheck %s -check-prefix=STRIDED_MASKED

target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"

; We test here that the loop-vectorizer forms interleave-groups from
; predicated memory accesses only if they are both in the same (predicated)
; block (first scenario below).
; If the accesses are not in the same predicated block, they are not combined
; into a single interleave-group (scenarios 2,3 below).

; Scenario 1: Check the case where it is legal to create masked interleave-
; groups. Altogether two groups are created (one for loads and one for stores)
; when masked-interleaved-accesses are enabled. When masked-interleaved-accesses
; are disabled we do not create any interleave-group.
;
; void masked_strided1(const unsigned char* restrict p,
;                      unsigned char* restrict q,
;                      unsigned char guard) {
;   for(ix=0; ix < 1024; ++ix) {
;     if (ix > guard) {
;       char left = p[2*ix];
;       char right = p[2*ix + 1];
;       char max = max(left, right);
;       q[2*ix] = max;
;       q[2*ix+1] = 0 - max;
;     }
;   }
; }


; STRIDED_UNMASKED: LV: Checking a loop in 'masked_strided1'
; STRIDED_UNMASKED: LV: Analyzing interleaved accesses...
; STRIDED_UNMASKED-NOT: LV: Creating an interleave group

; STRIDED_MASKED: LV: Checking a loop in 'masked_strided1'
; STRIDED_MASKED: LV: Analyzing interleaved accesses...
; STRIDED_MASKED-NEXT: LV: Creating an interleave group with: store i8 %{{.*}}, ptr %{{.*}}, align 1
; STRIDED_MASKED-NEXT: LV: Inserted: store i8 %{{.*}}, ptr %{{.*}}, align 1
; STRIDED_MASKED-NEXT: into the interleave group with store i8 %{{.*}}, ptr %{{.*}}, align 1
; STRIDED_MASKED-NEXT: LV: Creating an interleave group with: %{{.*}} = load i8, ptr %{{.*}}, align 1
; STRIDED_MASKED-NEXT: LV: Inserted: %{{.*}} = load i8, ptr %{{.*}}, align 1
; STRIDED_MASKED-NEXT: into the interleave group with %{{.*}} = load i8, ptr %{{.*}}, align 1

; Scenario 2: Check the case where it is illegal to create a masked interleave-
; group because the second access is predicated, and the first isn't.
; We therefore create a separate interleave-group with gaps for each of the
; stores (if masked-interleaved-accesses are enabled).
; If masked-interleaved-accesses are not enabled we create only one interleave
; group of stores (for the non-predicated store) and it is later invalidated
; due to gaps.
;
; void masked_strided2(const unsigned char* restrict p,
;                      unsigned char* restrict q,
;                      unsigned char guard) {
;   for(ix=0; ix < 1024; ++ix) {
;     q[2*ix] = 1;
;     if (ix > guard) {
;       q[2*ix+1] = 2;
;     }
;   }
; }

; STRIDED_UNMASKED: LV: Checking a loop in 'masked_strided2'
; STRIDED_UNMASKED: LV: Analyzing interleaved accesses...
; STRIDED_UNMASKED-NEXT: LV: Creating an interleave group with: store i8 1, ptr %{{.*}}, align 1
; STRIDED_UNMASKED-NEXT: LV: Invalidate candidate interleaved store group due to gaps.
; STRIDED_UNMASKED-NOT: LV: Creating an interleave group

; STRIDED_MASKED: LV: Checking a loop in 'masked_strided2'
; STRIDED_MASKED: LV: Analyzing interleaved accesses...
; STRIDED_MASKED-NEXT: LV: Creating an interleave group with: store i8 2, ptr %{{.*}}, align 1
; STRIDED_MASKED-NEXT: LV: Creating an interleave group with: store i8 1, ptr %{{.*}}, align 1
; STRIDED_MASKED-NOT: LV: Invalidate candidate interleaved store group due to gaps.


; Scenario 3: Check the case where it is illegal to create a masked interleave-
; group because the two accesses are in separate predicated blocks.
; We therefore create a separate interleave-group with gaps for each of the
; accesses (if masked-interleaved-accesses are enabled).
; If masked-interleaved-accesses are not enabled we don't create any interleave
; group because all accesses are predicated.
;
; void masked_strided3(const unsigned char* restrict p,
;                      unsigned char* restrict q,
;                      unsigned char guard1,
;                      unsigned char guard2) {
;   for(ix=0; ix < 1024; ++ix) {
;     if (ix > guard1) {
;       q[2*ix] = 1;
;     }
;     if (ix > guard2) {
;       q[2*ix+1] = 2;
;     }
;   }
; }


; STRIDED_UNMASKED: LV: Checking a loop in 'masked_strided3'
; STRIDED_UNMASKED: LV: Analyzing interleaved accesses...
; STRIDED_UNMASKED-NOT: LV: Creating an interleave group

; STRIDED_MASKED: LV: Checking a loop in 'masked_strided3'
; STRIDED_MASKED: LV: Analyzing interleaved accesses...
; STRIDED_MASKED-NEXT: LV: Creating an interleave group with: store i8 2, ptr %{{.*}}, align 1
; STRIDED_MASKED-NEXT: LV: Creating an interleave group with: store i8 1, ptr %{{.*}}, align 1
; STRIDED_MASKED-NOT: LV: Invalidate candidate interleaved store group due to gaps.


; ModuleID = 'test.c'
source_filename = "test.c"
target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
target triple = "i386-unknown-linux-gnu"

; masked_strided1: every memory access lives in the single predicated block
; %if.then, which runs only when ix > zext(guard). The block loads p[2*ix] and
; p[2*ix+1], then stores to q[2*ix] and q[2*ix+1].
define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i8 zeroext %guard) local_unnamed_addr #0 {
entry:
  %conv = zext i8 %guard to i32
  br label %for.body

for.body:
  ; ix counts 0..1023; the guard comparison is unsigned.
  %ix.024 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
  %cmp1 = icmp ugt i32 %ix.024, %conv
  br i1 %cmp1, label %if.then, label %for.inc

if.then:
  ; %mul = 2*ix; %add = 2*ix + 1 (the low bit of %mul is clear, hence "disjoint").
  %mul = shl nuw nsw i32 %ix.024, 1
  %arrayidx = getelementptr inbounds i8, ptr %p, i32 %mul
  %0 = load i8, ptr %arrayidx, align 1
  %add = or disjoint i32 %mul, 1
  %arrayidx4 = getelementptr inbounds i8, ptr %p, i32 %add
  %1 = load i8, ptr %arrayidx4, align 1
  ; Signed maximum of the two loaded bytes.
  %cmp.i = icmp slt i8 %0, %1
  %spec.select.i = select i1 %cmp.i, i8 %1, i8 %0
  %arrayidx6 = getelementptr inbounds i8, ptr %q, i32 %mul
  store i8 %spec.select.i, ptr %arrayidx6, align 1
  ; The odd element receives the negated maximum.
  %sub = sub i8 0, %spec.select.i
  %arrayidx11 = getelementptr inbounds i8, ptr %q, i32 %add
  store i8 %sub, ptr %arrayidx11, align 1
  br label %for.inc

for.inc:
  %inc = add nuw nsw i32 %ix.024, 1
  %exitcond = icmp eq i32 %inc, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}


; masked_strided2: the store of 1 to q[2*ix] is unconditional (in %for.body),
; while the store of 2 to q[2*ix+1] is predicated on ix > zext(guard)
; (in %if.then). %p is not accessed.
define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr noalias nocapture %q, i8 zeroext %guard) local_unnamed_addr #0 {
entry:
  %conv = zext i8 %guard to i32
  br label %for.body

for.body:
  %ix.012 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
  %mul = shl nuw nsw i32 %ix.012, 1
  %arrayidx = getelementptr inbounds i8, ptr %q, i32 %mul
  ; Non-predicated store to the even element.
  store i8 1, ptr %arrayidx, align 1
  %cmp1 = icmp ugt i32 %ix.012, %conv
  br i1 %cmp1, label %if.then, label %for.inc

if.then:
  ; Predicated store to the odd element.
  %add = or disjoint i32 %mul, 1
  %arrayidx3 = getelementptr inbounds i8, ptr %q, i32 %add
  store i8 2, ptr %arrayidx3, align 1
  br label %for.inc

for.inc:
  %inc = add nuw nsw i32 %ix.012, 1
  %exitcond = icmp eq i32 %inc, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}


; masked_strided3: the two stores sit in different predicated blocks with
; different guards: q[2*ix] = 1 in %if.then (ix > zext(guard1)) and
; q[2*ix+1] = 2 in %if.then6 (ix > zext(guard2)). %p is not accessed.
define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr noalias nocapture %q, i8 zeroext %guard1, i8 zeroext %guard2) local_unnamed_addr #0 {
entry:
  %conv = zext i8 %guard1 to i32
  %conv3 = zext i8 %guard2 to i32
  br label %for.body

for.body:
  %ix.018 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
  %mul = shl nuw nsw i32 %ix.018, 1
  %cmp1 = icmp ugt i32 %ix.018, %conv
  br i1 %cmp1, label %if.then, label %if.end

if.then:
  ; First predicated block: store to the even element.
  %arrayidx = getelementptr inbounds i8, ptr %q, i32 %mul
  store i8 1, ptr %arrayidx, align 1
  br label %if.end

if.end:
  %cmp4 = icmp ugt i32 %ix.018, %conv3
  br i1 %cmp4, label %if.then6, label %for.inc

if.then6:
  ; Second predicated block: store to the odd element.
  %add = or disjoint i32 %mul, 1
  %arrayidx7 = getelementptr inbounds i8, ptr %q, i32 %add
  store i8 2, ptr %arrayidx7, align 1
  br label %for.inc

for.inc:
  %inc = add nuw nsw i32 %ix.018, 1
  %exitcond = icmp eq i32 %inc, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

attributes #0 = { "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" }