1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 2; RUN: llc < %s | FileCheck %s 3 4; Check that the SCEVs produced from the multiple loops don't attempt to get 5; combined in invalid ways. The LSR filtering could attempt to combine addrecs 6; from different loops. Here the store loops address memory relative to %lsr.iv1135, the pointer IV of the loop headed by block 5. 7 8target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" 9target triple = "x86_64-unknown-linux-gnu" 10 11define void @in4dob_(ptr nocapture writeonly %0, ptr nocapture readonly %1, ptr nocapture readonly %2, i64 %3, i1 %min.iters.check840) "target-cpu"="icelake-server" { 12; CHECK-LABEL: in4dob_: 13; CHECK: # %bb.0: # %.preheader263 14; CHECK-NEXT: leaq (,%rcx,4), %r9 15; CHECK-NEXT: movl $1, %r10d 16; CHECK-NEXT: xorl %eax, %eax 17; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0 18; CHECK-NEXT: jmp .LBB0_1 19; CHECK-NEXT: .p2align 4 20; CHECK-NEXT: .LBB0_20: # in Loop: Header=BB0_1 Depth=1 21; CHECK-NEXT: incq %r10 22; CHECK-NEXT: addq %r9, %rax 23; CHECK-NEXT: cmpq %r10, %rcx 24; CHECK-NEXT: je .LBB0_18 25; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 26; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 27; CHECK-NEXT: vucomiss %xmm0, %xmm1 28; CHECK-NEXT: jne .LBB0_20 29; CHECK-NEXT: jp .LBB0_20 30; CHECK-NEXT: # %bb.2: # in Loop: Header=BB0_1 Depth=1 31; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 32; CHECK-NEXT: vucomiss %xmm0, %xmm1 33; CHECK-NEXT: jne .LBB0_20 34; CHECK-NEXT: jp .LBB0_20 35; CHECK-NEXT: # %bb.3: # %vector.body807.preheader 36; CHECK-NEXT: leaq 1(%rcx), %rdx 37; CHECK-NEXT: movl %edx, %esi 38; CHECK-NEXT: andl $7, %esi 39; CHECK-NEXT: cmpq $7, %rcx 40; CHECK-NEXT: jae .LBB0_5 41; CHECK-NEXT: # %bb.4: 42; CHECK-NEXT: xorl %r9d, %r9d 43; CHECK-NEXT: jmp .LBB0_7 44; CHECK-NEXT: .LBB0_5: # %vector.body807.preheader.new 45; CHECK-NEXT: movq %rdx, %r10 46; CHECK-NEXT: andq $-8, %r10 47; CHECK-NEXT: xorl %r9d, %r9d 48; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0 49; CHECK-NEXT: 
.p2align 4 50; CHECK-NEXT: .LBB0_6: # %vector.body807 51; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 52; CHECK-NEXT: leaq (%rdi,%r9), %r11 53; CHECK-NEXT: vmovups %ymm0, (%rax,%r11) 54; CHECK-NEXT: vmovups %ymm0, 1(%rax,%r11) 55; CHECK-NEXT: vmovups %ymm0, 2(%rax,%r11) 56; CHECK-NEXT: vmovups %ymm0, 3(%rax,%r11) 57; CHECK-NEXT: vmovups %ymm0, 4(%rax,%r11) 58; CHECK-NEXT: vmovups %ymm0, 5(%rax,%r11) 59; CHECK-NEXT: vmovups %ymm0, 6(%rax,%r11) 60; CHECK-NEXT: vmovups %ymm0, 7(%rax,%r11) 61; CHECK-NEXT: addq $8, %r9 62; CHECK-NEXT: cmpq %r9, %r10 63; CHECK-NEXT: jne .LBB0_6 64; CHECK-NEXT: .LBB0_7: # %.lr.ph373.unr-lcssa 65; CHECK-NEXT: testq %rsi, %rsi 66; CHECK-NEXT: je .LBB0_10 67; CHECK-NEXT: # %bb.8: # %vector.body807.epil.preheader 68; CHECK-NEXT: addq %rdi, %r9 69; CHECK-NEXT: xorl %r10d, %r10d 70; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0 71; CHECK-NEXT: .p2align 4 72; CHECK-NEXT: .LBB0_9: # %vector.body807.epil 73; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 74; CHECK-NEXT: leaq (%r9,%r10), %r11 75; CHECK-NEXT: vmovups %ymm0, (%rax,%r11) 76; CHECK-NEXT: incq %r10 77; CHECK-NEXT: cmpq %r10, %rsi 78; CHECK-NEXT: jne .LBB0_9 79; CHECK-NEXT: .LBB0_10: # %.lr.ph373 80; CHECK-NEXT: testb $1, %r8b 81; CHECK-NEXT: je .LBB0_11 82; CHECK-NEXT: # %bb.19: # %scalar.ph839.preheader 83; CHECK-NEXT: movl $0, (%rdi) 84; CHECK-NEXT: vzeroupper 85; CHECK-NEXT: retq 86; CHECK-NEXT: .LBB0_11: # %vector.body847.preheader 87; CHECK-NEXT: movl %edx, %esi 88; CHECK-NEXT: andl $7, %esi 89; CHECK-NEXT: cmpq $7, %rcx 90; CHECK-NEXT: jae .LBB0_13 91; CHECK-NEXT: # %bb.12: 92; CHECK-NEXT: xorl %ecx, %ecx 93; CHECK-NEXT: jmp .LBB0_15 94; CHECK-NEXT: .LBB0_13: # %vector.body847.preheader.new 95; CHECK-NEXT: andq $-8, %rdx 96; CHECK-NEXT: xorl %ecx, %ecx 97; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0 98; CHECK-NEXT: .p2align 4 99; CHECK-NEXT: .LBB0_14: # %vector.body847 100; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 101; CHECK-NEXT: leaq (%rdi,%rcx), %r8 102; CHECK-NEXT: vmovups 
%ymm0, 96(%rax,%r8) 103; CHECK-NEXT: vmovups %ymm0, 97(%rax,%r8) 104; CHECK-NEXT: vmovups %ymm0, 98(%rax,%r8) 105; CHECK-NEXT: vmovups %ymm0, 99(%rax,%r8) 106; CHECK-NEXT: vmovups %ymm0, 100(%rax,%r8) 107; CHECK-NEXT: vmovups %ymm0, 101(%rax,%r8) 108; CHECK-NEXT: vmovups %ymm0, 102(%rax,%r8) 109; CHECK-NEXT: vmovups %ymm0, 103(%rax,%r8) 110; CHECK-NEXT: addq $8, %rcx 111; CHECK-NEXT: cmpq %rcx, %rdx 112; CHECK-NEXT: jne .LBB0_14 113; CHECK-NEXT: .LBB0_15: # %common.ret.loopexit.unr-lcssa 114; CHECK-NEXT: testq %rsi, %rsi 115; CHECK-NEXT: je .LBB0_18 116; CHECK-NEXT: # %bb.16: # %vector.body847.epil.preheader 117; CHECK-NEXT: leaq 96(%rcx,%rdi), %rcx 118; CHECK-NEXT: xorl %edx, %edx 119; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0 120; CHECK-NEXT: .p2align 4 121; CHECK-NEXT: .LBB0_17: # %vector.body847.epil 122; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 123; CHECK-NEXT: leaq (%rcx,%rdx), %rdi 124; CHECK-NEXT: vmovups %ymm0, (%rax,%rdi) 125; CHECK-NEXT: incq %rdx 126; CHECK-NEXT: cmpq %rdx, %rsi 127; CHECK-NEXT: jne .LBB0_17 128; CHECK-NEXT: .LBB0_18: # %common.ret 129; CHECK-NEXT: vzeroupper 130; CHECK-NEXT: retq 131.preheader263: 132 %4 = shl i64 %3, 2 133 br label %5 134 1355: ; preds = %16, %.preheader263 136 %lsr.iv1135 = phi ptr [ %0, %.preheader263 ], [ %uglygep1136, %16 ] 137 %indvars.iv487 = phi i64 [ 1, %.preheader263 ], [ %indvars.iv.next488, %16 ] 138 %6 = getelementptr float, ptr %1, i64 %indvars.iv487 139 %7 = load float, ptr %6, align 4 140 %8 = fcmp oeq float %7, 0.000000e+00 141 %9 = getelementptr float, ptr %2, i64 %indvars.iv487 142 %10 = load float, ptr %9, align 4 143 %11 = fcmp oeq float %10, 0.000000e+00 144 %12 = and i1 %8, %11 145 br i1 %12, label %vector.body807.preheader, label %16 146 147vector.body807.preheader: ; preds = %5 148 %13 = add i64 %3, 1 149 %xtraiter = and i64 %13, 7 150 %14 = icmp ult i64 %3, 7 151 br i1 %14, label %.lr.ph373.unr-lcssa, label %vector.body807.preheader.new 152 153vector.body807.preheader.new: ; preds = 
%vector.body807.preheader 154 %unroll_iter = and i64 %13, -8 155 br label %vector.body807 156 157vector.body807: ; preds = %vector.body807, %vector.body807.preheader.new 158 %lsr.iv1194 = phi i64 [ 0, %vector.body807.preheader.new ], [ %lsr.iv.next1195.7, %vector.body807 ] 159 %niter = phi i64 [ 0, %vector.body807.preheader.new ], [ %niter.next.7, %vector.body807 ] 160 %uglygep1197 = getelementptr i8, ptr %lsr.iv1135, i64 %lsr.iv1194 161 store <8 x float> zeroinitializer, ptr %uglygep1197, align 4 162 %lsr.iv.next1195 = or disjoint i64 %lsr.iv1194, 1 163 %uglygep1197.1 = getelementptr i8, ptr %lsr.iv1135, i64 %lsr.iv.next1195 164 store <8 x float> zeroinitializer, ptr %uglygep1197.1, align 4 165 %lsr.iv.next1195.1 = or disjoint i64 %lsr.iv1194, 2 166 %uglygep1197.2 = getelementptr i8, ptr %lsr.iv1135, i64 %lsr.iv.next1195.1 167 store <8 x float> zeroinitializer, ptr %uglygep1197.2, align 4 168 %lsr.iv.next1195.2 = or disjoint i64 %lsr.iv1194, 3 169 %uglygep1197.3 = getelementptr i8, ptr %lsr.iv1135, i64 %lsr.iv.next1195.2 170 store <8 x float> zeroinitializer, ptr %uglygep1197.3, align 4 171 %lsr.iv.next1195.3 = or disjoint i64 %lsr.iv1194, 4 172 %uglygep1197.4 = getelementptr i8, ptr %lsr.iv1135, i64 %lsr.iv.next1195.3 173 store <8 x float> zeroinitializer, ptr %uglygep1197.4, align 4 174 %lsr.iv.next1195.4 = or disjoint i64 %lsr.iv1194, 5 175 %uglygep1197.5 = getelementptr i8, ptr %lsr.iv1135, i64 %lsr.iv.next1195.4 176 store <8 x float> zeroinitializer, ptr %uglygep1197.5, align 4 177 %lsr.iv.next1195.5 = or disjoint i64 %lsr.iv1194, 6 178 %uglygep1197.6 = getelementptr i8, ptr %lsr.iv1135, i64 %lsr.iv.next1195.5 179 store <8 x float> zeroinitializer, ptr %uglygep1197.6, align 4 180 %lsr.iv.next1195.6 = or disjoint i64 %lsr.iv1194, 7 181 %uglygep1197.7 = getelementptr i8, ptr %lsr.iv1135, i64 %lsr.iv.next1195.6 182 store <8 x float> zeroinitializer, ptr %uglygep1197.7, align 4 183 %lsr.iv.next1195.7 = add i64 %lsr.iv1194, 8 184 %niter.next.7 = add i64 %niter, 8 
185 %niter.ncmp.7 = icmp eq i64 %niter.next.7, %unroll_iter 186 br i1 %niter.ncmp.7, label %.lr.ph373.unr-lcssa.loopexit, label %vector.body807 187 188.lr.ph373.unr-lcssa.loopexit: ; preds = %vector.body807 189 br label %.lr.ph373.unr-lcssa 190 191.lr.ph373.unr-lcssa: ; preds = %.lr.ph373.unr-lcssa.loopexit, %vector.body807.preheader 192 %lsr.iv1194.unr = phi i64 [ 0, %vector.body807.preheader ], [ %lsr.iv.next1195.7, %.lr.ph373.unr-lcssa.loopexit ] 193 %lcmp.mod.not = icmp eq i64 %xtraiter, 0 194 br i1 %lcmp.mod.not, label %.lr.ph373, label %vector.body807.epil.preheader 195 196vector.body807.epil.preheader: ; preds = %.lr.ph373.unr-lcssa 197 br label %vector.body807.epil 198 199vector.body807.epil: ; preds = %vector.body807.epil.preheader, %vector.body807.epil 200 %lsr.iv1194.epil = phi i64 [ %lsr.iv.next1195.epil, %vector.body807.epil ], [ %lsr.iv1194.unr, %vector.body807.epil.preheader ] 201 %epil.iter = phi i64 [ %epil.iter.next, %vector.body807.epil ], [ 0, %vector.body807.epil.preheader ] 202 %uglygep1197.epil = getelementptr i8, ptr %lsr.iv1135, i64 %lsr.iv1194.epil 203 store <8 x float> zeroinitializer, ptr %uglygep1197.epil, align 4 204 %lsr.iv.next1195.epil = add i64 %lsr.iv1194.epil, 1 205 %epil.iter.next = add i64 %epil.iter, 1 206 %epil.iter.cmp.not = icmp eq i64 %epil.iter.next, %xtraiter 207 br i1 %epil.iter.cmp.not, label %.lr.ph373.loopexit, label %vector.body807.epil 208 209.lr.ph373.loopexit: ; preds = %vector.body807.epil 210 br label %.lr.ph373 211 212.lr.ph373: ; preds = %.lr.ph373.loopexit, %.lr.ph373.unr-lcssa 213 br i1 %min.iters.check840, label %scalar.ph839.preheader, label %vector.body847.preheader 214 215vector.body847.preheader: ; preds = %.lr.ph373 216 %uglygep11551 = getelementptr i8, ptr %lsr.iv1135, i64 96 217 %xtraiter12 = and i64 %13, 7 218 %15 = icmp ult i64 %3, 7 219 br i1 %15, label %common.ret.loopexit.unr-lcssa, label %vector.body847.preheader.new 220 221vector.body847.preheader.new: ; preds = %vector.body847.preheader 222 
%unroll_iter15 = and i64 %13, -8 223 br label %vector.body847 224 225vector.body847: ; preds = %vector.body847, %vector.body847.preheader.new 226 %lsr.iv1152 = phi i64 [ 0, %vector.body847.preheader.new ], [ %lsr.iv.next1153.7, %vector.body847 ] 227 %niter16 = phi i64 [ 0, %vector.body847.preheader.new ], [ %niter16.next.7, %vector.body847 ] 228 %uglygep1156 = getelementptr i8, ptr %uglygep11551, i64 %lsr.iv1152 229 store <8 x float> zeroinitializer, ptr %uglygep1156, align 4 230 %lsr.iv.next1153 = or disjoint i64 %lsr.iv1152, 1 231 %uglygep1156.1 = getelementptr i8, ptr %uglygep11551, i64 %lsr.iv.next1153 232 store <8 x float> zeroinitializer, ptr %uglygep1156.1, align 4 233 %lsr.iv.next1153.1 = or disjoint i64 %lsr.iv1152, 2 234 %uglygep1156.2 = getelementptr i8, ptr %uglygep11551, i64 %lsr.iv.next1153.1 235 store <8 x float> zeroinitializer, ptr %uglygep1156.2, align 4 236 %lsr.iv.next1153.2 = or disjoint i64 %lsr.iv1152, 3 237 %uglygep1156.3 = getelementptr i8, ptr %uglygep11551, i64 %lsr.iv.next1153.2 238 store <8 x float> zeroinitializer, ptr %uglygep1156.3, align 4 239 %lsr.iv.next1153.3 = or disjoint i64 %lsr.iv1152, 4 240 %uglygep1156.4 = getelementptr i8, ptr %uglygep11551, i64 %lsr.iv.next1153.3 241 store <8 x float> zeroinitializer, ptr %uglygep1156.4, align 4 242 %lsr.iv.next1153.4 = or disjoint i64 %lsr.iv1152, 5 243 %uglygep1156.5 = getelementptr i8, ptr %uglygep11551, i64 %lsr.iv.next1153.4 244 store <8 x float> zeroinitializer, ptr %uglygep1156.5, align 4 245 %lsr.iv.next1153.5 = or disjoint i64 %lsr.iv1152, 6 246 %uglygep1156.6 = getelementptr i8, ptr %uglygep11551, i64 %lsr.iv.next1153.5 247 store <8 x float> zeroinitializer, ptr %uglygep1156.6, align 4 248 %lsr.iv.next1153.6 = or disjoint i64 %lsr.iv1152, 7 249 %uglygep1156.7 = getelementptr i8, ptr %uglygep11551, i64 %lsr.iv.next1153.6 250 store <8 x float> zeroinitializer, ptr %uglygep1156.7, align 4 251 %lsr.iv.next1153.7 = add i64 %lsr.iv1152, 8 252 %niter16.next.7 = add i64 %niter16, 8 253 
%niter16.ncmp.7 = icmp eq i64 %niter16.next.7, %unroll_iter15 254 br i1 %niter16.ncmp.7, label %common.ret.loopexit.unr-lcssa.loopexit, label %vector.body847 255 256common.ret.loopexit.unr-lcssa.loopexit: ; preds = %vector.body847 257 br label %common.ret.loopexit.unr-lcssa 258 259common.ret.loopexit.unr-lcssa: ; preds = %common.ret.loopexit.unr-lcssa.loopexit, %vector.body847.preheader 260 %lsr.iv1152.unr = phi i64 [ 0, %vector.body847.preheader ], [ %lsr.iv.next1153.7, %common.ret.loopexit.unr-lcssa.loopexit ] 261 %lcmp.mod14.not = icmp eq i64 %xtraiter12, 0 262 br i1 %lcmp.mod14.not, label %common.ret, label %vector.body847.epil.preheader 263 264vector.body847.epil.preheader: ; preds = %common.ret.loopexit.unr-lcssa 265 br label %vector.body847.epil 266 267vector.body847.epil: ; preds = %vector.body847.epil.preheader, %vector.body847.epil 268 %lsr.iv1152.epil = phi i64 [ %lsr.iv.next1153.epil, %vector.body847.epil ], [ %lsr.iv1152.unr, %vector.body847.epil.preheader ] 269 %epil.iter13 = phi i64 [ %epil.iter13.next, %vector.body847.epil ], [ 0, %vector.body847.epil.preheader ] 270 %uglygep1156.epil = getelementptr i8, ptr %uglygep11551, i64 %lsr.iv1152.epil 271 store <8 x float> zeroinitializer, ptr %uglygep1156.epil, align 4 272 %lsr.iv.next1153.epil = add i64 %lsr.iv1152.epil, 1 273 %epil.iter13.next = add i64 %epil.iter13, 1 274 %epil.iter13.cmp.not = icmp eq i64 %epil.iter13.next, %xtraiter12 275 br i1 %epil.iter13.cmp.not, label %common.ret.loopexit, label %vector.body847.epil 276 277common.ret.loopexit: ; preds = %vector.body847.epil 278 br label %common.ret 279 280common.ret.loopexit1: ; preds = %16 281 br label %common.ret 282 283common.ret: ; preds = %common.ret.loopexit1, %common.ret.loopexit, %scalar.ph839.preheader, %common.ret.loopexit.unr-lcssa 284 ret void 285 286scalar.ph839.preheader: ; preds = %.lr.ph373 287 store float 0.000000e+00, ptr %0, align 4 288 br label %common.ret 289 29016: ; preds = %5 291 %indvars.iv.next488 = add i64 
%indvars.iv487, 1 292 %exitcond492.not = icmp eq i64 %indvars.iv.next488, %3 293 %uglygep1136 = getelementptr i8, ptr %lsr.iv1135, i64 %4 294 br i1 %exitcond492.not, label %common.ret.loopexit1, label %5 295} 296