; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s | FileCheck %s

; LSR should be able to eliminate the max computations by
; making the loops use slt/ult comparisons instead of ne comparisons.
;
; Both functions below have the same control-flow shape. @foo uses signed
; operations (sdiv/srem, icmp sgt/slt) and @bar uses unsigned ones
; (udiv/urem, icmp ugt/ult). In each, three loops (%bb6, %bb14 and %bb13)
; exit on an "icmp ne" of the incremented induction variable against a
; max(1, N) value built from an icmp + select pair.

target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9"

; Signed variant. The loop bounds %smax9, %smax and %smax3 are
; "icmp sgt 1, N" + "select" pairs; the expected assembly closes the
; corresponding loops (LBB0_4, LBB0_10, LBB0_9) with a cmpl followed by "jl".
define void @foo(ptr %r, i32 %s, i32 %w, i32 %x, ptr %j, i32 %d) nounwind {
; CHECK-LABEL: foo:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: pushl %ebp
; CHECK-NEXT: pushl %ebx
; CHECK-NEXT: pushl %edi
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: subl $28, %esp
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT: movl %edi, %ecx
; CHECK-NEXT: imull %ebp, %ecx
; CHECK-NEXT: cmpl $1, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl %ecx, (%esp) ## 4-byte Spill
; CHECK-NEXT: je LBB0_19
; CHECK-NEXT: ## %bb.1: ## %bb10.preheader
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: sarl $31, %eax
; CHECK-NEXT: shrl $30, %eax
; CHECK-NEXT: addl %ecx, %eax
; CHECK-NEXT: sarl $2, %eax
; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: jle LBB0_12
; CHECK-NEXT: ## %bb.2: ## %bb.nph9
; CHECK-NEXT: testl %ebp, %ebp
; CHECK-NEXT: jle LBB0_12
; CHECK-NEXT: ## %bb.3: ## %bb.nph9.split
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: incl %eax
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT: xorl %esi, %esi
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: LBB0_4: ## %bb6
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT: movzbl (%eax,%esi,2), %ebx
; CHECK-NEXT: movb %bl, (%edx,%esi)
; CHECK-NEXT: incl %esi
; CHECK-NEXT: cmpl %ebp, %esi
; CHECK-NEXT: jl LBB0_4
; CHECK-NEXT: ## %bb.5: ## %bb9
; CHECK-NEXT: ## in Loop: Header=BB0_4 Depth=1
; CHECK-NEXT: incl %ecx
; CHECK-NEXT: addl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: addl %ebp, %edx
; CHECK-NEXT: cmpl %edi, %ecx
; CHECK-NEXT: je LBB0_12
; CHECK-NEXT: ## %bb.6: ## %bb7.preheader
; CHECK-NEXT: ## in Loop: Header=BB0_4 Depth=1
; CHECK-NEXT: xorl %esi, %esi
; CHECK-NEXT: jmp LBB0_4
; CHECK-NEXT: LBB0_12: ## %bb18.loopexit
; CHECK-NEXT: movl (%esp), %eax ## 4-byte Reload
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
; CHECK-NEXT: addl %ecx, %eax
; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; CHECK-NEXT: cmpl $1, %edi
; CHECK-NEXT: jle LBB0_13
; CHECK-NEXT: ## %bb.7: ## %bb.nph5
; CHECK-NEXT: cmpl $2, %ebp
; CHECK-NEXT: jl LBB0_13
; CHECK-NEXT: ## %bb.8: ## %bb.nph5.split
; CHECK-NEXT: movl %ebp, %edx
; CHECK-NEXT: shrl $31, %edx
; CHECK-NEXT: addl %ebp, %edx
; CHECK-NEXT: sarl %edx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl %eax, %ecx
; CHECK-NEXT: shrl $31, %ecx
; CHECK-NEXT: addl %eax, %ecx
; CHECK-NEXT: sarl %ecx
; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
; CHECK-NEXT: addl %ecx, %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT: addl $2, %esi
; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; CHECK-NEXT: movl (%esp), %esi ## 4-byte Reload
; CHECK-NEXT: addl %esi, %ecx
; CHECK-NEXT: xorl %esi, %esi
; CHECK-NEXT: xorl %edi, %edi
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: LBB0_9: ## %bb13
; CHECK-NEXT: ## =>This Loop Header: Depth=1
; CHECK-NEXT: ## Child Loop BB0_10 Depth 2
; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; CHECK-NEXT: addl %esi, %edi
; CHECK-NEXT: imull {{[0-9]+}}(%esp), %edi
; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
; CHECK-NEXT: xorl %esi, %esi
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: LBB0_10: ## %bb14
; CHECK-NEXT: ## Parent Loop BB0_9 Depth=1
; CHECK-NEXT: ## => This Inner Loop Header: Depth=2
; CHECK-NEXT: movzbl -2(%edi,%esi,4), %ebx
; CHECK-NEXT: movb %bl, (%ecx,%esi)
; CHECK-NEXT: movzbl (%edi,%esi,4), %ebx
; CHECK-NEXT: movb %bl, (%eax,%esi)
; CHECK-NEXT: incl %esi
; CHECK-NEXT: cmpl %edx, %esi
; CHECK-NEXT: jl LBB0_10
; CHECK-NEXT: ## %bb.11: ## %bb17
; CHECK-NEXT: ## in Loop: Header=BB0_9 Depth=1
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload
; CHECK-NEXT: incl %edi
; CHECK-NEXT: addl %edx, %eax
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload
; CHECK-NEXT: addl $2, %esi
; CHECK-NEXT: addl %edx, %ecx
; CHECK-NEXT: cmpl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
; CHECK-NEXT: jl LBB0_9
; CHECK-NEXT: LBB0_13: ## %bb20
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: cmpl $1, %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT: je LBB0_19
; CHECK-NEXT: ## %bb.14: ## %bb20
; CHECK-NEXT: cmpl $3, %eax
; CHECK-NEXT: jne LBB0_24
; CHECK-NEXT: ## %bb.15: ## %bb22
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload
; CHECK-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: jle LBB0_18
; CHECK-NEXT: ## %bb.16: ## %bb.nph
; CHECK-NEXT: leal 15(%edi), %eax
; CHECK-NEXT: andl $-16, %eax
; CHECK-NEXT: imull {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: addl %ebx, %ebx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl (%esp), %esi ## 4-byte Reload
; CHECK-NEXT: addl %esi, %ecx
; CHECK-NEXT: addl %ecx, %ebx
; CHECK-NEXT: addl %eax, %edx
; CHECK-NEXT: leal 15(%ebp), %eax
; CHECK-NEXT: andl $-16, %eax
; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: LBB0_17: ## %bb23
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT: subl $4, %esp
; CHECK-NEXT: pushl %ebp
; CHECK-NEXT: pushl %edx
; CHECK-NEXT: pushl %ebx
; CHECK-NEXT: movl %ebx, %esi
; CHECK-NEXT: movl %edx, %ebx
; CHECK-NEXT: calll _memcpy
; CHECK-NEXT: movl %ebx, %edx
; CHECK-NEXT: movl %esi, %ebx
; CHECK-NEXT: addl $16, %esp
; CHECK-NEXT: addl %ebp, %ebx
; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
; CHECK-NEXT: decl %edi
; CHECK-NEXT: jne LBB0_17
; CHECK-NEXT: LBB0_18: ## %bb26
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
; CHECK-NEXT: movl (%esp), %edx ## 4-byte Reload
; CHECK-NEXT: addl %edx, %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: addl %eax, %ecx
; CHECK-NEXT: jmp LBB0_23
; CHECK-NEXT: LBB0_19: ## %bb29
; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: jle LBB0_22
; CHECK-NEXT: ## %bb.20: ## %bb.nph11
; CHECK-NEXT: movl %edi, %esi
; CHECK-NEXT: leal 15(%ebp), %eax
; CHECK-NEXT: andl $-16, %eax
; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: LBB0_21: ## %bb30
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT: subl $4, %esp
; CHECK-NEXT: pushl %ebp
; CHECK-NEXT: pushl %edx
; CHECK-NEXT: pushl %edi
; CHECK-NEXT: movl %edx, %ebx
; CHECK-NEXT: calll _memcpy
; CHECK-NEXT: movl %ebx, %edx
; CHECK-NEXT: addl $16, %esp
; CHECK-NEXT: addl %ebp, %edi
; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
; CHECK-NEXT: decl %esi
; CHECK-NEXT: jne LBB0_21
; CHECK-NEXT: LBB0_22: ## %bb33
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl (%esp), %edx ## 4-byte Reload
; CHECK-NEXT: addl %edx, %ecx
; CHECK-NEXT: LBB0_23: ## %bb33
; CHECK-NEXT: movl %edx, %eax
; CHECK-NEXT: shrl $31, %eax
; CHECK-NEXT: addl %edx, %eax
; CHECK-NEXT: sarl %eax
; CHECK-NEXT: subl $4, %esp
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: pushl $128
; CHECK-NEXT: pushl %ecx
; CHECK-NEXT: calll _memset
; CHECK-NEXT: addl $44, %esp
; CHECK-NEXT: LBB0_25: ## %return
; CHECK-NEXT: popl %esi
; CHECK-NEXT: popl %edi
; CHECK-NEXT: popl %ebx
; CHECK-NEXT: popl %ebp
; CHECK-NEXT: retl
; CHECK-NEXT: LBB0_24: ## %return
; CHECK-NEXT: addl $28, %esp
; CHECK-NEXT: jmp LBB0_25
entry:
  %0 = mul i32 %x, %w
  %1 = mul i32 %x, %w
  %2 = sdiv i32 %1, 4
  %.sum2 = add i32 %2, %0
  %cond = icmp eq i32 %d, 1
  br i1 %cond, label %bb29, label %bb10.preheader

bb10.preheader: ; preds = %entry
  %3 = icmp sgt i32 %x, 0
  br i1 %3, label %bb.nph9, label %bb18.loopexit

bb.nph7: ; preds = %bb7.preheader
  %4 = mul i32 %y.08, %w
  %5 = mul i32 %y.08, %s
  %6 = add i32 %5, 1
  ; %smax9 = signed max(1, %w); it is the exit bound tested in %bb7.
  %tmp8 = icmp sgt i32 1, %w
  %smax9 = select i1 %tmp8, i32 1, i32 %w
  br label %bb6

bb6: ; preds = %bb7, %bb.nph7
  %x.06 = phi i32 [ 0, %bb.nph7 ], [ %indvar.next7, %bb7 ]
  %7 = add i32 %x.06, %4
  %8 = shl i32 %x.06, 1
  %9 = add i32 %6, %8
  %10 = getelementptr i8, ptr %r, i32 %9
  %11 = load i8, ptr %10, align 1
  %12 = getelementptr i8, ptr %j, i32 %7
  store i8 %11, ptr %12, align 1
  br label %bb7

bb7: ; preds = %bb6
  %indvar.next7 = add i32 %x.06, 1
  %exitcond10 = icmp ne i32 %indvar.next7, %smax9
  br i1 %exitcond10, label %bb6, label %bb7.bb9_crit_edge

bb7.bb9_crit_edge: ; preds = %bb7
  br label %bb9

bb9: ; preds = %bb7.preheader, %bb7.bb9_crit_edge
  br label %bb10

bb10: ; preds = %bb9
  %indvar.next11 = add i32 %y.08, 1
  %exitcond12 = icmp ne i32 %indvar.next11, %x
  br i1 %exitcond12, label %bb7.preheader, label %bb10.bb18.loopexit_crit_edge

bb10.bb18.loopexit_crit_edge: ; preds = %bb10
  br label %bb10.bb18.loopexit_crit_edge.split

bb10.bb18.loopexit_crit_edge.split: ; preds = %bb.nph9, %bb10.bb18.loopexit_crit_edge
  br label %bb18.loopexit

bb.nph9: ; preds = %bb10.preheader
  %13 = icmp sgt i32 %w, 0
  br i1 %13, label %bb.nph9.split, label %bb10.bb18.loopexit_crit_edge.split

bb.nph9.split: ; preds = %bb.nph9
  br label %bb7.preheader

bb7.preheader: ; preds = %bb.nph9.split, %bb10
  %y.08 = phi i32 [ 0, %bb.nph9.split ], [ %indvar.next11, %bb10 ]
  br i1 true, label %bb.nph7, label %bb9

bb.nph5: ; preds = %bb18.loopexit
  %14 = sdiv i32 %w, 2
  %15 = icmp slt i32 %w, 2
  %16 = sdiv i32 %x, 2
  br i1 %15, label %bb18.bb20_crit_edge.split, label %bb.nph5.split

bb.nph5.split: ; preds = %bb.nph5
  ; %smax3 = signed max(1, %x / 2); it is the exit bound tested in %bb18.
  %tmp2 = icmp sgt i32 1, %16
  %smax3 = select i1 %tmp2, i32 1, i32 %16
  br label %bb13

bb13: ; preds = %bb18, %bb.nph5.split
  %y.14 = phi i32 [ 0, %bb.nph5.split ], [ %indvar.next1, %bb18 ]
  %17 = mul i32 %14, %y.14
  %18 = shl i32 %y.14, 1
  %19 = srem i32 %y.14, 2
  %20 = add i32 %19, %18
  %21 = mul i32 %20, %s
  br i1 true, label %bb.nph3, label %bb17

bb.nph3: ; preds = %bb13
  %22 = add i32 %17, %0
  %23 = add i32 %17, %.sum2
  %24 = sdiv i32 %w, 2
  ; %smax = signed max(1, %w / 2); it is the exit bound tested in %bb15.
  %tmp = icmp sgt i32 1, %24
  %smax = select i1 %tmp, i32 1, i32 %24
  br label %bb14

bb14: ; preds = %bb15, %bb.nph3
  %x.12 = phi i32 [ 0, %bb.nph3 ], [ %indvar.next, %bb15 ]
  %25 = shl i32 %x.12, 2
  %26 = add i32 %25, %21
  %27 = getelementptr i8, ptr %r, i32 %26
  %28 = load i8, ptr %27, align 1
  %.sum = add i32 %22, %x.12
  %29 = getelementptr i8, ptr %j, i32 %.sum
  store i8 %28, ptr %29, align 1
  %30 = shl i32 %x.12, 2
  %31 = or disjoint i32 %30, 2
  %32 = add i32 %31, %21
  %33 = getelementptr i8, ptr %r, i32 %32
  %34 = load i8, ptr %33, align 1
  %.sum6 = add i32 %23, %x.12
  %35 = getelementptr i8, ptr %j, i32 %.sum6
  store i8 %34, ptr %35, align 1
  br label %bb15

bb15: ; preds = %bb14
  %indvar.next = add i32 %x.12, 1
  %exitcond = icmp ne i32 %indvar.next, %smax
  br i1 %exitcond, label %bb14, label %bb15.bb17_crit_edge

bb15.bb17_crit_edge: ; preds = %bb15
  br label %bb17

bb17: ; preds = %bb15.bb17_crit_edge, %bb13
  br label %bb18

bb18.loopexit: ; preds = %bb10.bb18.loopexit_crit_edge.split, %bb10.preheader
  %36 = icmp slt i32 %x, 2
  br i1 %36, label %bb20, label %bb.nph5

bb18: ; preds = %bb17
  %indvar.next1 = add i32 %y.14, 1
  %exitcond4 = icmp ne i32 %indvar.next1, %smax3
  br i1 %exitcond4, label %bb13, label %bb18.bb20_crit_edge

bb18.bb20_crit_edge: ; preds = %bb18
  br label %bb18.bb20_crit_edge.split

bb18.bb20_crit_edge.split: ; preds = %bb18.bb20_crit_edge, %bb.nph5
  br label %bb20

bb20: ; preds = %bb18.bb20_crit_edge.split, %bb18.loopexit
  switch i32 %d, label %return [
    i32 3, label %bb22
    i32 1, label %bb29
  ]

bb22: ; preds = %bb20
  %37 = mul i32 %x, %w
  %38 = sdiv i32 %37, 4
  %.sum3 = add i32 %38, %.sum2
  %39 = add i32 %x, 15
  %40 = and i32 %39, -16
  %41 = add i32 %w, 15
  %42 = and i32 %41, -16
  %43 = mul i32 %40, %s
  %44 = icmp sgt i32 %x, 0
  br i1 %44, label %bb.nph, label %bb26

bb.nph: ; preds = %bb22
  br label %bb23

bb23: ; preds = %bb24, %bb.nph
  %y.21 = phi i32 [ 0, %bb.nph ], [ %indvar.next5, %bb24 ]
  %45 = mul i32 %y.21, %42
  %.sum1 = add i32 %45, %43
  %46 = getelementptr i8, ptr %r, i32 %.sum1
  %47 = mul i32 %y.21, %w
  %.sum5 = add i32 %47, %.sum3
  %48 = getelementptr i8, ptr %j, i32 %.sum5
  tail call void @llvm.memcpy.p0.p0.i32(ptr %48, ptr %46, i32 %w, i1 false)
  br label %bb24

bb24: ; preds = %bb23
  %indvar.next5 = add i32 %y.21, 1
  %exitcond6 = icmp ne i32 %indvar.next5, %x
  br i1 %exitcond6, label %bb23, label %bb24.bb26_crit_edge

bb24.bb26_crit_edge: ; preds = %bb24
  br label %bb26

bb26: ; preds = %bb24.bb26_crit_edge, %bb22
  %49 = mul i32 %x, %w
  %.sum4 = add i32 %.sum3, %49
  %50 = getelementptr i8, ptr %j, i32 %.sum4
  %51 = mul i32 %x, %w
  %52 = sdiv i32 %51, 2
  tail call void @llvm.memset.p0.i32(ptr %50, i8 -128, i32 %52, i1 false)
  ret void

bb29: ; preds = %bb20, %entry
  %53 = add i32 %w, 15
  %54 = and i32 %53, -16
  %55 = icmp sgt i32 %x, 0
  br i1 %55, label %bb.nph11, label %bb33

bb.nph11: ; preds = %bb29
  br label %bb30

bb30: ; preds = %bb31, %bb.nph11
  %y.310 = phi i32 [ 0, %bb.nph11 ], [ %indvar.next13, %bb31 ]
  %56 = mul i32 %y.310, %54
  %57 = getelementptr i8, ptr %r, i32 %56
  %58 = mul i32 %y.310, %w
  %59 = getelementptr i8, ptr %j, i32 %58
  tail call void @llvm.memcpy.p0.p0.i32(ptr %59, ptr %57, i32 %w, i1 false)
  br label %bb31

bb31: ; preds = %bb30
  %indvar.next13 = add i32 %y.310, 1
  %exitcond14 = icmp ne i32 %indvar.next13, %x
  br i1 %exitcond14, label %bb30, label %bb31.bb33_crit_edge

bb31.bb33_crit_edge: ; preds = %bb31
  br label %bb33

bb33: ; preds = %bb31.bb33_crit_edge, %bb29
  %60 = mul i32 %x, %w
  %61 = getelementptr i8, ptr %j, i32 %60
  %62 = mul i32 %x, %w
  %63 = sdiv i32 %62, 2
  tail call void @llvm.memset.p0.i32(ptr %61, i8 -128, i32 %63, i1 false)
  ret void

return: ; preds = %bb20
  ret void
}

; Unsigned variant of @foo: divisions/remainders are udiv/urem and the
; comparisons are ugt/ult/ne. The loop bounds are "icmp ugt 1, N" + "select"
; pairs (the values still carry %smax* names even though the compare is
; unsigned); the expected assembly closes the corresponding loops
; (LBB1_4, LBB1_10, LBB1_9) with a cmpl followed by "jb".
define void @bar(ptr %r, i32 %s, i32 %w, i32 %x, ptr %j, i32 %d) nounwind {
; CHECK-LABEL: bar:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: pushl %ebp
; CHECK-NEXT: pushl %ebx
; CHECK-NEXT: pushl %edi
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: subl $28, %esp
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl %ebp, %edx
; CHECK-NEXT: imull %eax, %edx
; CHECK-NEXT: cmpl $1, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; CHECK-NEXT: je LBB1_19
; CHECK-NEXT: ## %bb.1: ## %bb10.preheader
; CHECK-NEXT: movl %edx, %ecx
; CHECK-NEXT: shrl $2, %ecx
; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; CHECK-NEXT: testl %ebp, %ebp
; CHECK-NEXT: movl %eax, %edi
; CHECK-NEXT: je LBB1_12
; CHECK-NEXT: ## %bb.2: ## %bb.nph9
; CHECK-NEXT: testl %eax, %eax
; CHECK-NEXT: je LBB1_12
; CHECK-NEXT: ## %bb.3: ## %bb.nph9.split
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: incl %eax
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: movl %esi, %edx
; CHECK-NEXT: xorl %esi, %esi
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: LBB1_4: ## %bb6
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT: movzbl (%eax,%esi,2), %ebx
; CHECK-NEXT: movb %bl, (%edx,%esi)
; CHECK-NEXT: incl %esi
; CHECK-NEXT: cmpl %edi, %esi
; CHECK-NEXT: jb LBB1_4
; CHECK-NEXT: ## %bb.5: ## %bb9
; CHECK-NEXT: ## in Loop: Header=BB1_4 Depth=1
; CHECK-NEXT: incl %ecx
; CHECK-NEXT: addl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: addl %edi, %edx
; CHECK-NEXT: cmpl %ebp, %ecx
; CHECK-NEXT: je LBB1_12
; CHECK-NEXT: ## %bb.6: ## %bb7.preheader
; CHECK-NEXT: ## in Loop: Header=BB1_4 Depth=1
; CHECK-NEXT: xorl %esi, %esi
; CHECK-NEXT: jmp LBB1_4
; CHECK-NEXT: LBB1_12: ## %bb18.loopexit
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
; CHECK-NEXT: addl %ecx, %eax
; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; CHECK-NEXT: cmpl $1, %ebp
; CHECK-NEXT: jbe LBB1_13
; CHECK-NEXT: ## %bb.7: ## %bb.nph5
; CHECK-NEXT: cmpl $2, %edi
; CHECK-NEXT: jb LBB1_13
; CHECK-NEXT: ## %bb.8: ## %bb.nph5.split
; CHECK-NEXT: movl %edi, %ebp
; CHECK-NEXT: shrl %ebp
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: shrl %eax
; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
; CHECK-NEXT: addl %eax, %ecx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT: addl $2, %edx
; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
; CHECK-NEXT: addl %edx, %eax
; CHECK-NEXT: xorl %edx, %edx
; CHECK-NEXT: xorl %ebx, %ebx
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: LBB1_9: ## %bb13
; CHECK-NEXT: ## =>This Loop Header: Depth=1
; CHECK-NEXT: ## Child Loop BB1_10 Depth 2
; CHECK-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; CHECK-NEXT: andl $1, %ebx
; CHECK-NEXT: movl %edx, (%esp) ## 4-byte Spill
; CHECK-NEXT: addl %edx, %ebx
; CHECK-NEXT: imull {{[0-9]+}}(%esp), %ebx
; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload
; CHECK-NEXT: xorl %esi, %esi
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: LBB1_10: ## %bb14
; CHECK-NEXT: ## Parent Loop BB1_9 Depth=1
; CHECK-NEXT: ## => This Inner Loop Header: Depth=2
; CHECK-NEXT: movzbl -2(%ebx,%esi,4), %edx
; CHECK-NEXT: movb %dl, (%eax,%esi)
; CHECK-NEXT: movzbl (%ebx,%esi,4), %edx
; CHECK-NEXT: movb %dl, (%ecx,%esi)
; CHECK-NEXT: incl %esi
; CHECK-NEXT: cmpl %ebp, %esi
; CHECK-NEXT: jb LBB1_10
; CHECK-NEXT: ## %bb.11: ## %bb17
; CHECK-NEXT: ## in Loop: Header=BB1_9 Depth=1
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload
; CHECK-NEXT: incl %ebx
; CHECK-NEXT: addl %ebp, %ecx
; CHECK-NEXT: movl (%esp), %edx ## 4-byte Reload
; CHECK-NEXT: addl $2, %edx
; CHECK-NEXT: addl %ebp, %eax
; CHECK-NEXT: cmpl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload
; CHECK-NEXT: jb LBB1_9
; CHECK-NEXT: LBB1_13: ## %bb20
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT: cmpl $1, %esi
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: je LBB1_19
; CHECK-NEXT: ## %bb.14: ## %bb20
; CHECK-NEXT: cmpl $3, %esi
; CHECK-NEXT: jne LBB1_24
; CHECK-NEXT: ## %bb.15: ## %bb22
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
; CHECK-NEXT: addl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
; CHECK-NEXT: testl %ebp, %ebp
; CHECK-NEXT: je LBB1_18
; CHECK-NEXT: ## %bb.16: ## %bb.nph
; CHECK-NEXT: movl %ebp, %esi
; CHECK-NEXT: leal 15(%ebp), %eax
; CHECK-NEXT: andl $-16, %eax
; CHECK-NEXT: imull {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT: addl $15, %edx
; CHECK-NEXT: andl $-16, %edx
; CHECK-NEXT: movl %edx, (%esp) ## 4-byte Spill
; CHECK-NEXT: addl %eax, %ecx
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT: leal (%edx,%eax), %ebp
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: LBB1_17: ## %bb23
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT: subl $4, %esp
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx
; CHECK-NEXT: pushl %ebx
; CHECK-NEXT: pushl %ecx
; CHECK-NEXT: pushl %ebp
; CHECK-NEXT: movl %ecx, %edi
; CHECK-NEXT: calll _memcpy
; CHECK-NEXT: movl %edi, %ecx
; CHECK-NEXT: addl $16, %esp
; CHECK-NEXT: addl %ebx, %ebp
; CHECK-NEXT: addl (%esp), %ecx ## 4-byte Folded Reload
; CHECK-NEXT: decl %esi
; CHECK-NEXT: jne LBB1_17
; CHECK-NEXT: LBB1_18: ## %bb26
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
; CHECK-NEXT: addl %ecx, %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT: addl %eax, %edx
; CHECK-NEXT: shrl %ecx
; CHECK-NEXT: subl $4, %esp
; CHECK-NEXT: pushl %ecx
; CHECK-NEXT: pushl $128
; CHECK-NEXT: pushl %edx
; CHECK-NEXT: jmp LBB1_23
; CHECK-NEXT: LBB1_19: ## %bb29
; CHECK-NEXT: testl %ebp, %ebp
; CHECK-NEXT: je LBB1_22
; CHECK-NEXT: ## %bb.20: ## %bb.nph11
; CHECK-NEXT: movl %ebp, %esi
; CHECK-NEXT: movl %eax, %edi
; CHECK-NEXT: addl $15, %eax
; CHECK-NEXT: andl $-16, %eax
; CHECK-NEXT: movl %eax, (%esp) ## 4-byte Spill
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: LBB1_21: ## %bb30
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT: subl $4, %esp
; CHECK-NEXT: pushl %edi
; CHECK-NEXT: pushl %ecx
; CHECK-NEXT: pushl %ebp
; CHECK-NEXT: movl %ecx, %ebx
; CHECK-NEXT: calll _memcpy
; CHECK-NEXT: movl %ebx, %ecx
; CHECK-NEXT: addl $16, %esp
; CHECK-NEXT: addl %edi, %ebp
; CHECK-NEXT: addl (%esp), %ecx ## 4-byte Folded Reload
; CHECK-NEXT: decl %esi
; CHECK-NEXT: jne LBB1_21
; CHECK-NEXT: LBB1_22: ## %bb33
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: addl %eax, %ecx
; CHECK-NEXT: shrl %eax
; CHECK-NEXT: subl $4, %esp
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: pushl $128
; CHECK-NEXT: pushl %ecx
; CHECK-NEXT: LBB1_23: ## %bb33
; CHECK-NEXT: calll _memset
; CHECK-NEXT: addl $44, %esp
; CHECK-NEXT: LBB1_25: ## %return
; CHECK-NEXT: popl %esi
; CHECK-NEXT: popl %edi
; CHECK-NEXT: popl %ebx
; CHECK-NEXT: popl %ebp
; CHECK-NEXT: retl
; CHECK-NEXT: LBB1_24: ## %return
; CHECK-NEXT: addl $28, %esp
; CHECK-NEXT: jmp LBB1_25
entry:
  %0 = mul i32 %x, %w
  %1 = mul i32 %x, %w
  %2 = udiv i32 %1, 4
  %.sum2 = add i32 %2, %0
  %cond = icmp eq i32 %d, 1
  br i1 %cond, label %bb29, label %bb10.preheader

bb10.preheader: ; preds = %entry
  %3 = icmp ne i32 %x, 0
  br i1 %3, label %bb.nph9, label %bb18.loopexit

bb.nph7: ; preds = %bb7.preheader
  %4 = mul i32 %y.08, %w
  %5 = mul i32 %y.08, %s
  %6 = add i32 %5, 1
  ; %smax9 = unsigned max(1, %w); it is the exit bound tested in %bb7.
  %tmp8 = icmp ugt i32 1, %w
  %smax9 = select i1 %tmp8, i32 1, i32 %w
  br label %bb6

bb6: ; preds = %bb7, %bb.nph7
  %x.06 = phi i32 [ 0, %bb.nph7 ], [ %indvar.next7, %bb7 ]
  %7 = add i32 %x.06, %4
  %8 = shl i32 %x.06, 1
  %9 = add i32 %6, %8
  %10 = getelementptr i8, ptr %r, i32 %9
  %11 = load i8, ptr %10, align 1
  %12 = getelementptr i8, ptr %j, i32 %7
  store i8 %11, ptr %12, align 1
  br label %bb7

bb7: ; preds = %bb6
  %indvar.next7 = add i32 %x.06, 1
  %exitcond10 = icmp ne i32 %indvar.next7, %smax9
  br i1 %exitcond10, label %bb6, label %bb7.bb9_crit_edge

bb7.bb9_crit_edge: ; preds = %bb7
  br label %bb9

bb9: ; preds = %bb7.preheader, %bb7.bb9_crit_edge
  br label %bb10

bb10: ; preds = %bb9
  %indvar.next11 = add i32 %y.08, 1
  %exitcond12 = icmp ne i32 %indvar.next11, %x
  br i1 %exitcond12, label %bb7.preheader, label %bb10.bb18.loopexit_crit_edge

bb10.bb18.loopexit_crit_edge: ; preds = %bb10
  br label %bb10.bb18.loopexit_crit_edge.split

bb10.bb18.loopexit_crit_edge.split: ; preds = %bb.nph9, %bb10.bb18.loopexit_crit_edge
  br label %bb18.loopexit

bb.nph9: ; preds = %bb10.preheader
  %13 = icmp ugt i32 %w, 0
  br i1 %13, label %bb.nph9.split, label %bb10.bb18.loopexit_crit_edge.split

bb.nph9.split: ; preds = %bb.nph9
  br label %bb7.preheader

bb7.preheader: ; preds = %bb.nph9.split, %bb10
  %y.08 = phi i32 [ 0, %bb.nph9.split ], [ %indvar.next11, %bb10 ]
  br i1 true, label %bb.nph7, label %bb9

bb.nph5: ; preds = %bb18.loopexit
  %14 = udiv i32 %w, 2
  %15 = icmp ult i32 %w, 2
  %16 = udiv i32 %x, 2
  br i1 %15, label %bb18.bb20_crit_edge.split, label %bb.nph5.split

bb.nph5.split: ; preds = %bb.nph5
  ; %smax3 = unsigned max(1, %x / 2); it is the exit bound tested in %bb18.
  %tmp2 = icmp ugt i32 1, %16
  %smax3 = select i1 %tmp2, i32 1, i32 %16
  br label %bb13

bb13: ; preds = %bb18, %bb.nph5.split
  %y.14 = phi i32 [ 0, %bb.nph5.split ], [ %indvar.next1, %bb18 ]
  %17 = mul i32 %14, %y.14
  %18 = shl i32 %y.14, 1
  %19 = urem i32 %y.14, 2
  %20 = add i32 %19, %18
  %21 = mul i32 %20, %s
  br i1 true, label %bb.nph3, label %bb17

bb.nph3: ; preds = %bb13
  %22 = add i32 %17, %0
  %23 = add i32 %17, %.sum2
  %24 = udiv i32 %w, 2
  ; %smax = unsigned max(1, %w / 2); it is the exit bound tested in %bb15.
  %tmp = icmp ugt i32 1, %24
  %smax = select i1 %tmp, i32 1, i32 %24
  br label %bb14

bb14: ; preds = %bb15, %bb.nph3
  %x.12 = phi i32 [ 0, %bb.nph3 ], [ %indvar.next, %bb15 ]
  %25 = shl i32 %x.12, 2
  %26 = add i32 %25, %21
  %27 = getelementptr i8, ptr %r, i32 %26
  %28 = load i8, ptr %27, align 1
  %.sum = add i32 %22, %x.12
  %29 = getelementptr i8, ptr %j, i32 %.sum
  store i8 %28, ptr %29, align 1
  %30 = shl i32 %x.12, 2
  %31 = or disjoint i32 %30, 2
  %32 = add i32 %31, %21
  %33 = getelementptr i8, ptr %r, i32 %32
  %34 = load i8, ptr %33, align 1
  %.sum6 = add i32 %23, %x.12
  %35 = getelementptr i8, ptr %j, i32 %.sum6
  store i8 %34, ptr %35, align 1
  br label %bb15

bb15: ; preds = %bb14
  %indvar.next = add i32 %x.12, 1
  %exitcond = icmp ne i32 %indvar.next, %smax
  br i1 %exitcond, label %bb14, label %bb15.bb17_crit_edge

bb15.bb17_crit_edge: ; preds = %bb15
  br label %bb17

bb17: ; preds = %bb15.bb17_crit_edge, %bb13
  br label %bb18

bb18.loopexit: ; preds = %bb10.bb18.loopexit_crit_edge.split, %bb10.preheader
  %36 = icmp ult i32 %x, 2
  br i1 %36, label %bb20, label %bb.nph5

bb18: ; preds = %bb17
  %indvar.next1 = add i32 %y.14, 1
  %exitcond4 = icmp ne i32 %indvar.next1, %smax3
  br i1 %exitcond4, label %bb13, label %bb18.bb20_crit_edge

bb18.bb20_crit_edge: ; preds = %bb18
  br label %bb18.bb20_crit_edge.split

bb18.bb20_crit_edge.split: ; preds = %bb18.bb20_crit_edge, %bb.nph5
  br label %bb20

bb20: ; preds = %bb18.bb20_crit_edge.split, %bb18.loopexit
  switch i32 %d, label %return [
    i32 3, label %bb22
    i32 1, label %bb29
  ]

bb22: ; preds = %bb20
  %37 = mul i32 %x, %w
  %38 = udiv i32 %37, 4
  %.sum3 = add i32 %38, %.sum2
  %39 = add i32 %x, 15
  %40 = and i32 %39, -16
  %41 = add i32 %w, 15
  %42 = and i32 %41, -16
  %43 = mul i32 %40, %s
  %44 = icmp ugt i32 %x, 0
  br i1 %44, label %bb.nph, label %bb26

bb.nph: ; preds = %bb22
  br label %bb23

bb23: ; preds = %bb24, %bb.nph
  %y.21 = phi i32 [ 0, %bb.nph ], [ %indvar.next5, %bb24 ]
  %45 = mul i32 %y.21, %42
  %.sum1 = add i32 %45, %43
  %46 = getelementptr i8, ptr %r, i32 %.sum1
  %47 = mul i32 %y.21, %w
  %.sum5 = add i32 %47, %.sum3
  %48 = getelementptr i8, ptr %j, i32 %.sum5
  tail call void @llvm.memcpy.p0.p0.i32(ptr %48, ptr %46, i32 %w, i1 false)
  br label %bb24

bb24: ; preds = %bb23
  %indvar.next5 = add i32 %y.21, 1
  %exitcond6 = icmp ne i32 %indvar.next5, %x
  br i1 %exitcond6, label %bb23, label %bb24.bb26_crit_edge

bb24.bb26_crit_edge: ; preds = %bb24
  br label %bb26

bb26: ; preds = %bb24.bb26_crit_edge, %bb22
  %49 = mul i32 %x, %w
  %.sum4 = add i32 %.sum3, %49
  %50 = getelementptr i8, ptr %j, i32 %.sum4
  %51 = mul i32 %x, %w
  %52 = udiv i32 %51, 2
  tail call void @llvm.memset.p0.i32(ptr %50, i8 -128, i32 %52, i1 false)
  ret void

bb29: ; preds = %bb20, %entry
  %53 = add i32 %w, 15
  %54 = and i32 %53, -16
  %55 = icmp ugt i32 %x, 0
  br i1 %55, label %bb.nph11, label %bb33

bb.nph11: ; preds = %bb29
  br label %bb30

bb30: ; preds = %bb31, %bb.nph11
  %y.310 = phi i32 [ 0, %bb.nph11 ], [ %indvar.next13, %bb31 ]
  %56 = mul i32 %y.310, %54
  %57 = getelementptr i8, ptr %r, i32 %56
  %58 = mul i32 %y.310, %w
  %59 = getelementptr i8, ptr %j, i32 %58
  tail call void @llvm.memcpy.p0.p0.i32(ptr %59, ptr %57, i32 %w, i1 false)
  br label %bb31

bb31: ; preds = %bb30
  %indvar.next13 = add i32 %y.310, 1
  %exitcond14 = icmp ne i32 %indvar.next13, %x
  br i1 %exitcond14, label %bb30, label %bb31.bb33_crit_edge

bb31.bb33_crit_edge: ; preds = %bb31
  br label %bb33

bb33: ; preds = %bb31.bb33_crit_edge, %bb29
  %60 = mul i32 %x, %w
  %61 = getelementptr i8, ptr %j, i32 %60
  %62 = mul i32 %x, %w
  %63 = udiv i32 %62, 2
  tail call void @llvm.memset.p0.i32(ptr %61, i8 -128, i32 %63, i1 false)
  ret void

return: ; preds = %bb20
  ret void
}

; Intrinsics called by @foo and @bar above.
declare void @llvm.memcpy.p0.p0.i32(ptr nocapture, ptr nocapture, i32, i1) nounwind

declare void @llvm.memset.p0.i32(ptr nocapture, i8, i32, i1) nounwind