; Test that loops with sufficient registers do not reload or spill on the
; stack. These cases include calls, and it is necessary to have the GR128 /
; FP128 registers as part of the callee saved registers list in order to avoid
; spilling / reloading.
;
; RUN: llc -switch-peel-threshold=101 < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s

; Aggregate types kept from the reduced input. Only %0 is referenced by an
; instruction in this file (the getelementptr in @fun0); the others are either
; nested inside it or unused here.
%0 = type { ptr, ptr, ptr, i32, ptr, i64, i64, i64, i64, i64, i64, %2, %5, %7 }
%1 = type { i32, i32, ptr, ptr, ptr, ptr, ptr, ptr, ptr }
%2 = type { i64, i64, ptr }
%3 = type { ptr, i64 }
%4 = type { i64, ptr }
%5 = type { i64, i64, ptr }
%6 = type { i64, ptr, i32, i64, ptr }
%7 = type { i64, i64, ptr }
%8 = type { i64, ptr, ptr, ptr, i64, ptr, %5, i32, i64, i64 }

declare void @llvm.memcpy.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1)

; @fun0: a two-level loop nest. The outer loop is headed by %14 and latched by
; %31 (which contains an indirect call); the inner loop is the self-looping
; block %24, which contains a memcpy call. The checks below locate the inner
; loop header and require that no 16-byte folded spill or reload follows it.
; NOTE(review): the udiv results are presumably what put GR128 pairs under
; pressure here -- confirm against the SystemZ backend.
define void @fun0(ptr) {
; CHECK-LABEL: .LBB0_4
; CHECK: => This Inner Loop Header
; CHECK-NOT: 16-byte Folded Spill
; CHECK-NOT: 16-byte Folded Reload

  %2 = load i64, ptr undef, align 8
  %3 = udiv i64 128, %2
  %4 = mul i64 %3, %2
  %5 = load i64, ptr undef, align 8
  switch i32 undef, label %36 [
    i32 1, label %6
    i32 2, label %7
    i32 3, label %8
    i32 4, label %9
    i32 5, label %10
    i32 6, label %11
  ]

; <label>:6:                                      ; preds = %1
  br label %12

; <label>:7:                                      ; preds = %1
  br label %12

; <label>:8:                                      ; preds = %1
  unreachable

; <label>:9:                                      ; preds = %1
  unreachable

; <label>:10:                                     ; preds = %1
  unreachable

; <label>:11:                                     ; preds = %1
  unreachable

; <label>:12:                                     ; preds = %7, %6
  %13 = getelementptr inbounds %0, ptr %0, i64 0, i32 5
  br label %14

; Outer loop header.
; <label>:14:                                     ; preds = %31, %12
  %15 = phi i64 [ undef, %31 ], [ %5, %12 ]
  %16 = phi i64 [ %35, %31 ], [ undef, %12 ]
  %17 = load i64, ptr %13, align 8
  %18 = icmp ult i64 %15, %17
  %19 = select i1 %18, i64 %15, i64 %17
  %20 = udiv i64 %19, %4
  %21 = icmp ugt i64 %20, 1
  %22 = select i1 %21, i64 %20, i64 1
  %23 = sub i64 %22, 0
  br label %24

; Inner loop: counts %25 down in steps of 4 around a memcpy call.
; <label>:24:                                     ; preds = %24, %14
  %25 = phi i64 [ %23, %14 ], [ %27, %24 ]
  call void @llvm.memcpy.p0.p0.i64(ptr undef, ptr nonnull undef, i64 %4, i1 false)
  %26 = getelementptr inbounds i8, ptr null, i64 %4
  store ptr %26, ptr undef, align 8
  %27 = add i64 %25, -4
  %28 = icmp eq i64 %27, 0
  ; Exit through %29 so that the conditional tail copy in %30 stays reachable;
  ; this is the edge the preds annotations on %29 and %31 describe.
  br i1 %28, label %29, label %24

; <label>:29:                                     ; preds = %24
  br i1 undef, label %31, label %30

; <label>:30:                                     ; preds = %29
  call void @llvm.memcpy.p0.p0.i64(ptr %26, ptr nonnull undef, i64 %4, i1 false)
  br label %31

; Outer loop latch: indirect call, then either exit or go around again.
; <label>:31:                                     ; preds = %30, %29
  %32 = call signext i32 undef(ptr undef, i64 %16, i32 signext 8)
  %33 = icmp eq i64 undef, 0
  %34 = select i1 %33, i64 0, i64 %19
  %35 = add i64 %34, %16
  br i1 %33, label %36, label %14

; <label>:36:                                     ; preds = %31, %1
  ret void
}

declare fp128 @llvm.pow.f128(fp128, fp128)

; @fun1: a single loop (block %2) that carries an fp128 accumulator (%3 / %5)
; across a call to llvm.pow.f128. The checks require that no 16-byte folded
; spill or reload appears between the loop header and the following block.
define void @fun1(ptr) {
; CHECK-LABEL: .LBB1_2
; CHECK: =>This Inner Loop Header: Depth=1
; CHECK-NOT: 16-byte Folded Spill
; CHECK-NOT: 16-byte Folded Reload
; CHECK-LABEL: .LBB1_3

  br i1 undef, label %7, label %2

; <label>:2:                                      ; preds = %2, %1
  %3 = phi fp128 [ %5, %2 ], [ 0xL00000000000000000000000000000000, %1 ]
  %4 = tail call fp128 @llvm.pow.f128(fp128 0xL00000000000000000000000000000000, fp128 0xL00000000000000000000000000000000) #2
  %5 = fadd fp128 %3, %4
  %6 = icmp eq i64 undef, 0
  br i1 %6, label %7, label %2

; <label>:7:                                      ; preds = %2, %1
  %8 = phi fp128 [ 0xL00000000000000000000000000000000, %1 ], [ %5, %2 ]
  %9 = fadd fp128 0xL00000000000000000000000000000000, %8
  %10 = fadd fp128 0xL00000000000000000000000000000000, %9
  %11 = fadd fp128 0xL00000000000000000000000000000000, %10
  %12 = tail call fp128 @llvm.pow.f128(fp128 %11, fp128 0xL00000000000000000000000000000000) #2
  store fp128 %12, ptr %0, align 8
  ret void
}