; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN:   -mcpu=pwr9 < %s | FileCheck %s

; Codegen test for a two-level loop nest.
;
; The inner loop reads one double from %1, adds sixteen doubles read from %0
; (four consecutive elements starting at column %24 in each of the four rows
; %11 .. %11+3, where a row is %5 elements long), and stores the sum back to
; the same slot of %1.
;
; The expected assembly below performs the sixteen loads through four base
; registers (r22..r25) with small constant displacements, and advances every
; base register by r11 (sldi 11, 10, 3) once per inner iteration.
;
; NOTE(review): the specific pass or bug this regression test guards is not
; stated in the file - confirm against the commit that introduced it.
define void @foo(ptr readonly %0, ptr %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6, i64 %7) {
; CHECK-LABEL: foo:
; CHECK:       # %bb.0:
; CHECK-NEXT:    cmpd 5, 7
; CHECK-NEXT:    bgelr 0
; CHECK-NEXT:  # %bb.1: # %.preheader
; CHECK-NEXT:    std 27, -40(1) # 8-byte Folded Spill
; CHECK-NEXT:    addi 27, 5, 2
; CHECK-NEXT:    std 28, -32(1) # 8-byte Folded Spill
; CHECK-NEXT:    addi 28, 5, 3
; CHECK-NEXT:    std 30, -16(1) # 8-byte Folded Spill
; CHECK-NEXT:    addi 30, 5, 1
; CHECK-NEXT:    mulld 12, 8, 5
; CHECK-NEXT:    mulld 0, 9, 8
; CHECK-NEXT:    std 29, -24(1) # 8-byte Folded Spill
; CHECK-NEXT:    addi 29, 3, 16
; CHECK-NEXT:    sldi 11, 10, 3
; CHECK-NEXT:    std 22, -80(1) # 8-byte Folded Spill
; CHECK-NEXT:    std 23, -72(1) # 8-byte Folded Spill
; CHECK-NEXT:    std 24, -64(1) # 8-byte Folded Spill
; CHECK-NEXT:    std 25, -56(1) # 8-byte Folded Spill
; CHECK-NEXT:    std 26, -48(1) # 8-byte Folded Spill
; CHECK-NEXT:    mulld 30, 8, 30
; CHECK-NEXT:    mulld 28, 8, 28
; CHECK-NEXT:    mulld 8, 8, 27
; CHECK-NEXT:    b .LBB0_3
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  .LBB0_2:
; CHECK-NEXT:    add 5, 5, 9
; CHECK-NEXT:    add 12, 12, 0
; CHECK-NEXT:    add 30, 30, 0
; CHECK-NEXT:    add 28, 28, 0
; CHECK-NEXT:    add 8, 8, 0
; CHECK-NEXT:    cmpd 5, 7
; CHECK-NEXT:    bge 0, .LBB0_6
; CHECK-NEXT:  .LBB0_3: # =>This Loop Header: Depth=1
; CHECK-NEXT:    # Child Loop BB0_5 Depth 2
; CHECK-NEXT:    sub 27, 5, 10
; CHECK-NEXT:    cmpd 6, 27
; CHECK-NEXT:    bge 0, .LBB0_2
; CHECK-NEXT:  # %bb.4:
; CHECK-NEXT:    add 25, 6, 12
; CHECK-NEXT:    add 24, 6, 8
; CHECK-NEXT:    sldi 26, 6, 3
; CHECK-NEXT:    sldi 23, 25, 3
; CHECK-NEXT:    add 25, 6, 30
; CHECK-NEXT:    sldi 24, 24, 3
; CHECK-NEXT:    add 26, 4, 26
; CHECK-NEXT:    sldi 22, 25, 3
; CHECK-NEXT:    add 25, 6, 28
; CHECK-NEXT:    add 24, 29, 24
; CHECK-NEXT:    add 23, 3, 23
; CHECK-NEXT:    sldi 25, 25, 3
; CHECK-NEXT:    add 22, 3, 22
; CHECK-NEXT:    add 25, 29, 25
; CHECK-NEXT:    .p2align 5
; CHECK-NEXT:  .LBB0_5: # Parent Loop BB0_3 Depth=1
; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
; CHECK-NEXT:    lfd 0, 0(26)
; CHECK-NEXT:    lfd 1, 0(23)
; CHECK-NEXT:    add 6, 6, 10
; CHECK-NEXT:    cmpd 6, 27
; CHECK-NEXT:    xsadddp 0, 0, 1
; CHECK-NEXT:    lfd 1, 8(23)
; CHECK-NEXT:    xsadddp 0, 0, 1
; CHECK-NEXT:    lfd 1, 16(23)
; CHECK-NEXT:    xsadddp 0, 0, 1
; CHECK-NEXT:    lfd 1, 24(23)
; CHECK-NEXT:    add 23, 23, 11
; CHECK-NEXT:    xsadddp 0, 0, 1
; CHECK-NEXT:    lfd 1, 0(22)
; CHECK-NEXT:    xsadddp 0, 0, 1
; CHECK-NEXT:    lfd 1, 8(22)
; CHECK-NEXT:    xsadddp 0, 0, 1
; CHECK-NEXT:    lfd 1, 16(22)
; CHECK-NEXT:    xsadddp 0, 0, 1
; CHECK-NEXT:    lfd 1, 24(22)
; CHECK-NEXT:    add 22, 22, 11
; CHECK-NEXT:    xsadddp 0, 0, 1
; CHECK-NEXT:    lfd 1, -16(24)
; CHECK-NEXT:    xsadddp 0, 0, 1
; CHECK-NEXT:    lfd 1, -8(24)
; CHECK-NEXT:    xsadddp 0, 0, 1
; CHECK-NEXT:    lfd 1, 0(24)
; CHECK-NEXT:    xsadddp 0, 0, 1
; CHECK-NEXT:    lfd 1, 8(24)
; CHECK-NEXT:    add 24, 24, 11
; CHECK-NEXT:    xsadddp 0, 0, 1
; CHECK-NEXT:    lfd 1, -16(25)
; CHECK-NEXT:    xsadddp 0, 0, 1
; CHECK-NEXT:    lfd 1, -8(25)
; CHECK-NEXT:    xsadddp 0, 0, 1
; CHECK-NEXT:    lfd 1, 0(25)
; CHECK-NEXT:    xsadddp 0, 0, 1
; CHECK-NEXT:    lfd 1, 8(25)
; CHECK-NEXT:    add 25, 25, 11
; CHECK-NEXT:    xsadddp 0, 0, 1
; CHECK-NEXT:    stfd 0, 0(26)
; CHECK-NEXT:    add 26, 26, 11
; CHECK-NEXT:    blt 0, .LBB0_5
; CHECK-NEXT:    b .LBB0_2
; CHECK-NEXT:  .LBB0_6:
; CHECK-NEXT:    ld 30, -16(1) # 8-byte Folded Reload
; CHECK-NEXT:    ld 29, -24(1) # 8-byte Folded Reload
; CHECK-NEXT:    ld 28, -32(1) # 8-byte Folded Reload
; CHECK-NEXT:    ld 27, -40(1) # 8-byte Folded Reload
; CHECK-NEXT:    ld 26, -48(1) # 8-byte Folded Reload
; CHECK-NEXT:    ld 25, -56(1) # 8-byte Folded Reload
; CHECK-NEXT:    ld 24, -64(1) # 8-byte Folded Reload
; CHECK-NEXT:    ld 23, -72(1) # 8-byte Folded Reload
; CHECK-NEXT:    ld 22, -80(1) # 8-byte Folded Reload
; CHECK-NEXT:    blr
  ; Entry block (implicitly %8): return immediately unless %2 < %4 (signed).
  %9 = icmp slt i64 %2, %4
  br i1 %9, label %10, label %97

; Outer loop header. %11 is the row index: it starts at %2 and is advanced by
; %6 in block %93. %12 is the column index: it starts at %3 and is carried
; over from the previous outer iteration through %94; it is never reset.
10:                                               ; preds = %8, %93
  %11 = phi i64 [ %95, %93 ], [ %2, %8 ]
  %12 = phi i64 [ %94, %93 ], [ %3, %8 ]
  ; Inner-loop bound for this row: %11 - %7.
  %13 = sub nsw i64 %11, %7
  %14 = icmp slt i64 %12, %13
  br i1 %14, label %15, label %93

; Inner-loop preheader: element offsets of the starts of rows %11, %11+1,
; %11+2 and %11+3, each row being %5 elements long.
15:                                               ; preds = %10
  %16 = mul nsw i64 %11, %5
  %17 = add nsw i64 %11, 1
  %18 = mul nsw i64 %17, %5
  %19 = add nsw i64 %11, 2
  %20 = mul nsw i64 %19, %5
  %21 = add nsw i64 %11, 3
  %22 = mul nsw i64 %21, %5
  br label %23

; Inner loop body. %24 is the column index, stepped by %7 per iteration.
23:                                               ; preds = %15, %23
  %24 = phi i64 [ %12, %15 ], [ %91, %23 ]
  ; Running sum starts from the current value of %1[%24].
  %25 = getelementptr inbounds double, ptr %1, i64 %24
  %26 = load double, ptr %25, align 8
  ; Row %11: elements %24 .. %24+3.
  %27 = add nsw i64 %24, %16
  %28 = getelementptr inbounds double, ptr %0, i64 %27
  %29 = load double, ptr %28, align 8
  %30 = fadd double %26, %29
  %31 = add nsw i64 %27, 1
  %32 = getelementptr inbounds double, ptr %0, i64 %31
  %33 = load double, ptr %32, align 8
  %34 = fadd double %30, %33
  %35 = add nsw i64 %27, 2
  %36 = getelementptr inbounds double, ptr %0, i64 %35
  %37 = load double, ptr %36, align 8
  %38 = fadd double %34, %37
  %39 = add nsw i64 %27, 3
  %40 = getelementptr inbounds double, ptr %0, i64 %39
  %41 = load double, ptr %40, align 8
  %42 = fadd double %38, %41
  ; Row %11+1: elements %24 .. %24+3.
  %43 = add nsw i64 %24, %18
  %44 = getelementptr inbounds double, ptr %0, i64 %43
  %45 = load double, ptr %44, align 8
  %46 = fadd double %42, %45
  %47 = add nsw i64 %43, 1
  %48 = getelementptr inbounds double, ptr %0, i64 %47
  %49 = load double, ptr %48, align 8
  %50 = fadd double %46, %49
  %51 = add nsw i64 %43, 2
  %52 = getelementptr inbounds double, ptr %0, i64 %51
  %53 = load double, ptr %52, align 8
  %54 = fadd double %50, %53
  %55 = add nsw i64 %43, 3
  %56 = getelementptr inbounds double, ptr %0, i64 %55
  %57 = load double, ptr %56, align 8
  %58 = fadd double %54, %57
  ; Row %11+2: elements %24 .. %24+3.
  %59 = add nsw i64 %24, %20
  %60 = getelementptr inbounds double, ptr %0, i64 %59
  %61 = load double, ptr %60, align 8
  %62 = fadd double %58, %61
  %63 = add nsw i64 %59, 1
  %64 = getelementptr inbounds double, ptr %0, i64 %63
  %65 = load double, ptr %64, align 8
  %66 = fadd double %62, %65
  %67 = add nsw i64 %59, 2
  %68 = getelementptr inbounds double, ptr %0, i64 %67
  %69 = load double, ptr %68, align 8
  %70 = fadd double %66, %69
  %71 = add nsw i64 %59, 3
  %72 = getelementptr inbounds double, ptr %0, i64 %71
  %73 = load double, ptr %72, align 8
  %74 = fadd double %70, %73
  ; Row %11+3: elements %24 .. %24+3.
  %75 = add nsw i64 %24, %22
  %76 = getelementptr inbounds double, ptr %0, i64 %75
  %77 = load double, ptr %76, align 8
  %78 = fadd double %74, %77
  %79 = add nsw i64 %75, 1
  %80 = getelementptr inbounds double, ptr %0, i64 %79
  %81 = load double, ptr %80, align 8
  %82 = fadd double %78, %81
  %83 = add nsw i64 %75, 2
  %84 = getelementptr inbounds double, ptr %0, i64 %83
  %85 = load double, ptr %84, align 8
  %86 = fadd double %82, %85
  %87 = add nsw i64 %75, 3
  %88 = getelementptr inbounds double, ptr %0, i64 %87
  %89 = load double, ptr %88, align 8
  %90 = fadd double %86, %89
  ; Write the accumulated sum back to %1[%24], then step the column by %7.
  store double %90, ptr %25, align 8
  %91 = add nsw i64 %24, %7
  %92 = icmp slt i64 %91, %13
  br i1 %92, label %23, label %93

; Outer loop latch. %94 is the column index handed to the next outer
; iteration: unchanged if the inner loop was skipped, otherwise its final
; value. The row index advances by %6 and the loop continues while it is
; below %4.
93:                                               ; preds = %23, %10
  %94 = phi i64 [ %12, %10 ], [ %91, %23 ]
  %95 = add nsw i64 %11, %6
  %96 = icmp slt i64 %95, %4
  br i1 %96, label %10, label %97

; Common exit.
97:                                               ; preds = %93, %8
  ret void
}