; Test the loop alignment.
;
; Check-prefix scheme used by the RUN lines in this file:
;   * CHECK   - directives shared by every invocation.
;   * GENERIC - expectations for the -mcpu=a2 invocation.
;   * PWR     - expectations for the -mcpu=pwr8 / -mcpu=pwr9 invocations
;               (both little- and big-endian powerpc64 triples).
;   * The "-DISABLE-PPC-INNERMOST-LOOP-ALIGN32" flavours of the two prefixes
;     above are used by the second group of invocations, which pass
;     -disable-ppc-innermost-loop-align32 to llc.
;
; RUN: llc -verify-machineinstrs -mcpu=a2 -mtriple powerpc64le-unknown-linux-gnu < %s | FileCheck %s -check-prefixes=CHECK,GENERIC
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple powerpc64le-unknown-linux-gnu < %s | FileCheck %s -check-prefixes=CHECK,PWR
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple powerpc64le-unknown-linux-gnu < %s | FileCheck %s -check-prefixes=CHECK,PWR
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple powerpc64-unknown-linux-gnu < %s | FileCheck %s -check-prefixes=CHECK,PWR
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple powerpc64-unknown-linux-gnu < %s | FileCheck %s -check-prefixes=CHECK,PWR

; Test the loop alignment and the option -disable-ppc-innermost-loop-align32.
; RUN: llc -verify-machineinstrs -mcpu=a2 -disable-ppc-innermost-loop-align32 -mtriple powerpc64le-unknown-linux-gnu < %s | FileCheck %s -check-prefixes=CHECK,GENERIC-DISABLE-PPC-INNERMOST-LOOP-ALIGN32
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -disable-ppc-innermost-loop-align32 -mtriple powerpc64le-unknown-linux-gnu < %s | FileCheck %s -check-prefixes=CHECK,PWR-DISABLE-PPC-INNERMOST-LOOP-ALIGN32
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -disable-ppc-innermost-loop-align32 -mtriple powerpc64le-unknown-linux-gnu < %s | FileCheck %s -check-prefixes=CHECK,PWR-DISABLE-PPC-INNERMOST-LOOP-ALIGN32
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -disable-ppc-innermost-loop-align32 -mtriple powerpc64-unknown-linux-gnu < %s | FileCheck %s -check-prefixes=CHECK,PWR-DISABLE-PPC-INNERMOST-LOOP-ALIGN32
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -disable-ppc-innermost-loop-align32 -mtriple powerpc64-unknown-linux-gnu < %s | FileCheck %s -check-prefixes=CHECK,PWR-DISABLE-PPC-INNERMOST-LOOP-ALIGN32


; Argument record read by the big-loop tests: field 0 is the base pointer the
; loops store through, fields 1 and 2 seed the outer and inner trip counters.
%struct.parm = type { ptr, i32, i32 }

; Test the loop alignment when the innermost hot loop has more than 8 instructions.
; The directives after the body expect a 32-byte alignment (.p2align 5) only
; for the PWR invocations without the disable option; every other invocation
; expects .p2align 4.
define void @big_loop(ptr %arg) {
entry:
  %localArg.sroa.0.0.copyload = load ptr, ptr %arg, align 8
  %localArg.sroa.4.0..sroa_idx56 = getelementptr inbounds %struct.parm, ptr %arg, i64 0, i32 1
  %localArg.sroa.4.0.copyload = load i32, ptr %localArg.sroa.4.0..sroa_idx56, align 8
  %localArg.sroa.5.0..sroa_idx58 = getelementptr inbounds %struct.parm, ptr %arg, i64 0, i32 2
  %localArg.sroa.5.0.copyload = load i32, ptr %localArg.sroa.5.0..sroa_idx58, align 4
  %0 = sext i32 %localArg.sroa.5.0.copyload to i64
  br label %do.body

; Outer loop header: %m.0 counts down from field 1 of the argument record.
do.body:                                          ; preds = %do.end, %entry
  %m.0 = phi i32 [ %localArg.sroa.4.0.copyload, %entry ], [ %dec24, %do.end ]
  br label %do.body3

; Innermost loop: four stores per iteration, index counting down to zero.
do.body3:                                         ; preds = %do.body3, %do.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %do.body3 ], [ %0, %do.body ]
  %1 = add nsw i64 %indvars.iv, 2
  %arrayidx = getelementptr inbounds i32, ptr %localArg.sroa.0.0.copyload, i64 %1
  %2 = add nsw i64 %indvars.iv, 3
  %3 = trunc i64 %1 to i32
  %4 = add nsw i64 %indvars.iv, 4
  %arrayidx10 = getelementptr inbounds i32, ptr %localArg.sroa.0.0.copyload, i64 %2
  %5 = trunc i64 %2 to i32
  store i32 %5, ptr %arrayidx10, align 4
  %arrayidx12 = getelementptr inbounds i32, ptr %localArg.sroa.0.0.copyload, i64 %4
  %6 = trunc i64 %4 to i32
  store i32 %6, ptr %arrayidx12, align 4
  store i32 %3, ptr %arrayidx, align 4
  %arrayidx21 = getelementptr inbounds i32, ptr %localArg.sroa.0.0.copyload, i64 %indvars.iv
  %7 = trunc i64 %indvars.iv to i32
  %8 = add i32 %7, 1
  store i32 %8, ptr %arrayidx21, align 4
  %indvars.iv.next = add nsw i64 %indvars.iv, -1
  %9 = icmp eq i64 %indvars.iv, 0
  br i1 %9, label %do.end, label %do.body3

do.end:                                           ; preds = %do.body3
  %dec24 = add nsw i32 %m.0, -1
  %tobool25 = icmp eq i32 %m.0, 0
  br i1 %tobool25, label %do.end26, label %do.body

do.end26:                                         ; preds = %do.end
  %arrayidx28 = getelementptr inbounds i32, ptr %localArg.sroa.0.0.copyload, i64 %0
  store i32 0, ptr %arrayidx28, align 4
  ret void


; CHECK-LABEL: @big_loop
; CHECK: mtctr
; GENERIC: .p2align 4
; PWR: .p2align 5
; GENERIC-DISABLE-PPC-INNERMOST-LOOP-ALIGN32: .p2align 4
; PWR-DISABLE-PPC-INNERMOST-LOOP-ALIGN32: .p2align 4
; CHECK: bdnz
}

; Test the loop alignment when the innermost hot loop has 5-8 instructions.
; Unlike @big_loop, the PWR invocations are expected to keep .p2align 5 here
; even when -disable-ppc-innermost-loop-align32 is passed.
; NOTE(review): presumably the 5-8 instruction rule is independent of the
; innermost-loop rule that the option switches off - confirm against the
; PowerPC preferred-loop-alignment code.
define void @general_loop(ptr %s, i64 %m) {
entry:
  %tobool40 = icmp eq i64 %m, 0
  br i1 %tobool40, label %while.end18, label %while.body3.lr.ph

while.cond.loopexit:                              ; preds = %while.body3
  %tobool = icmp eq i64 %dec, 0
  br i1 %tobool, label %while.end18, label %while.body3.lr.ph

; Outer loop preheader: computes the two loop-invariant values stored below.
while.body3.lr.ph:                                ; preds = %entry, %while.cond.loopexit
  %m.addr.041 = phi i64 [ %dec, %while.cond.loopexit ], [ %m, %entry ]
  %dec = add nsw i64 %m.addr.041, -1
  %conv = trunc i64 %m.addr.041 to i32
  %conv11 = trunc i64 %dec to i32
  br label %while.body3

; Innermost loop: three stores per iteration, %n.039 counting down to zero.
while.body3:                                      ; preds = %while.body3.lr.ph, %while.body3
  %n.039 = phi i64 [ %m.addr.041, %while.body3.lr.ph ], [ %dec16, %while.body3 ]
  %inc = add nsw i64 %n.039, 1
  %arrayidx = getelementptr inbounds i32, ptr %s, i64 %n.039
  %inc5 = add nsw i64 %n.039, 2
  %arrayidx6 = getelementptr inbounds i32, ptr %s, i64 %inc
  %sub = sub nsw i64 %dec, %inc5
  %conv7 = trunc i64 %sub to i32
  %arrayidx9 = getelementptr inbounds i32, ptr %s, i64 %inc5
  store i32 %conv7, ptr %arrayidx9, align 4
  store i32 %conv11, ptr %arrayidx6, align 4
  store i32 %conv, ptr %arrayidx, align 4
  %dec16 = add nsw i64 %n.039, -1
  %tobool2 = icmp eq i64 %dec16, 0
  br i1 %tobool2, label %while.cond.loopexit, label %while.body3

while.end18:                                      ; preds = %while.cond.loopexit, %entry
  ret void


; CHECK-LABEL: @general_loop
; CHECK: mtctr
; GENERIC: .p2align 4
; PWR: .p2align 5
; GENERIC-DISABLE-PPC-INNERMOST-LOOP-ALIGN32: .p2align 4
; PWR-DISABLE-PPC-INNERMOST-LOOP-ALIGN32: .p2align 5
; CHECK: bdnz
}

; Test the small loop alignment when the innermost hot loop has fewer than 4 instructions.
; The loop bodies use inline asm for the decrement; the directives after the
; body match "mr" and "bne" rather than the mtctr/bdnz pair used by the other
; functions in this file.
define void @small_loop(i64 %m) {
entry:
  br label %do.body

do.body:                                          ; preds = %do.end, %entry
  %m.addr.0 = phi i64 [ %m, %entry ], [ %1, %do.end ]
  br label %do.body1

; Innermost loop: a single inline-asm decrement plus the exit compare.
do.body1:                                         ; preds = %do.body1, %do.body
  %n.0 = phi i64 [ %m.addr.0, %do.body ], [ %0, %do.body1 ]
  %0 = tail call i64 asm "subi $0,$0,1", "=r,0"(i64 %n.0)
  %tobool = icmp eq i64 %0, 0
  br i1 %tobool, label %do.end, label %do.body1

do.end:                                           ; preds = %do.body1
  %1 = tail call i64 asm "subi $1,$1,1", "=r,0"(i64 %m.addr.0)
  %tobool3 = icmp eq i64 %1, 0
  br i1 %tobool3, label %do.end4, label %do.body

do.end4:                                          ; preds = %do.end
  ret void


; CHECK-LABEL: @small_loop
; CHECK: mr
; GENERIC: .p2align 4
; PWR: .p2align 5
; GENERIC-DISABLE-PPC-INNERMOST-LOOP-ALIGN32: .p2align 4
; PWR-DISABLE-PPC-INNERMOST-LOOP-ALIGN32: .p2align 4
; CHECK: bne
}

; Test the loop alignment when the innermost cold loop has more than 8 instructions.
; NOTE(review): the IR below is textually identical to @big_loop and none of
; its branches carries !prof branch-weight metadata, so nothing visible in this
; function marks the inner loop as cold - confirm whether profile annotations
; were intended here.
define void @big_loop_cold_innerloop(ptr %arg) {
entry:
  %localArg.sroa.0.0.copyload = load ptr, ptr %arg, align 8
  %localArg.sroa.4.0..sroa_idx56 = getelementptr inbounds %struct.parm, ptr %arg, i64 0, i32 1
  %localArg.sroa.4.0.copyload = load i32, ptr %localArg.sroa.4.0..sroa_idx56, align 8
  %localArg.sroa.5.0..sroa_idx58 = getelementptr inbounds %struct.parm, ptr %arg, i64 0, i32 2
  %localArg.sroa.5.0.copyload = load i32, ptr %localArg.sroa.5.0..sroa_idx58, align 4
  %0 = sext i32 %localArg.sroa.5.0.copyload to i64
  br label %do.body

do.body:                                          ; preds = %do.end, %entry
  %m.0 = phi i32 [ %localArg.sroa.4.0.copyload, %entry ], [ %dec24, %do.end ]
  br label %do.body3

do.body3:                                         ; preds = %do.body3, %do.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %do.body3 ], [ %0, %do.body ]
  %1 = add nsw i64 %indvars.iv, 2
  %arrayidx = getelementptr inbounds i32, ptr %localArg.sroa.0.0.copyload, i64 %1
  %2 = add nsw i64 %indvars.iv, 3
  %3 = trunc i64 %1 to i32
  %4 = add nsw i64 %indvars.iv, 4
  %arrayidx10 = getelementptr inbounds i32, ptr %localArg.sroa.0.0.copyload, i64 %2
  %5 = trunc i64 %2 to i32
  store i32 %5, ptr %arrayidx10, align 4
  %arrayidx12 = getelementptr inbounds i32, ptr %localArg.sroa.0.0.copyload, i64 %4
  %6 = trunc i64 %4 to i32
  store i32 %6, ptr %arrayidx12, align 4
  store i32 %3, ptr %arrayidx, align 4
  %arrayidx21 = getelementptr inbounds i32, ptr %localArg.sroa.0.0.copyload, i64 %indvars.iv
  %7 = trunc i64 %indvars.iv to i32
  %8 = add i32 %7, 1
  store i32 %8, ptr %arrayidx21, align 4
  %indvars.iv.next = add nsw i64 %indvars.iv, -1
  %9 = icmp eq i64 %indvars.iv, 0
  br i1 %9, label %do.end, label %do.body3

do.end:                                           ; preds = %do.body3
  %dec24 = add nsw i32 %m.0, -1
  %tobool25 = icmp eq i32 %m.0, 0
  br i1 %tobool25, label %do.end26, label %do.body

do.end26:                                         ; preds = %do.end
  %arrayidx28 = getelementptr inbounds i32, ptr %localArg.sroa.0.0.copyload, i64 %0
  store i32 0, ptr %arrayidx28, align 4
  ret void


; The PWR invocations expect exactly one 32-byte alignment directive between
; mtctr and bdnz; all other invocations expect none.
; CHECK-LABEL: @big_loop_cold_innerloop
; CHECK: mtctr
; PWR: .p2align 5
; CHECK-NOT: .p2align 5
; CHECK: bdnz
}