; RUN: llc < %s -mtriple=x86_64-- -mcpu=generic -pre-RA-sched=source -enable-misched \
; RUN:          -misched-prera-direction=topdown -verify-machineinstrs \
; RUN:     | FileCheck %s -check-prefix=TOPDOWN
; RUN: llc < %s -mtriple=x86_64-- -mcpu=generic -pre-RA-sched=source -enable-misched \
; RUN:          -misched=ilpmin -verify-machineinstrs \
; RUN:     | FileCheck %s -check-prefix=ILPMIN
; RUN: llc < %s -mtriple=x86_64-- -mcpu=generic -pre-RA-sched=source -enable-misched \
; RUN:          -misched=ilpmax -verify-machineinstrs \
; RUN:     | FileCheck %s -check-prefix=ILPMAX
;
; Verify that the MI scheduler minimizes register pressure for a
; uniform set of bottom-up subtrees (unrolled matrix multiply).
;
; For current top-down heuristics, ensure that some folded imulls have
; been reordered with the stores. This tests the scheduler's cheap
; alias analysis ability (that doesn't require any AliasAnalysis pass).
;
; TOPDOWN-LABEL: %for.body
; TOPDOWN: movl %{{.*}}, (
; TOPDOWN: imull {{[0-9]*}}(
; TOPDOWN: movl %{{.*}}, 4(
; TOPDOWN: imull {{[0-9]*}}(
; TOPDOWN: movl %{{.*}}, 8(
; TOPDOWN: movl %{{.*}}, 12(
; TOPDOWN-LABEL: %for.end
;
; For -misched=ilpmin, verify that each expression subtree is
; scheduled independently, and that the imull/adds are interleaved.
;
; ILPMIN-LABEL: %for.body
; ILPMIN: movl %{{.*}}, (
; ILPMIN: imull
; ILPMIN: imull
; ILPMIN: addl
; ILPMIN: imull
; ILPMIN: imull
; ILPMIN: addl
; ILPMIN: addl
; ILPMIN: movl %{{.*}}, 4(
; ILPMIN: imull
; ILPMIN: imull
; ILPMIN: addl
; ILPMIN: imull
; ILPMIN: imull
; ILPMIN: addl
; ILPMIN: addl
; ILPMIN: movl %{{.*}}, 8(
; ILPMIN: imull
; ILPMIN: imull
; ILPMIN: addl
; ILPMIN: imull
; ILPMIN: imull
; ILPMIN: addl
; ILPMIN: addl
; ILPMIN: movl %{{.*}}, 12(
; ILPMIN-LABEL: %for.end
;
; For -misched=ilpmax, verify that each expression subtree is
; scheduled independently, and that the imull/adds are clustered.
;
; ILPMAX-LABEL: %for.body
; ILPMAX: movl %{{.*}}, (
; ILPMAX: imull
; ILPMAX: imull
; ILPMAX: imull
; ILPMAX: imull
; ILPMAX: addl
; ILPMAX: addl
; ILPMAX: addl
; ILPMAX: movl %{{.*}}, 4(
; ILPMAX: imull
; ILPMAX: imull
; ILPMAX: imull
; ILPMAX: imull
; ILPMAX: addl
; ILPMAX: addl
; ILPMAX: addl
; ILPMAX: movl %{{.*}}, 8(
; ILPMAX: imull
; ILPMAX: imull
; ILPMAX: imull
; ILPMAX: imull
; ILPMAX: addl
; ILPMAX: addl
; ILPMAX: addl
; ILPMAX: movl %{{.*}}, 12(
; ILPMAX-LABEL: %for.end

; @mmult - one fully unrolled row of a 4x4 i32 matrix product per iteration.
;
; %m1, %m2 and %m3 are indexed as arrays of [4 x i32]. For each row index
; i in 0..3 (the %indvars.iv induction variable) the loop body computes
;     m3[i][j] = m1[i][0]*m2[0][j] + m1[i][1]*m2[1][j]
;              + m1[i][2]*m2[2][j] + m1[i][3]*m2[3][j]      for j = 0..3
; as four independent mul/add subtrees, each ending in one store to %m3.
;
; The body is laid out in strict phases: all 32 loads, then all 16 multiplies,
; then the 12 adds, then the 4 stores. Each getelementptr is immediately
; followed by the load that consumes it. Every llc invocation in this file
; passes -pre-RA-sched=source, so the order of instructions in this block is
; part of the test input; do not reorder them when editing.
define void @mmult(ptr noalias nocapture %m1, ptr noalias nocapture %m2,
ptr noalias nocapture %m3) nounwind uwtable ssp {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]

  ; --- Loads for output column 0: m1[i][k] paired with m2[k][0] ---
  %arrayidx8 = getelementptr inbounds [4 x i32], ptr %m1, i64 %indvars.iv, i64 0
  %tmp = load i32, ptr %arrayidx8, align 4
  ; m2[0][0] is read through the base pointer directly.
  %tmp1 = load i32, ptr %m2, align 4
  %arrayidx8.1 = getelementptr inbounds [4 x i32], ptr %m1, i64 %indvars.iv, i64 1
  %tmp2 = load i32, ptr %arrayidx8.1, align 4
  %arrayidx12.1 = getelementptr inbounds [4 x i32], ptr %m2, i64 1, i64 0
  %tmp3 = load i32, ptr %arrayidx12.1, align 4
  %arrayidx8.2 = getelementptr inbounds [4 x i32], ptr %m1, i64 %indvars.iv, i64 2
  %tmp4 = load i32, ptr %arrayidx8.2, align 4
  %arrayidx12.2 = getelementptr inbounds [4 x i32], ptr %m2, i64 2, i64 0
  %tmp5 = load i32, ptr %arrayidx12.2, align 4
  %arrayidx8.3 = getelementptr inbounds [4 x i32], ptr %m1, i64 %indvars.iv, i64 3
  %tmp6 = load i32, ptr %arrayidx8.3, align 4
  %arrayidx12.3 = getelementptr inbounds [4 x i32], ptr %m2, i64 3, i64 0
  %tmp7 = load i32, ptr %arrayidx12.3, align 4

  ; --- Loads for output column 1: the m1 row is re-loaded, m2[k][1] ---
  %tmp8 = load i32, ptr %arrayidx8, align 4
  %arrayidx12.137 = getelementptr inbounds [4 x i32], ptr %m2, i64 0, i64 1
  %tmp9 = load i32, ptr %arrayidx12.137, align 4
  %tmp10 = load i32, ptr %arrayidx8.1, align 4
  %arrayidx12.1.1 = getelementptr inbounds [4 x i32], ptr %m2, i64 1, i64 1
  %tmp11 = load i32, ptr %arrayidx12.1.1, align 4
  %tmp12 = load i32, ptr %arrayidx8.2, align 4
  %arrayidx12.2.1 = getelementptr inbounds [4 x i32], ptr %m2, i64 2, i64 1
  %tmp13 = load i32, ptr %arrayidx12.2.1, align 4
  %tmp14 = load i32, ptr %arrayidx8.3, align 4
  %arrayidx12.3.1 = getelementptr inbounds [4 x i32], ptr %m2, i64 3, i64 1
  %tmp15 = load i32, ptr %arrayidx12.3.1, align 4

  ; --- Loads for output column 2: the m1 row is re-loaded, m2[k][2] ---
  %tmp16 = load i32, ptr %arrayidx8, align 4
  %arrayidx12.239 = getelementptr inbounds [4 x i32], ptr %m2, i64 0, i64 2
  %tmp17 = load i32, ptr %arrayidx12.239, align 4
  %tmp18 = load i32, ptr %arrayidx8.1, align 4
  %arrayidx12.1.2 = getelementptr inbounds [4 x i32], ptr %m2, i64 1, i64 2
  %tmp19 = load i32, ptr %arrayidx12.1.2, align 4
  %tmp20 = load i32, ptr %arrayidx8.2, align 4
  %arrayidx12.2.2 = getelementptr inbounds [4 x i32], ptr %m2, i64 2, i64 2
  %tmp21 = load i32, ptr %arrayidx12.2.2, align 4
  %tmp22 = load i32, ptr %arrayidx8.3, align 4
  %arrayidx12.3.2 = getelementptr inbounds [4 x i32], ptr %m2, i64 3, i64 2
  %tmp23 = load i32, ptr %arrayidx12.3.2, align 4

  ; --- Loads for output column 3: the m1 row is re-loaded, m2[k][3] ---
  %tmp24 = load i32, ptr %arrayidx8, align 4
  %arrayidx12.341 = getelementptr inbounds [4 x i32], ptr %m2, i64 0, i64 3
  %tmp25 = load i32, ptr %arrayidx12.341, align 4
  %tmp26 = load i32, ptr %arrayidx8.1, align 4
  %arrayidx12.1.3 = getelementptr inbounds [4 x i32], ptr %m2, i64 1, i64 3
  %tmp27 = load i32, ptr %arrayidx12.1.3, align 4
  %tmp28 = load i32, ptr %arrayidx8.2, align 4
  %arrayidx12.2.3 = getelementptr inbounds [4 x i32], ptr %m2, i64 2, i64 3
  %tmp29 = load i32, ptr %arrayidx12.2.3, align 4
  %tmp30 = load i32, ptr %arrayidx8.3, align 4
  %arrayidx12.3.3 = getelementptr inbounds [4 x i32], ptr %m2, i64 3, i64 3
  %tmp31 = load i32, ptr %arrayidx12.3.3, align 4

  ; --- 16 products, four per output column (m2 operand first) ---
  %mul = mul nsw i32 %tmp1, %tmp
  %mul.1 = mul nsw i32 %tmp3, %tmp2
  %mul.2 = mul nsw i32 %tmp5, %tmp4
  %mul.3 = mul nsw i32 %tmp7, %tmp6
  %mul.138 = mul nsw i32 %tmp9, %tmp8
  %mul.1.1 = mul nsw i32 %tmp11, %tmp10
  %mul.2.1 = mul nsw i32 %tmp13, %tmp12
  %mul.3.1 = mul nsw i32 %tmp15, %tmp14
  %mul.240 = mul nsw i32 %tmp17, %tmp16
  %mul.1.2 = mul nsw i32 %tmp19, %tmp18
  %mul.2.2 = mul nsw i32 %tmp21, %tmp20
  %mul.3.2 = mul nsw i32 %tmp23, %tmp22
  %mul.342 = mul nsw i32 %tmp25, %tmp24
  %mul.1.3 = mul nsw i32 %tmp27, %tmp26
  %mul.2.3 = mul nsw i32 %tmp29, %tmp28
  %mul.3.3 = mul nsw i32 %tmp31, %tmp30

  ; --- Per-column reduction: a three-add chain over the four products ---
  %add.1 = add nsw i32 %mul.1, %mul
  %add.2 = add nsw i32 %mul.2, %add.1
  %add.3 = add nsw i32 %mul.3, %add.2
  %add.1.1 = add nsw i32 %mul.1.1, %mul.138
  %add.2.1 = add nsw i32 %mul.2.1, %add.1.1
  %add.3.1 = add nsw i32 %mul.3.1, %add.2.1
  %add.1.2 = add nsw i32 %mul.1.2, %mul.240
  %add.2.2 = add nsw i32 %mul.2.2, %add.1.2
  %add.3.2 = add nsw i32 %mul.3.2, %add.2.2
  %add.1.3 = add nsw i32 %mul.1.3, %mul.342
  %add.2.3 = add nsw i32 %mul.2.3, %add.1.3
  %add.3.3 = add nsw i32 %mul.3.3, %add.2.3

  ; --- Write row i of the result: m3[i][0..3] ---
  %arrayidx16 = getelementptr inbounds [4 x i32], ptr %m3, i64 %indvars.iv, i64 0
  store i32 %add.3, ptr %arrayidx16, align 4
  %arrayidx16.1 = getelementptr inbounds [4 x i32], ptr %m3, i64 %indvars.iv, i64 1
  store i32 %add.3.1, ptr %arrayidx16.1, align 4
  %arrayidx16.2 = getelementptr inbounds [4 x i32], ptr %m3, i64 %indvars.iv, i64 2
  store i32 %add.3.2, ptr %arrayidx16.2, align 4
  %arrayidx16.3 = getelementptr inbounds [4 x i32], ptr %m3, i64 %indvars.iv, i64 3
  store i32 %add.3.3, ptr %arrayidx16.3, align 4

  ; --- Loop control: exit once the incremented row index reaches 4 ---
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 4
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}