; RUN: llc -O3 -mtriple=thumbv7em -mcpu=cortex-m4 %s -o - | FileCheck %s --check-prefix=CHECK-REG-PRESSURE
; RUN: llc -O3 -mtriple=thumbv7eb %s -o - | FileCheck %s --check-prefix=CHECK-UNSUPPORTED
; RUN: llc -O3 -mtriple=thumbv8m.main -mattr=+dsp -arm-parallel-dsp-load-limit=20 %s -o - | FileCheck %s --check-prefix=CHECK

; The same unrolled-and-jammed loop is compiled in three configurations:
;  * thumbv7em / cortex-m4 without a load-limit override (REG-PRESSURE
;    prefix): no smlad may appear at the top of the loop, and the loop must
;    contain exactly five sp-relative loads before its back-edge branch.
;  * thumbv7eb (UNSUPPORTED prefix): no smlad may appear after the function
;    label.
;  * thumbv8m.main with +dsp and the parallel-dsp load limit set to 20 (plain
;    prefix): exactly two smlad instructions are expected in the loop.

; CHECK-UNSUPPORTED-LABEL: unroll_n_jam_smlad
; CHECK-UNSUPPORTED-NOT: smlad r{{.}}

; Test that the duplicate loads are removed, which allows parallel dsp to find
; the parallel operations.

; Shape of the function under test:
;   entry:    stores 0 to res[idx], res[idx|1], res[idx|2] and res[idx|3], and
;             computes the four row offsets (idx|r) * N.
;   for.body: every iteration advances j by 4 and, for each row r in 0..3 and
;             each step c in 0..3, accumulates
;               acc[r] += sext(A[(idx|r) * N + j + c]) * sext(B[j + c])
;             The B address for a given c is recomputed and reloaded for every
;             row, so each B element is loaded four times per iteration.
;             The trip counter starts at N - (N & 3) and is decremented by 4
;             until it reaches zero.
;   exit:     stores the four accumulators to res[0], res[1], res[2], res[3].

; CHECK-LABEL: unroll_n_jam_smlad
define void @unroll_n_jam_smlad(ptr %res, ptr %A, ptr %B, i32 %N, i32 %idx) {
entry:
  %xtraiter306.i = and i32 %N, 3
  %unroll_iter310.i = sub i32 %N, %xtraiter306.i
  %arrayidx.us.i117.i = getelementptr inbounds i32, ptr %res, i32 %idx
  store i32 0, ptr %arrayidx.us.i117.i, align 4
  %mul.us.i118.i = mul i32 %idx, %N
  %inc11.us.i.i = or disjoint i32 %idx, 1
  %arrayidx.us.i117.1.i = getelementptr inbounds i32, ptr %res, i32 %inc11.us.i.i
  store i32 0, ptr %arrayidx.us.i117.1.i, align 4
  %mul.us.i118.1.i = mul i32 %inc11.us.i.i, %N
  %inc11.us.i.1.i = or disjoint i32 %idx, 2
  %arrayidx.us.i117.2.i = getelementptr inbounds i32, ptr %res, i32 %inc11.us.i.1.i
  store i32 0, ptr %arrayidx.us.i117.2.i, align 4
  %mul.us.i118.2.i = mul i32 %inc11.us.i.1.i, %N
  %inc11.us.i.2.i = or disjoint i32 %idx, 3
  %arrayidx.us.i117.3.i = getelementptr inbounds i32, ptr %res, i32 %inc11.us.i.2.i
  store i32 0, ptr %arrayidx.us.i117.3.i, align 4
  %mul.us.i118.3.i = mul i32 %inc11.us.i.2.i, %N
  %inc11.us.i.3.i = add i32 %idx, 4
  br label %for.body

; TODO: CSE, or something similar, is required to remove the duplicate loads.
; CHECK: %for.body
; CHECK: smlad
; CHECK: smlad
; CHECK-NOT: smlad r{{.*}}

; llc emits assembly here, so the negative check below has to name the
; instruction mnemonic; IR-level call syntax never appears in that output.
; CHECK-REG-PRESSURE: .LBB0_1:
; CHECK-REG-PRESSURE-NOT: smlad r{{.*}}
; CHECK-REG-PRESSURE: ldr{{.*}}, [sp
; CHECK-REG-PRESSURE: ldr{{.*}}, [sp
; CHECK-REG-PRESSURE: ldr{{.*}}, [sp
; CHECK-REG-PRESSURE: ldr{{.*}}, [sp
; CHECK-REG-PRESSURE: ldr{{.*}}, [sp
; CHECK-REG-PRESSURE-NOT: ldr{{.*}}, [sp
; CHECK-REG-PRESSURE: bne .LBB0_1

for.body:
  ; Four accumulators (one per row), the column index j and the trip counter.
  %A3 = phi i32 [ %add9.us.i.3361.i, %for.body ], [ 0, %entry ]
  %j.026.us.i.i = phi i32 [ %inc.us.i.3362.i, %for.body ], [ 0, %entry ]
  %A4 = phi i32 [ %add9.us.i.1.3.i, %for.body ], [ 0, %entry ]
  %A5 = phi i32 [ %add9.us.i.2.3.i, %for.body ], [ 0, %entry ]
  %A6 = phi i32 [ %add9.us.i.3.3.i, %for.body ], [ 0, %entry ]
  %niter335.i = phi i32 [ %niter335.nsub.3.i, %for.body ], [ %unroll_iter310.i, %entry ]

  ; Step 0 (B offset j), rows 0..3.
  %add.us.i.i = add i32 %j.026.us.i.i, %mul.us.i118.i
  %arrayidx4.us.i.i = getelementptr inbounds i16, ptr %A, i32 %add.us.i.i
  %A7 = load i16, ptr %arrayidx4.us.i.i, align 2
  %conv.us.i.i = sext i16 %A7 to i32
  %arrayidx5.us.i.i = getelementptr inbounds i16, ptr %B, i32 %j.026.us.i.i
  %A8 = load i16, ptr %arrayidx5.us.i.i, align 2
  %conv6.us.i.i = sext i16 %A8 to i32
  %mul7.us.i.i = mul nsw i32 %conv6.us.i.i, %conv.us.i.i
  %add9.us.i.i = add nsw i32 %mul7.us.i.i, %A3
  %inc.us.i.i = or disjoint i32 %j.026.us.i.i, 1
  %add.us.i.1.i = add i32 %j.026.us.i.i, %mul.us.i118.1.i
  %arrayidx4.us.i.1.i = getelementptr inbounds i16, ptr %A, i32 %add.us.i.1.i
  %A9 = load i16, ptr %arrayidx4.us.i.1.i, align 2
  %conv.us.i.1.i = sext i16 %A9 to i32
  %arrayidx5.us.i.1.i = getelementptr inbounds i16, ptr %B, i32 %j.026.us.i.i
  %B0 = load i16, ptr %arrayidx5.us.i.1.i, align 2
  %conv6.us.i.1.i = sext i16 %B0 to i32
  %mul7.us.i.1.i = mul nsw i32 %conv6.us.i.1.i, %conv.us.i.1.i
  %add9.us.i.1.i = add nsw i32 %mul7.us.i.1.i, %A4
  %inc.us.i.1.i = or disjoint i32 %j.026.us.i.i, 1
  %add.us.i.2.i = add i32 %j.026.us.i.i, %mul.us.i118.2.i
  %arrayidx4.us.i.2.i = getelementptr inbounds i16, ptr %A, i32 %add.us.i.2.i
  %B1 = load i16, ptr %arrayidx4.us.i.2.i, align 2
  %conv.us.i.2.i = sext i16 %B1 to i32
  %arrayidx5.us.i.2.i = getelementptr inbounds i16, ptr %B, i32 %j.026.us.i.i
  %B2 = load i16, ptr %arrayidx5.us.i.2.i, align 2
  %conv6.us.i.2.i = sext i16 %B2 to i32
  %mul7.us.i.2.i = mul nsw i32 %conv6.us.i.2.i, %conv.us.i.2.i
  %add9.us.i.2.i = add nsw i32 %mul7.us.i.2.i, %A5
  %inc.us.i.2.i = or disjoint i32 %j.026.us.i.i, 1
  %add.us.i.3.i = add i32 %j.026.us.i.i, %mul.us.i118.3.i
  %arrayidx4.us.i.3.i = getelementptr inbounds i16, ptr %A, i32 %add.us.i.3.i
  %B3 = load i16, ptr %arrayidx4.us.i.3.i, align 2
  %conv.us.i.3.i = sext i16 %B3 to i32
  %arrayidx5.us.i.3.i = getelementptr inbounds i16, ptr %B, i32 %j.026.us.i.i
  %B4 = load i16, ptr %arrayidx5.us.i.3.i, align 2
  %conv6.us.i.3.i = sext i16 %B4 to i32
  %mul7.us.i.3.i = mul nsw i32 %conv6.us.i.3.i, %conv.us.i.3.i
  %add9.us.i.3.i = add nsw i32 %mul7.us.i.3.i, %A6
  %inc.us.i.3.i = or disjoint i32 %j.026.us.i.i, 1

  ; Step 1 (B offset j|1), rows 0..3.
  %add.us.i.1337.i = add i32 %inc.us.i.i, %mul.us.i118.i
  %arrayidx4.us.i.1338.i = getelementptr inbounds i16, ptr %A, i32 %add.us.i.1337.i
  %B5 = load i16, ptr %arrayidx4.us.i.1338.i, align 2
  %conv.us.i.1339.i = sext i16 %B5 to i32
  %arrayidx5.us.i.1340.i = getelementptr inbounds i16, ptr %B, i32 %inc.us.i.i
  %B6 = load i16, ptr %arrayidx5.us.i.1340.i, align 2
  %conv6.us.i.1341.i = sext i16 %B6 to i32
  %mul7.us.i.1342.i = mul nsw i32 %conv6.us.i.1341.i, %conv.us.i.1339.i
  %add9.us.i.1343.i = add nsw i32 %mul7.us.i.1342.i, %add9.us.i.i
  %inc.us.i.1344.i = or disjoint i32 %j.026.us.i.i, 2
  %add.us.i.1.1.i = add i32 %inc.us.i.1.i, %mul.us.i118.1.i
  %arrayidx4.us.i.1.1.i = getelementptr inbounds i16, ptr %A, i32 %add.us.i.1.1.i
  %B7 = load i16, ptr %arrayidx4.us.i.1.1.i, align 2
  %conv.us.i.1.1.i = sext i16 %B7 to i32
  %arrayidx5.us.i.1.1.i = getelementptr inbounds i16, ptr %B, i32 %inc.us.i.1.i
  %B6.dup = load i16, ptr %arrayidx5.us.i.1.1.i, align 2
  %conv6.us.i.1.1.i = sext i16 %B6.dup to i32
  %mul7.us.i.1.1.i = mul nsw i32 %conv6.us.i.1.1.i, %conv.us.i.1.1.i
  %add9.us.i.1.1.i = add nsw i32 %mul7.us.i.1.1.i, %add9.us.i.1.i
  %inc.us.i.1.1.i = or disjoint i32 %j.026.us.i.i, 2
  %add.us.i.2.1.i = add i32 %inc.us.i.2.i, %mul.us.i118.2.i
  %arrayidx4.us.i.2.1.i = getelementptr inbounds i16, ptr %A, i32 %add.us.i.2.1.i
  %B9 = load i16, ptr %arrayidx4.us.i.2.1.i, align 2
  %conv.us.i.2.1.i = sext i16 %B9 to i32
  %arrayidx5.us.i.2.1.i = getelementptr inbounds i16, ptr %B, i32 %inc.us.i.2.i
  %B6.dup.i = load i16, ptr %arrayidx5.us.i.2.1.i, align 2
  %conv6.us.i.2.1.i = sext i16 %B6.dup.i to i32
  %mul7.us.i.2.1.i = mul nsw i32 %conv6.us.i.2.1.i, %conv.us.i.2.1.i
  %add9.us.i.2.1.i = add nsw i32 %mul7.us.i.2.1.i, %add9.us.i.2.i
  %inc.us.i.2.1.i = or disjoint i32 %j.026.us.i.i, 2
  %add.us.i.3.1.i = add i32 %inc.us.i.3.i, %mul.us.i118.3.i
  %arrayidx4.us.i.3.1.i = getelementptr inbounds i16, ptr %A, i32 %add.us.i.3.1.i
  %B11 = load i16, ptr %arrayidx4.us.i.3.1.i, align 2
  %conv.us.i.3.1.i = sext i16 %B11 to i32
  %arrayidx5.us.i.3.1.i = getelementptr inbounds i16, ptr %B, i32 %inc.us.i.3.i
  %B6.dup.i.i = load i16, ptr %arrayidx5.us.i.3.1.i, align 2
  %conv6.us.i.3.1.i = sext i16 %B6.dup.i.i to i32
  %mul7.us.i.3.1.i = mul nsw i32 %conv6.us.i.3.1.i, %conv.us.i.3.1.i
  %add9.us.i.3.1.i = add nsw i32 %mul7.us.i.3.1.i, %add9.us.i.3.i
  %inc.us.i.3.1.i = or disjoint i32 %j.026.us.i.i, 2

  ; Step 2 (B offset j|2), rows 0..3.
  %add.us.i.2346.i = add i32 %inc.us.i.1344.i, %mul.us.i118.i
  %arrayidx4.us.i.2347.i = getelementptr inbounds i16, ptr %A, i32 %add.us.i.2346.i
  %B13 = load i16, ptr %arrayidx4.us.i.2347.i, align 2
  %conv.us.i.2348.i = sext i16 %B13 to i32
  %arrayidx5.us.i.2349.i = getelementptr inbounds i16, ptr %B, i32 %inc.us.i.1344.i
  %B14 = load i16, ptr %arrayidx5.us.i.2349.i, align 2
  %conv6.us.i.2350.i = sext i16 %B14 to i32
  %mul7.us.i.2351.i = mul nsw i32 %conv6.us.i.2350.i, %conv.us.i.2348.i
  %add9.us.i.2352.i = add nsw i32 %mul7.us.i.2351.i, %add9.us.i.1343.i
  %inc.us.i.2353.i = or disjoint i32 %j.026.us.i.i, 3
  %add.us.i.1.2.i = add i32 %inc.us.i.1.1.i, %mul.us.i118.1.i
  %arrayidx4.us.i.1.2.i = getelementptr inbounds i16, ptr %A, i32 %add.us.i.1.2.i
  %B15 = load i16, ptr %arrayidx4.us.i.1.2.i, align 2
  %conv.us.i.1.2.i = sext i16 %B15 to i32
  %arrayidx5.us.i.1.2.i = getelementptr inbounds i16, ptr %B, i32 %inc.us.i.1.1.i
  %B14.dup = load i16, ptr %arrayidx5.us.i.1.2.i, align 2
  %conv6.us.i.1.2.i = sext i16 %B14.dup to i32
  %mul7.us.i.1.2.i = mul nsw i32 %conv6.us.i.1.2.i, %conv.us.i.1.2.i
  %add9.us.i.1.2.i = add nsw i32 %mul7.us.i.1.2.i, %add9.us.i.1.1.i
  %inc.us.i.1.2.i = or disjoint i32 %j.026.us.i.i, 3
  %add.us.i.2.2.i = add i32 %inc.us.i.2.1.i, %mul.us.i118.2.i
  %arrayidx4.us.i.2.2.i = getelementptr inbounds i16, ptr %A, i32 %add.us.i.2.2.i
  %B17 = load i16, ptr %arrayidx4.us.i.2.2.i, align 2
  %conv.us.i.2.2.i = sext i16 %B17 to i32
  %arrayidx5.us.i.2.2.i = getelementptr inbounds i16, ptr %B, i32 %inc.us.i.2.1.i
  %B14.dup.i = load i16, ptr %arrayidx5.us.i.2.2.i, align 2
  %conv6.us.i.2.2.i = sext i16 %B14.dup.i to i32
  %mul7.us.i.2.2.i = mul nsw i32 %conv6.us.i.2.2.i, %conv.us.i.2.2.i
  %add9.us.i.2.2.i = add nsw i32 %mul7.us.i.2.2.i, %add9.us.i.2.1.i
  %inc.us.i.2.2.i = or disjoint i32 %j.026.us.i.i, 3
  %add.us.i.3.2.i = add i32 %inc.us.i.3.1.i, %mul.us.i118.3.i
  %arrayidx4.us.i.3.2.i = getelementptr inbounds i16, ptr %A, i32 %add.us.i.3.2.i
  %B19 = load i16, ptr %arrayidx4.us.i.3.2.i, align 2
  %conv.us.i.3.2.i = sext i16 %B19 to i32
  %arrayidx5.us.i.3.2.i = getelementptr inbounds i16, ptr %B, i32 %inc.us.i.3.1.i
  %B14.dup.i.i = load i16, ptr %arrayidx5.us.i.3.2.i, align 2
  %conv6.us.i.3.2.i = sext i16 %B14.dup.i.i to i32
  %mul7.us.i.3.2.i = mul nsw i32 %conv6.us.i.3.2.i, %conv.us.i.3.2.i
  %add9.us.i.3.2.i = add nsw i32 %mul7.us.i.3.2.i, %add9.us.i.3.1.i
  %inc.us.i.3.2.i = or disjoint i32 %j.026.us.i.i, 3

  ; Step 3 (B offset j|3), rows 0..3; also produces the next j (j + 4).
  %add.us.i.3355.i = add i32 %inc.us.i.2353.i, %mul.us.i118.i
  %arrayidx4.us.i.3356.i = getelementptr inbounds i16, ptr %A, i32 %add.us.i.3355.i
  %B21 = load i16, ptr %arrayidx4.us.i.3356.i, align 2
  %conv.us.i.3357.i = sext i16 %B21 to i32
  %arrayidx5.us.i.3358.i = getelementptr inbounds i16, ptr %B, i32 %inc.us.i.2353.i
  %B22 = load i16, ptr %arrayidx5.us.i.3358.i, align 2
  %conv6.us.i.3359.i = sext i16 %B22 to i32
  %mul7.us.i.3360.i = mul nsw i32 %conv6.us.i.3359.i, %conv.us.i.3357.i
  %add9.us.i.3361.i = add nsw i32 %mul7.us.i.3360.i, %add9.us.i.2352.i
  %inc.us.i.3362.i = add i32 %j.026.us.i.i, 4
  %add.us.i.1.3.i = add i32 %inc.us.i.1.2.i, %mul.us.i118.1.i
  %arrayidx4.us.i.1.3.i = getelementptr inbounds i16, ptr %A, i32 %add.us.i.1.3.i
  %B23 = load i16, ptr %arrayidx4.us.i.1.3.i, align 2
  %conv.us.i.1.3.i = sext i16 %B23 to i32
  %arrayidx5.us.i.1.3.i = getelementptr inbounds i16, ptr %B, i32 %inc.us.i.1.2.i
  %B22.dup = load i16, ptr %arrayidx5.us.i.1.3.i, align 2
  %conv6.us.i.1.3.i = sext i16 %B22.dup to i32
  %mul7.us.i.1.3.i = mul nsw i32 %conv6.us.i.1.3.i, %conv.us.i.1.3.i
  %add9.us.i.1.3.i = add nsw i32 %mul7.us.i.1.3.i, %add9.us.i.1.2.i
  %add.us.i.2.3.i = add i32 %inc.us.i.2.2.i, %mul.us.i118.2.i
  %arrayidx4.us.i.2.3.i = getelementptr inbounds i16, ptr %A, i32 %add.us.i.2.3.i
  %B25 = load i16, ptr %arrayidx4.us.i.2.3.i, align 2
  %conv.us.i.2.3.i = sext i16 %B25 to i32
  %arrayidx5.us.i.2.3.i = getelementptr inbounds i16, ptr %B, i32 %inc.us.i.2.2.i
  %B22.dup.i = load i16, ptr %arrayidx5.us.i.2.3.i, align 2
  %conv6.us.i.2.3.i = sext i16 %B22.dup.i to i32
  %mul7.us.i.2.3.i = mul nsw i32 %conv6.us.i.2.3.i, %conv.us.i.2.3.i
  %add9.us.i.2.3.i = add nsw i32 %mul7.us.i.2.3.i, %add9.us.i.2.2.i
  %add.us.i.3.3.i = add i32 %inc.us.i.3.2.i, %mul.us.i118.3.i
  %arrayidx4.us.i.3.3.i = getelementptr inbounds i16, ptr %A, i32 %add.us.i.3.3.i
  %B27 = load i16, ptr %arrayidx4.us.i.3.3.i, align 2
  %conv.us.i.3.3.i = sext i16 %B27 to i32
  %arrayidx5.us.i.3.3.i = getelementptr inbounds i16, ptr %B, i32 %inc.us.i.3.2.i
  %B22.dup.i.i = load i16, ptr %arrayidx5.us.i.3.3.i, align 2
  %conv6.us.i.3.3.i = sext i16 %B22.dup.i.i to i32
  %mul7.us.i.3.3.i = mul nsw i32 %conv6.us.i.3.3.i, %conv.us.i.3.3.i
  %add9.us.i.3.3.i = add nsw i32 %mul7.us.i.3.3.i, %add9.us.i.3.2.i

  ; Count down by the unroll factor and loop until the counter hits zero.
  %niter335.nsub.3.i = add i32 %niter335.i, -4
  %niter335.ncmp.3.i = icmp eq i32 %niter335.nsub.3.i, 0
  br i1 %niter335.ncmp.3.i, label %exit, label %for.body

exit:
  ; Write the four row accumulators to res[0..3].
  store i32 %add9.us.i.3361.i, ptr %res, align 4
  %arrayidx.out.1.i = getelementptr inbounds i32, ptr %res, i32 1
  store i32 %add9.us.i.1.3.i, ptr %arrayidx.out.1.i, align 4
  %arrayidx.out.2.i = getelementptr inbounds i32, ptr %res, i32 2
  store i32 %add9.us.i.2.3.i, ptr %arrayidx.out.2.i, align 4
  %arrayidx.out.3.i = getelementptr inbounds i32, ptr %res, i32 3
  store i32 %add9.us.i.3.3.i, ptr %arrayidx.out.3.i, align 4
  ret void
}