; Codegen test for load/store addressing inside the loops defined in this
; file. Each llc invocation below selects one set of expectations: the two
; default runs, the two runs where pre-indexed (writeback) accesses must not
; appear, and one run with the LSR complexity limit raised.
; RUN: llc -mtriple=thumbv7em -mattr=+fp-armv8 %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-DEFAULT
; RUN: llc -mtriple=thumbv8m.main -mattr=+fp-armv8,+dsp %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-DEFAULT
; RUN: llc -mtriple=thumbv8m.main -mattr=+fp-armv8,+dsp -lsr-backedge-indexing=false %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLED
; RUN: llc -mtriple=thumbv8 %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLED
; RUN: llc -mtriple=thumbv8m.main -mattr=+fp-armv8,+dsp -lsr-complexity-limit=2147483647 %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-COMPLEX

; Expected addressing for @test_qadd_2.
; CHECK-LABEL: test_qadd_2
; CHECK: @ %loop

; Default runs: immediate-offset accesses first, then the pre-indexed ones.
; CHECK-DEFAULT: ldr{{.*}}, #4]
; CHECK-DEFAULT: ldr{{.*}}, #4]
; CHECK-DEFAULT: str{{.*}}, #4]
; CHECK-DEFAULT: ldr{{.*}}, #8]!
; CHECK-DEFAULT: ldr{{.*}}, #8]!
; CHECK-DEFAULT: str{{.*}}, #8]!

; Raised complexity limit: pre-indexed accesses first.
; CHECK-COMPLEX: ldr{{.*}}, #8]!
; CHECK-COMPLEX: ldr{{.*}}, #8]!
; CHECK-COMPLEX: str{{.*}}, #8]!
; CHECK-COMPLEX: ldr{{.*}}, #4]
; CHECK-COMPLEX: ldr{{.*}}, #4]
; CHECK-COMPLEX: str{{.*}}, #4]

; Writeback forms must be absent when the feature is off.
; DISABLED-NOT: ldr{{.*}}]!
; DISABLED-NOT: str{{.*}}]!
; Loop performing two @llvm.arm.qadd operations per iteration on i32 arrays.
; The second element index is formed as `or %pos.1, 1`; the element index
; advances by 2 while the counter %iv is stepped by -2 and compared
; (unsigned less-than) against %N.
define void @test_qadd_2(i32* %a.array, i32* %b.array, i32* %out.array, i32 %N) {
entry:
  br label %loop

loop:
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
  %pos.1 = phi i32 [ 0, %entry ], [ %pos.next, %loop ]

  ; First element of the pair.
  %a.ptr.1 = getelementptr inbounds i32, i32* %a.array, i32 %pos.1
  %a.val.1 = load i32, i32* %a.ptr.1
  %b.ptr.1 = getelementptr inbounds i32, i32* %b.array, i32 %pos.1
  %b.val.1 = load i32, i32* %b.ptr.1
  %res.1 = call i32 @llvm.arm.qadd(i32 %a.val.1, i32 %b.val.1)
  %out.ptr.1 = getelementptr inbounds i32, i32* %out.array, i32 %pos.1
  store i32 %res.1, i32* %out.ptr.1

  ; Second element of the pair.
  %pos.2 = or i32 %pos.1, 1
  %a.ptr.2 = getelementptr inbounds i32, i32* %a.array, i32 %pos.2
  %a.val.2 = load i32, i32* %a.ptr.2
  %b.ptr.2 = getelementptr inbounds i32, i32* %b.array, i32 %pos.2
  %b.val.2 = load i32, i32* %b.ptr.2
  %res.2 = call i32 @llvm.arm.qadd(i32 %a.val.2, i32 %b.val.2)
  %out.ptr.2 = getelementptr inbounds i32, i32* %out.array, i32 %pos.2
  store i32 %res.2, i32* %out.ptr.2

  ; Loop control.
  %iv.next = add nuw nsw i32 %iv, -2
  %pos.next = add nuw nsw i32 %pos.1, 2
  %again = icmp ult i32 %iv.next, %N
  br i1 %again, label %loop, label %exit

exit:
  ret void
}

; Expected addressing for @test_qadd_2_backwards.
; CHECK-LABEL: test_qadd_2_backwards
; TODO: Indexes should be generated.

; CHECK: @ %loop

; CHECK-DEFAULT: ldr{{.*}},
; CHECK-DEFAULT: ldr{{.*}},
; CHECK-DEFAULT: str{{.*}},
; CHECK-DEFAULT: ldr{{.*}}, #-4]
; CHECK-DEFAULT: ldr{{.*}}, #-4]
; CHECK-DEFAULT: sub{{.*}}, #8
; CHECK-DEFAULT: str{{.*}}, #-4]
; CHECK-DEFAULT: sub{{.*}}, #8

; CHECK-COMPLEX: ldr{{.*}} lsl #2]
; CHECK-COMPLEX: ldr{{.*}} lsl #2]
; CHECK-COMPLEX: str{{.*}} lsl #2]
; CHECK-COMPLEX: ldr{{.*}} lsl #2]
; CHECK-COMPLEX: ldr{{.*}} lsl #2]
; CHECK-COMPLEX: str{{.*}} lsl #2]

; DISABLED-NOT: ldr{{.*}}]!
; DISABLED-NOT: str{{.*}}]!
; Same two-element @llvm.arm.qadd loop, but the element index starts at %N
; and decreases: the second index is `%pos.1 - 1` and the index steps by -2
; per iteration. The counter %iv is stepped by -2 and compared (unsigned
; less-than) against %N.
define void @test_qadd_2_backwards(i32* %a.array, i32* %b.array, i32* %out.array, i32 %N) {
entry:
  br label %loop

loop:
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
  %pos.1 = phi i32 [ %N, %entry ], [ %pos.next, %loop ]

  ; Element at the current index.
  %a.ptr.1 = getelementptr inbounds i32, i32* %a.array, i32 %pos.1
  %a.val.1 = load i32, i32* %a.ptr.1
  %b.ptr.1 = getelementptr inbounds i32, i32* %b.array, i32 %pos.1
  %b.val.1 = load i32, i32* %b.ptr.1
  %res.1 = call i32 @llvm.arm.qadd(i32 %a.val.1, i32 %b.val.1)
  %out.ptr.1 = getelementptr inbounds i32, i32* %out.array, i32 %pos.1
  store i32 %res.1, i32* %out.ptr.1

  ; Element one slot below the current index.
  %pos.2 = sub nuw nsw i32 %pos.1, 1
  %a.ptr.2 = getelementptr inbounds i32, i32* %a.array, i32 %pos.2
  %a.val.2 = load i32, i32* %a.ptr.2
  %b.ptr.2 = getelementptr inbounds i32, i32* %b.array, i32 %pos.2
  %b.val.2 = load i32, i32* %b.ptr.2
  %res.2 = call i32 @llvm.arm.qadd(i32 %a.val.2, i32 %b.val.2)
  %out.ptr.2 = getelementptr inbounds i32, i32* %out.array, i32 %pos.2
  store i32 %res.2, i32* %out.ptr.2

  ; Loop control.
  %iv.next = add nuw nsw i32 %iv, -2
  %pos.next = sub nuw nsw i32 %pos.1, 2
  %again = icmp ult i32 %iv.next, %N
  br i1 %again, label %loop, label %exit

exit:
  ret void
}

; Expected addressing for @test_qadd_3.
; CHECK-LABEL: test_qadd_3
; CHECK: @ %loop

; CHECK-DEFAULT: ldr{{.*}}, #8]
; CHECK-DEFAULT: ldr{{.*}}, #8]
; CHECK-DEFAULT: str{{.*}}, #8]
; CHECK-DEFAULT: ldr{{.*}}, #12]!
; CHECK-DEFAULT: ldr{{.*}}, #12]!
; CHECK-DEFAULT: str{{.*}}, #12]!

; CHECK-COMPLEX: ldr{{.*}}, #12]!
; CHECK-COMPLEX: ldr{{.*}}, #12]!
; CHECK-COMPLEX: str{{.*}}, #12]!
; CHECK-COMPLEX: ldr{{.*}}, #4]
; CHECK-COMPLEX: ldr{{.*}}, #4]
; CHECK-COMPLEX: str{{.*}}, #4]
; CHECK-COMPLEX: ldr{{.*}}, #8]
; CHECK-COMPLEX: ldr{{.*}}, #8]
; CHECK-COMPLEX: str{{.*}}, #8]

; DISABLED-NOT: ldr{{.*}}]!
; DISABLED-NOT: str{{.*}}]!
; Loop performing three @llvm.arm.qadd operations per iteration on i32
; arrays. The extra indices are `%pos.1 + 1` and `%pos.1 + 2`; the element
; index advances by 3 while the counter %iv is stepped by -3 and compared
; (unsigned less-than) against %N.
define void @test_qadd_3(i32* %a.array, i32* %b.array, i32* %out.array, i32 %N) {
entry:
  br label %loop

loop:
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
  %pos.1 = phi i32 [ 0, %entry ], [ %pos.next, %loop ]

  ; First element.
  %a.ptr.1 = getelementptr inbounds i32, i32* %a.array, i32 %pos.1
  %a.val.1 = load i32, i32* %a.ptr.1
  %b.ptr.1 = getelementptr inbounds i32, i32* %b.array, i32 %pos.1
  %b.val.1 = load i32, i32* %b.ptr.1
  %res.1 = call i32 @llvm.arm.qadd(i32 %a.val.1, i32 %b.val.1)
  %out.ptr.1 = getelementptr inbounds i32, i32* %out.array, i32 %pos.1
  store i32 %res.1, i32* %out.ptr.1

  ; Second element.
  %pos.2 = add nuw nsw i32 %pos.1, 1
  %a.ptr.2 = getelementptr inbounds i32, i32* %a.array, i32 %pos.2
  %a.val.2 = load i32, i32* %a.ptr.2
  %b.ptr.2 = getelementptr inbounds i32, i32* %b.array, i32 %pos.2
  %b.val.2 = load i32, i32* %b.ptr.2
  %res.2 = call i32 @llvm.arm.qadd(i32 %a.val.2, i32 %b.val.2)
  %out.ptr.2 = getelementptr inbounds i32, i32* %out.array, i32 %pos.2
  store i32 %res.2, i32* %out.ptr.2

  ; Third element.
  %pos.3 = add nuw nsw i32 %pos.1, 2
  %a.ptr.3 = getelementptr inbounds i32, i32* %a.array, i32 %pos.3
  %a.val.3 = load i32, i32* %a.ptr.3
  %b.ptr.3 = getelementptr inbounds i32, i32* %b.array, i32 %pos.3
  %b.val.3 = load i32, i32* %b.ptr.3
  %res.3 = call i32 @llvm.arm.qadd(i32 %a.val.3, i32 %b.val.3)
  %out.ptr.3 = getelementptr inbounds i32, i32* %out.array, i32 %pos.3
  store i32 %res.3, i32* %out.ptr.3

  ; Loop control.
  %iv.next = add nuw nsw i32 %iv, -3
  %pos.next = add nuw nsw i32 %pos.1, 3
  %again = icmp ult i32 %iv.next, %N
  br i1 %again, label %loop, label %exit

exit:
  ret void
}

; Expected addressing for @test_qadd_4.
; CHECK-LABEL: test_qadd_4
; CHECK: @ %loop

; TODO: pre-inc store

; CHECK-DEFAULT: ldr{{.*}}, #4]
; CHECK-DEFAULT: ldr{{.*}}, #4]
; CHECK-DEFAULT: str{{.*}}, #4]
; CHECK-DEFAULT: ldr{{.*}}, #8]
; CHECK-DEFAULT: ldr{{.*}}, #8]
; CHECK-DEFAULT: str{{.*}}, #8]
; CHECK-DEFAULT: ldr{{.*}}, #12]
; CHECK-DEFAULT: ldr{{.*}}, #12]
; CHECK-DEFAULT: str{{.*}}, #12]

; CHECK-COMPLEX: ldr{{.*}}, #16]!
; CHECK-COMPLEX: ldr{{.*}}, #16]!
; CHECK-COMPLEX: str{{.*}}, #16]!
; CHECK-COMPLEX: ldr{{.*}}, #4]
; CHECK-COMPLEX: ldr{{.*}}, #4]
; CHECK-COMPLEX: str{{.*}}, #4]
; CHECK-COMPLEX: ldr{{.*}}, #8]
; CHECK-COMPLEX: ldr{{.*}}, #8]
; CHECK-COMPLEX: str{{.*}}, #8]
; CHECK-COMPLEX: ldr{{.*}}, #12]
; CHECK-COMPLEX: ldr{{.*}}, #12]
; CHECK-COMPLEX: str{{.*}}, #12]

; DISABLED-NOT: ldr{{.*}}]!
; DISABLED-NOT: str{{.*}}]!

; Loop performing four @llvm.arm.qadd operations per iteration on i32
; arrays. The extra indices are formed with `or` of %pos.1 with 1, 2 and 3;
; the element index advances by 4 while the counter %iv is stepped by -4 and
; compared (unsigned less-than) against %N.
define void @test_qadd_4(i32* %a.array, i32* %b.array, i32* %out.array, i32 %N) {
entry:
  br label %loop

loop:
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
  %pos.1 = phi i32 [ 0, %entry ], [ %pos.next, %loop ]

  ; First element.
  %a.ptr.1 = getelementptr inbounds i32, i32* %a.array, i32 %pos.1
  %a.val.1 = load i32, i32* %a.ptr.1
  %b.ptr.1 = getelementptr inbounds i32, i32* %b.array, i32 %pos.1
  %b.val.1 = load i32, i32* %b.ptr.1
  %res.1 = call i32 @llvm.arm.qadd(i32 %a.val.1, i32 %b.val.1)
  %out.ptr.1 = getelementptr inbounds i32, i32* %out.array, i32 %pos.1
  store i32 %res.1, i32* %out.ptr.1

  ; Second element.
  %pos.2 = or i32 %pos.1, 1
  %a.ptr.2 = getelementptr inbounds i32, i32* %a.array, i32 %pos.2
  %a.val.2 = load i32, i32* %a.ptr.2
  %b.ptr.2 = getelementptr inbounds i32, i32* %b.array, i32 %pos.2
  %b.val.2 = load i32, i32* %b.ptr.2
  %res.2 = call i32 @llvm.arm.qadd(i32 %a.val.2, i32 %b.val.2)
  %out.ptr.2 = getelementptr inbounds i32, i32* %out.array, i32 %pos.2
  store i32 %res.2, i32* %out.ptr.2

  ; Third element.
  %pos.3 = or i32 %pos.1, 2
  %a.ptr.3 = getelementptr inbounds i32, i32* %a.array, i32 %pos.3
  %a.val.3 = load i32, i32* %a.ptr.3
  %b.ptr.3 = getelementptr inbounds i32, i32* %b.array, i32 %pos.3
  %b.val.3 = load i32, i32* %b.ptr.3
  %res.3 = call i32 @llvm.arm.qadd(i32 %a.val.3, i32 %b.val.3)
  %out.ptr.3 = getelementptr inbounds i32, i32* %out.array, i32 %pos.3
  store i32 %res.3, i32* %out.ptr.3

  ; Fourth element.
  %pos.4 = or i32 %pos.1, 3
  %a.ptr.4 = getelementptr inbounds i32, i32* %a.array, i32 %pos.4
  %a.val.4 = load i32, i32* %a.ptr.4
  %b.ptr.4 = getelementptr inbounds i32, i32* %b.array, i32 %pos.4
  %b.val.4 = load i32, i32* %b.ptr.4
  %res.4 = call i32 @llvm.arm.qadd(i32 %a.val.4, i32 %b.val.4)
  %out.ptr.4 = getelementptr inbounds i32, i32* %out.array, i32 %pos.4
  store i32 %res.4, i32* %out.ptr.4

  ; Loop control.
  %iv.next = add nuw nsw i32 %iv, -4
  %pos.next = add nuw nsw i32 %pos.1, 4
  %again = icmp ult i32 %iv.next, %N
  br i1 %again, label %loop, label %exit

exit:
  ret void
}

; Expected addressing for @test_qadd16_2.
; CHECK-LABEL: test_qadd16_2
; CHECK: @ %loop
; TODO: pre-inc store.

; CHECK-DEFAULT: ldr{{.*}}, #4]
; CHECK-DEFAULT: ldr{{.*}}, #4]
; CHECK-DEFAULT: str{{.*}}, #8]
; CHECK-DEFAULT: ldr{{.*}}, #8]!
; CHECK-DEFAULT: ldr{{.*}}, #8]!
; CHECK-DEFAULT: str{{.*}}, #16]!

; CHECK-COMPLEX: ldr{{.*}}, #8]!
; CHECK-COMPLEX: ldr{{.*}}, #8]!
; CHECK-COMPLEX: str{{.*}}, #16]!
; CHECK-COMPLEX: ldr{{.*}}, #4]
; CHECK-COMPLEX: ldr{{.*}}, #4]
; CHECK-COMPLEX: str{{.*}}, #8]

; DISABLED-NOT: ldr{{.*}}]!
; DISABLED-NOT: str{{.*}}]!
; Loop performing two @llvm.arm.qadd16 operations per iteration. The inputs
; are i16 arrays whose element pointers are bitcast to i32* and loaded as
; i32; results are stored to an i32 array using the same index values. The
; second index is `%pos.1 + 2`; the index advances by 4 while the counter
; %iv is stepped by -2 and compared (unsigned less-than) against %N.
define void @test_qadd16_2(i16* %a.array, i16* %b.array, i32* %out.array, i32 %N) {
entry:
  br label %loop

loop:
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
  %pos.1 = phi i32 [ 0, %entry ], [ %pos.next, %loop ]

  ; First 32-bit word of i16 data.
  %a.ptr.1 = getelementptr inbounds i16, i16* %a.array, i32 %pos.1
  %a.wide.1 = bitcast i16* %a.ptr.1 to i32*
  %a.val.1 = load i32, i32* %a.wide.1
  %b.ptr.1 = getelementptr inbounds i16, i16* %b.array, i32 %pos.1
  %b.wide.1 = bitcast i16* %b.ptr.1 to i32*
  %b.val.1 = load i32, i32* %b.wide.1
  %res.1 = call i32 @llvm.arm.qadd16(i32 %a.val.1, i32 %b.val.1)
  %out.ptr.1 = getelementptr inbounds i32, i32* %out.array, i32 %pos.1
  store i32 %res.1, i32* %out.ptr.1

  ; Second 32-bit word of i16 data.
  %pos.2 = add nuw nsw i32 %pos.1, 2
  %a.ptr.2 = getelementptr inbounds i16, i16* %a.array, i32 %pos.2
  %a.wide.2 = bitcast i16* %a.ptr.2 to i32*
  %a.val.2 = load i32, i32* %a.wide.2
  %b.ptr.2 = getelementptr inbounds i16, i16* %b.array, i32 %pos.2
  %b.wide.2 = bitcast i16* %b.ptr.2 to i32*
  %b.val.2 = load i32, i32* %b.wide.2
  %res.2 = call i32 @llvm.arm.qadd16(i32 %a.val.2, i32 %b.val.2)
  %out.ptr.2 = getelementptr inbounds i32, i32* %out.array, i32 %pos.2
  store i32 %res.2, i32* %out.ptr.2

  ; Loop control.
  %iv.next = add nuw nsw i32 %iv, -2
  %pos.next = add nuw nsw i32 %pos.1, 4
  %again = icmp ult i32 %iv.next, %N
  br i1 %again, label %loop, label %exit

exit:
  ret void
}

; Intrinsics called by the loops in this file.
declare i32 @llvm.arm.qadd(i32, i32)
declare i32 @llvm.arm.qadd16(i32, i32)