; RUN: opt -arm-parallel-dsp -dce -mtriple=armv7-a -S %s -o - | FileCheck %s

; Every function below computes a[0]*b[0] + a[1]*b[1] on sign-extended i16
; elements and then adds %acc. The functions differ in the accumulator width
; (i32 or i64) and in how the instructions are spread across basic blocks.
;
; The function labels are anchored with '@' and '(' so that a label cannot
; match a longer function name sharing the same prefix (for example
; single_block versus single_block_64).

; Everything lives in the entry block: the paired i16 loads are expected to
; become i32 loads through bitcast pointers, feeding smlad with %acc as the
; accumulator operand.
; CHECK-LABEL: @single_block(
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
; CHECK: [[A:%[^ ]+]] = load i32, i32* [[CAST_A]]
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
; CHECK: [[B:%[^ ]+]] = load i32, i32* [[CAST_B]]
; CHECK: call i32 @llvm.arm.smlad(i32 [[A]], i32 [[B]], i32 %acc)
define i32 @single_block(i16* %a, i16* %b, i32 %acc) {
entry:
  %ld.a.0 = load i16, i16* %a
  %sext.a.0 = sext i16 %ld.a.0 to i32
  %ld.b.0 = load i16, i16* %b
  %sext.b.0 = sext i16 %ld.b.0 to i32
  %mul.0 = mul i32 %sext.a.0, %sext.b.0
  %addr.a.1 = getelementptr i16, i16* %a, i32 1
  %addr.b.1 = getelementptr i16, i16* %b, i32 1
  %ld.a.1 = load i16, i16* %addr.a.1
  %sext.a.1 = sext i16 %ld.a.1 to i32
  %ld.b.1 = load i16, i16* %addr.b.1
  %sext.b.1 = sext i16 %ld.b.1 to i32
  %mul.1 = mul i32 %sext.a.1, %sext.b.1
  %add = add i32 %mul.0, %mul.1
  %res = add i32 %add, %acc
  ret i32 %res
}

; Same shape as single_block, but the i32 products are sign-extended to i64
; before being summed, so the 64-bit smlald form with %acc is expected.
; CHECK-LABEL: @single_block_64(
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
; CHECK: [[A:%[^ ]+]] = load i32, i32* [[CAST_A]]
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
; CHECK: [[B:%[^ ]+]] = load i32, i32* [[CAST_B]]
; CHECK: call i64 @llvm.arm.smlald(i32 [[A]], i32 [[B]], i64 %acc)
define i64 @single_block_64(i16* %a, i16* %b, i64 %acc) {
entry:
  %ld.a.0 = load i16, i16* %a
  %sext.a.0 = sext i16 %ld.a.0 to i32
  %ld.b.0 = load i16, i16* %b
  %sext.b.0 = sext i16 %ld.b.0 to i32
  %mul.0 = mul i32 %sext.a.0, %sext.b.0
  %addr.a.1 = getelementptr i16, i16* %a, i32 1
  %addr.b.1 = getelementptr i16, i16* %b, i32 1
  %ld.a.1 = load i16, i16* %addr.a.1
  %sext.a.1 = sext i16 %ld.a.1 to i32
  %ld.b.1 = load i16, i16* %addr.b.1
  %sext.b.1 = sext i16 %ld.b.1 to i32
  %mul.1 = mul i32 %sext.a.1, %sext.b.1
  %sext.mul.0 = sext i32 %mul.0 to i64
  %sext.mul.1 = sext i32 %mul.1 to i64
  %add = add i64 %sext.mul.0, %sext.mul.1
  %res = add i64 %add, %acc
  ret i64 %res
}

; Both multiplies and their sum are in the entry block, but the add of %acc is
; in bb.1. The intrinsic is therefore expected with a zero accumulator operand
; rather than %acc.
; CHECK-LABEL: @multi_block(
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
; CHECK: [[A:%[^ ]+]] = load i32, i32* [[CAST_A]]
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
; CHECK: [[B:%[^ ]+]] = load i32, i32* [[CAST_B]]
; CHECK: call i32 @llvm.arm.smlad(i32 [[A]], i32 [[B]], i32 0)
define i32 @multi_block(i16* %a, i16* %b, i32 %acc) {
entry:
  %ld.a.0 = load i16, i16* %a
  %sext.a.0 = sext i16 %ld.a.0 to i32
  %ld.b.0 = load i16, i16* %b
  %sext.b.0 = sext i16 %ld.b.0 to i32
  %mul.0 = mul i32 %sext.a.0, %sext.b.0
  %addr.a.1 = getelementptr i16, i16* %a, i32 1
  %addr.b.1 = getelementptr i16, i16* %b, i32 1
  %ld.a.1 = load i16, i16* %addr.a.1
  %sext.a.1 = sext i16 %ld.a.1 to i32
  %ld.b.1 = load i16, i16* %addr.b.1
  %sext.b.1 = sext i16 %ld.b.1 to i32
  %mul.1 = mul i32 %sext.a.1, %sext.b.1
  %add = add i32 %mul.0, %mul.1
  br label %bb.1

bb.1:
  %res = add i32 %add, %acc
  ret i32 %res
}

; 64-bit variant of multi_block: the add of %acc is again in bb.1, so smlald
; is expected with a zero accumulator operand.
; CHECK-LABEL: @multi_block_64(
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
; CHECK: [[A:%[^ ]+]] = load i32, i32* [[CAST_A]]
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
; CHECK: [[B:%[^ ]+]] = load i32, i32* [[CAST_B]]
; CHECK: call i64 @llvm.arm.smlald(i32 [[A]], i32 [[B]], i64 0)
define i64 @multi_block_64(i16* %a, i16* %b, i64 %acc) {
entry:
  %ld.a.0 = load i16, i16* %a
  %sext.a.0 = sext i16 %ld.a.0 to i32
  %ld.b.0 = load i16, i16* %b
  %sext.b.0 = sext i16 %ld.b.0 to i32
  %mul.0 = mul i32 %sext.a.0, %sext.b.0
  %addr.a.1 = getelementptr i16, i16* %a, i32 1
  %addr.b.1 = getelementptr i16, i16* %b, i32 1
  %ld.a.1 = load i16, i16* %addr.a.1
  %sext.a.1 = sext i16 %ld.a.1 to i32
  %ld.b.1 = load i16, i16* %addr.b.1
  %sext.b.1 = sext i16 %ld.b.1 to i32
  %mul.1 = mul i32 %sext.a.1, %sext.b.1
  %sext.mul.0 = sext i32 %mul.0 to i64
  %sext.mul.1 = sext i32 %mul.1 to i64
  %add = add i64 %sext.mul.0, %sext.mul.1
  br label %bb.1

bb.1:
  %res = add i64 %add, %acc
  ret i64 %res
}

; The first multiply is in the entry block and the second one in bb.1, so the
; two products are split across blocks. No smlad call is expected here.
; CHECK-LABEL: @multi_block_1(
; CHECK-NOT: call i32 @llvm.arm.smlad
define i32 @multi_block_1(i16* %a, i16* %b, i32 %acc) {
entry:
  %ld.a.0 = load i16, i16* %a
  %sext.a.0 = sext i16 %ld.a.0 to i32
  %ld.b.0 = load i16, i16* %b
  %sext.b.0 = sext i16 %ld.b.0 to i32
  %mul.0 = mul i32 %sext.a.0, %sext.b.0
  br label %bb.1

bb.1:
  %addr.a.1 = getelementptr i16, i16* %a, i32 1
  %addr.b.1 = getelementptr i16, i16* %b, i32 1
  %ld.a.1 = load i16, i16* %addr.a.1
  %sext.a.1 = sext i16 %ld.a.1 to i32
  %ld.b.1 = load i16, i16* %addr.b.1
  %sext.b.1 = sext i16 %ld.b.1 to i32
  %mul.1 = mul i32 %sext.a.1, %sext.b.1
  %add = add i32 %mul.0, %mul.1
  %res = add i32 %add, %acc
  ret i32 %res
}