; RUN: opt -arm-parallel-dsp -mtriple=armv7-a -S %s -o - | FileCheck %s

; Overview: every function below loads two adjacent i16 elements from %a and
; from %b, sign-extends them to i32, multiplies them pairwise and sums the
; products together with %acc. The FileCheck directives verify that the
; -arm-parallel-dsp pass replaces the narrow loads with i32 loads (through a
; bitcast of the i16 pointer) and emits llvm.arm.smlad / llvm.arm.smladx calls.
; The functions differ only in which elements are paired, in the operand order
; of the mul/add instructions and in instruction order; the expected intrinsic
; and its operand order depend on exactly those details, so the instruction
; sequences must not be reordered or "canonicalised".

; exchange_1: acc + a[0]*b[1] + a[1]*b[0]. The %a value is the first operand
; of each mul and the sum is (mul.0 + mul.1). Expected: smladx(a, b).
; CHECK-LABEL: exchange_1
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
; CHECK: call i32 @llvm.arm.smladx(i32 [[LD_A]], i32 [[LD_B]]
define i32 @exchange_1(i16* %a, i16* %b, i32 %acc) {
entry:
  %addr.a.1 = getelementptr i16, i16* %a, i32 1
  %addr.b.1 = getelementptr i16, i16* %b, i32 1
  %ld.a.0 = load i16, i16* %a
  %sext.a.0 = sext i16 %ld.a.0 to i32
  %ld.b.0 = load i16, i16* %b
  %ld.a.1 = load i16, i16* %addr.a.1
  %ld.b.1 = load i16, i16* %addr.b.1
  %sext.a.1 = sext i16 %ld.a.1 to i32
  %sext.b.1 = sext i16 %ld.b.1 to i32
  %sext.b.0 = sext i16 %ld.b.0 to i32
  %mul.0 = mul i32 %sext.a.0, %sext.b.1
  %mul.1 = mul i32 %sext.a.1, %sext.b.0
  %add = add i32 %mul.0, %mul.1
  %res = add i32 %add, %acc
  ret i32 %res
}

; exchange_2: same products as exchange_1, but the %b value is the first
; operand of each mul (b[1]*a[0], b[0]*a[1]). Expected: still smladx(a, b).
; CHECK-LABEL: exchange_2
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
; CHECK: call i32 @llvm.arm.smladx(i32 [[LD_A]], i32 [[LD_B]]
define i32 @exchange_2(i16* %a, i16* %b, i32 %acc) {
entry:
  %addr.a.1 = getelementptr i16, i16* %a, i32 1
  %addr.b.1 = getelementptr i16, i16* %b, i32 1
  %ld.a.0 = load i16, i16* %a
  %sext.a.0 = sext i16 %ld.a.0 to i32
  %ld.b.0 = load i16, i16* %b
  %ld.a.1 = load i16, i16* %addr.a.1
  %ld.b.1 = load i16, i16* %addr.b.1
  %sext.a.1 = sext i16 %ld.a.1 to i32
  %sext.b.1 = sext i16 %ld.b.1 to i32
  %sext.b.0 = sext i16 %ld.b.0 to i32
  %mul.0 = mul i32 %sext.b.1, %sext.a.0
  %mul.1 = mul i32 %sext.b.0, %sext.a.1
  %add = add i32 %mul.0, %mul.1
  %res = add i32 %add, %acc
  ret i32 %res
}

; exchange_3: same muls as exchange_1, but the add operands are swapped
; (mul.1 + mul.0). Expected: the intrinsic operands swap too, smladx(b, a).
; CHECK-LABEL: exchange_3
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
; CHECK: call i32 @llvm.arm.smladx(i32 [[LD_B]], i32 [[LD_A]]
define i32 @exchange_3(i16* %a, i16* %b, i32 %acc) {
entry:
  %addr.a.1 = getelementptr i16, i16* %a, i32 1
  %addr.b.1 = getelementptr i16, i16* %b, i32 1
  %ld.a.0 = load i16, i16* %a
  %sext.a.0 = sext i16 %ld.a.0 to i32
  %ld.b.0 = load i16, i16* %b
  %ld.a.1 = load i16, i16* %addr.a.1
  %ld.b.1 = load i16, i16* %addr.b.1
  %sext.a.1 = sext i16 %ld.a.1 to i32
  %sext.b.1 = sext i16 %ld.b.1 to i32
  %sext.b.0 = sext i16 %ld.b.0 to i32
  %mul.0 = mul i32 %sext.a.0, %sext.b.1
  %mul.1 = mul i32 %sext.a.1, %sext.b.0
  %add = add i32 %mul.1, %mul.0
  %res = add i32 %add, %acc
  ret i32 %res
}

; exchange_4: combines both variations: %b value first in each mul (as in
; exchange_2) and swapped add operands (as in exchange_3).
; Expected: smladx(b, a).
; CHECK-LABEL: exchange_4
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
; CHECK: call i32 @llvm.arm.smladx(i32 [[LD_B]], i32 [[LD_A]]
define i32 @exchange_4(i16* %a, i16* %b, i32 %acc) {
entry:
  %addr.a.1 = getelementptr i16, i16* %a, i32 1
  %addr.b.1 = getelementptr i16, i16* %b, i32 1
  %ld.a.0 = load i16, i16* %a
  %sext.a.0 = sext i16 %ld.a.0 to i32
  %ld.b.0 = load i16, i16* %b
  %ld.a.1 = load i16, i16* %addr.a.1
  %ld.b.1 = load i16, i16* %addr.b.1
  %sext.a.1 = sext i16 %ld.a.1 to i32
  %sext.b.1 = sext i16 %ld.b.1 to i32
  %sext.b.0 = sext i16 %ld.b.0 to i32
  %mul.0 = mul i32 %sext.b.1, %sext.a.0
  %mul.1 = mul i32 %sext.b.0, %sext.a.1
  %add = add i32 %mul.1, %mul.0
  %res = add i32 %add, %acc
  ret i32 %res
}

; exchange_multi_use_1: the sign-extended %b elements are used by two product
; pairs. The first pair is crossed (a[0]*b[1] + a[1]*b[0]); the second pair is
; straight (a[3]*b[1] + a[2]*b[0]) and reads a[2..3].
; Expected: smladx(a, b, acc) whose result feeds smlad(a[2..3], b, ...), with
; the single wide load of %b shared by both calls.
; CHECK-LABEL: exchange_multi_use_1
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
; CHECK: [[GEP:%[^ ]+]] = getelementptr i16, i16* %a, i32 2
; CHECK: [[CAST_A_2:%[^ ]+]] = bitcast i16* [[GEP]] to i32*
; CHECK: [[LD_A_2:%[^ ]+]] = load i32, i32* [[CAST_A_2]]
; CHECK: [[X:%[^ ]+]] = call i32 @llvm.arm.smladx(i32 [[LD_A]], i32 [[LD_B]], i32 %acc
; CHECK: call i32 @llvm.arm.smlad(i32 [[LD_A_2]], i32 [[LD_B]], i32 [[X]])
define i32 @exchange_multi_use_1(i16* %a, i16* %b, i32 %acc) {
entry:
  %addr.a.1 = getelementptr i16, i16* %a, i32 1
  %addr.b.1 = getelementptr i16, i16* %b, i32 1
  %ld.a.0 = load i16, i16* %a
  %sext.a.0 = sext i16 %ld.a.0 to i32
  %ld.b.0 = load i16, i16* %b
  %ld.a.1 = load i16, i16* %addr.a.1
  %ld.b.1 = load i16, i16* %addr.b.1
  %sext.a.1 = sext i16 %ld.a.1 to i32
  %sext.b.1 = sext i16 %ld.b.1 to i32
  %sext.b.0 = sext i16 %ld.b.0 to i32
  %mul.0 = mul i32 %sext.a.0, %sext.b.1
  %mul.1 = mul i32 %sext.a.1, %sext.b.0
  %add = add i32 %mul.0, %mul.1
  %addr.a.2 = getelementptr i16, i16* %a, i32 2
  %addr.a.3 = getelementptr i16, i16* %a, i32 3
  %ld.a.2 = load i16, i16* %addr.a.2
  %ld.a.3 = load i16, i16* %addr.a.3
  %sext.a.2 = sext i16 %ld.a.2 to i32
  %sext.a.3 = sext i16 %ld.a.3 to i32
  %mul.2 = mul i32 %sext.a.3, %sext.b.1
  %mul.3 = mul i32 %sext.a.2, %sext.b.0
  %add.1 = add i32 %mul.2, %mul.3
  %add.2 = add i32 %add, %add.1
  %res = add i32 %add.2, %acc
  ret i32 %res
}

; exchange_multi_use_2: mirror image of exchange_multi_use_1. The first pair
; is straight (a[0]*b[0] + a[1]*b[1]); the second pair is crossed and has the
; %b value as first mul operand (b[0]*a[3] + b[1]*a[2]).
; Expected: smlad(a, b, acc) whose result feeds smladx(b, a[2..3], ...).
; CHECK-LABEL: exchange_multi_use_2
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
; CHECK: [[GEP:%[^ ]+]] = getelementptr i16, i16* %a, i32 2
; CHECK: [[CAST_A_2:%[^ ]+]] = bitcast i16* [[GEP]] to i32*
; CHECK: [[LD_A_2:%[^ ]+]] = load i32, i32* [[CAST_A_2]]
; CHECK: [[X:%[^ ]+]] = call i32 @llvm.arm.smlad(i32 [[LD_A]], i32 [[LD_B]], i32 %acc
; CHECK: call i32 @llvm.arm.smladx(i32 [[LD_B]], i32 [[LD_A_2]], i32 [[X]])
define i32 @exchange_multi_use_2(i16* %a, i16* %b, i32 %acc) {
entry:
  %addr.a.1 = getelementptr i16, i16* %a, i32 1
  %addr.b.1 = getelementptr i16, i16* %b, i32 1
  %ld.a.0 = load i16, i16* %a
  %sext.a.0 = sext i16 %ld.a.0 to i32
  %ld.b.0 = load i16, i16* %b
  %ld.a.1 = load i16, i16* %addr.a.1
  %ld.b.1 = load i16, i16* %addr.b.1
  %sext.a.1 = sext i16 %ld.a.1 to i32
  %sext.b.1 = sext i16 %ld.b.1 to i32
  %sext.b.0 = sext i16 %ld.b.0 to i32
  %mul.0 = mul i32 %sext.a.0, %sext.b.0
  %mul.1 = mul i32 %sext.a.1, %sext.b.1
  %add = add i32 %mul.0, %mul.1
  %addr.a.2 = getelementptr i16, i16* %a, i32 2
  %addr.a.3 = getelementptr i16, i16* %a, i32 3
  %ld.a.2 = load i16, i16* %addr.a.2
  %ld.a.3 = load i16, i16* %addr.a.3
  %sext.a.2 = sext i16 %ld.a.2 to i32
  %sext.a.3 = sext i16 %ld.a.3 to i32
  %mul.2 = mul i32 %sext.b.0, %sext.a.3
  %mul.3 = mul i32 %sext.b.1, %sext.a.2
  %add.1 = add i32 %mul.2, %mul.3
  %add.2 = add i32 %add, %add.1
  %res = add i32 %add.2, %acc
  ret i32 %res
}

; exchange_multi_use_3: same four products as exchange_multi_use_2, but the
; crossed pair (mul.2/mul.3) is defined first and the two sums are combined
; with a sub (%add - %add.1) instead of an add. %add is defined before %add.1.
; Expected (current behaviour, see the note below): only the crossed pair is
; converted, as smladx(b, a[2..3], 0), and no smlad-prefixed call appears
; between the wide load of a[2..3] and that smladx.
; NOTE(review): presumably the sub prevents the two sums from being treated as
; one accumulation chain - confirm against the pass implementation.
; TODO: Why aren't two intrinsics generated?
; CHECK-LABEL: exchange_multi_use_3
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
; CHECK: [[GEP:%[^ ]+]] = getelementptr i16, i16* %a, i32 2
; CHECK: [[CAST_A_2:%[^ ]+]] = bitcast i16* [[GEP]] to i32*
; CHECK: [[LD_A_2:%[^ ]+]] = load i32, i32* [[CAST_A_2]]
; CHECK-NOT: call i32 @llvm.arm.smlad
; CHECK: [[X:%[^ ]+]] = call i32 @llvm.arm.smladx(i32 [[LD_B]], i32 [[LD_A_2]], i32 0
define i32 @exchange_multi_use_3(i16* %a, i16* %b, i32 %acc) {
entry:
  %addr.a.1 = getelementptr i16, i16* %a, i32 1
  %addr.b.1 = getelementptr i16, i16* %b, i32 1
  %ld.a.0 = load i16, i16* %a
  %sext.a.0 = sext i16 %ld.a.0 to i32
  %ld.b.0 = load i16, i16* %b
  %ld.a.1 = load i16, i16* %addr.a.1
  %ld.b.1 = load i16, i16* %addr.b.1
  %sext.a.1 = sext i16 %ld.a.1 to i32
  %sext.b.1 = sext i16 %ld.b.1 to i32
  %sext.b.0 = sext i16 %ld.b.0 to i32
  %addr.a.2 = getelementptr i16, i16* %a, i32 2
  %addr.a.3 = getelementptr i16, i16* %a, i32 3
  %ld.a.2 = load i16, i16* %addr.a.2
  %ld.a.3 = load i16, i16* %addr.a.3
  %sext.a.2 = sext i16 %ld.a.2 to i32
  %sext.a.3 = sext i16 %ld.a.3 to i32
  %mul.2 = mul i32 %sext.b.0, %sext.a.3
  %mul.3 = mul i32 %sext.b.1, %sext.a.2
  %mul.0 = mul i32 %sext.a.0, %sext.b.0
  %mul.1 = mul i32 %sext.a.1, %sext.b.1
  %add = add i32 %mul.0, %mul.1
  %add.1 = add i32 %mul.2, %mul.3
  %sub = sub i32 %add, %add.1
  %res = add i32 %acc, %sub
  ret i32 %res
}

; exchange_multi_use_4: identical to exchange_multi_use_3 except that %add.1
; (the crossed sum) is defined before %add (the straight sum).
; Expected (current behaviour, see the note below): only the straight pair is
; converted, as smlad(a, b, 0). The negative directive after it uses the
; "smlad" prefix, which also matches "smladx", so no further call of either
; intrinsic may follow within this function.
; TODO: Why isn't smladx generated too?
; CHECK-LABEL: exchange_multi_use_4
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
; CHECK: [[X:%[^ ]+]] = call i32 @llvm.arm.smlad(i32 [[LD_A]], i32 [[LD_B]], i32 0
; CHECK-NOT: call i32 @llvm.arm.smlad
define i32 @exchange_multi_use_4(i16* %a, i16* %b, i32 %acc) {
entry:
  %addr.a.1 = getelementptr i16, i16* %a, i32 1
  %addr.b.1 = getelementptr i16, i16* %b, i32 1
  %ld.a.0 = load i16, i16* %a
  %sext.a.0 = sext i16 %ld.a.0 to i32
  %ld.b.0 = load i16, i16* %b
  %ld.a.1 = load i16, i16* %addr.a.1
  %ld.b.1 = load i16, i16* %addr.b.1
  %sext.a.1 = sext i16 %ld.a.1 to i32
  %sext.b.1 = sext i16 %ld.b.1 to i32
  %sext.b.0 = sext i16 %ld.b.0 to i32
  %addr.a.2 = getelementptr i16, i16* %a, i32 2
  %addr.a.3 = getelementptr i16, i16* %a, i32 3
  %ld.a.2 = load i16, i16* %addr.a.2
  %ld.a.3 = load i16, i16* %addr.a.3
  %sext.a.2 = sext i16 %ld.a.2 to i32
  %sext.a.3 = sext i16 %ld.a.3 to i32
  %mul.2 = mul i32 %sext.b.0, %sext.a.3
  %mul.3 = mul i32 %sext.b.1, %sext.a.2
  %mul.0 = mul i32 %sext.a.0, %sext.b.0
  %mul.1 = mul i32 %sext.a.1, %sext.b.1
  %add.1 = add i32 %mul.2, %mul.3
  %add = add i32 %mul.0, %mul.1
  %sub = sub i32 %add, %add.1
  %res = add i32 %acc, %sub
  ret i32 %res
}

; exchange_swap: the crossed products are defined in the opposite order to
; exchange_1 (mul.0 = a[1]*b[0], mul.1 = a[0]*b[1]) and summed as
; (mul.0 + mul.1). Expected: smladx(b, a).
; CHECK-LABEL: exchange_swap
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
; CHECK: call i32 @llvm.arm.smladx(i32 [[LD_B]], i32 [[LD_A]]
define i32 @exchange_swap(i16* %a, i16* %b, i32 %acc) {
entry:
  %addr.a.1 = getelementptr i16, i16* %a, i32 1
  %addr.b.1 = getelementptr i16, i16* %b, i32 1
  %ld.a.0 = load i16, i16* %a
  %sext.a.0 = sext i16 %ld.a.0 to i32
  %ld.b.0 = load i16, i16* %b
  %ld.a.1 = load i16, i16* %addr.a.1
  %ld.b.1 = load i16, i16* %addr.b.1
  %sext.a.1 = sext i16 %ld.a.1 to i32
  %sext.b.1 = sext i16 %ld.b.1 to i32
  %sext.b.0 = sext i16 %ld.b.0 to i32
  %mul.0 = mul i32 %sext.a.1, %sext.b.0
  %mul.1 = mul i32 %sext.a.0, %sext.b.1
  %add = add i32 %mul.0, %mul.1
  %res = add i32 %add, %acc
  ret i32 %res
}

; exchange_swap_2: same muls as exchange_swap, but the add operands are
; swapped (mul.1 + mul.0). Expected: smladx(a, b).
; CHECK-LABEL: exchange_swap_2
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
; CHECK: call i32 @llvm.arm.smladx(i32 [[LD_A]], i32 [[LD_B]]
define i32 @exchange_swap_2(i16* %a, i16* %b, i32 %acc) {
entry:
  %addr.a.1 = getelementptr i16, i16* %a, i32 1
  %addr.b.1 = getelementptr i16, i16* %b, i32 1
  %ld.a.0 = load i16, i16* %a
  %sext.a.0 = sext i16 %ld.a.0 to i32
  %ld.b.0 = load i16, i16* %b
  %ld.a.1 = load i16, i16* %addr.a.1
  %ld.b.1 = load i16, i16* %addr.b.1
  %sext.a.1 = sext i16 %ld.a.1 to i32
  %sext.b.1 = sext i16 %ld.b.1 to i32
  %sext.b.0 = sext i16 %ld.b.0 to i32
  %mul.0 = mul i32 %sext.a.1, %sext.b.0
  %mul.1 = mul i32 %sext.a.0, %sext.b.1
  %add = add i32 %mul.1, %mul.0
  %res = add i32 %add, %acc
  ret i32 %res
}

; exchange_swap_3: like exchange_swap_2, but with the %b value as the first
; operand of each mul (b[0]*a[1], b[1]*a[0]). Expected: smladx(a, b).
; CHECK-LABEL: exchange_swap_3
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
; CHECK: call i32 @llvm.arm.smladx(i32 [[LD_A]], i32 [[LD_B]]
define i32 @exchange_swap_3(i16* %a, i16* %b, i32 %acc) {
entry:
  %addr.a.1 = getelementptr i16, i16* %a, i32 1
  %addr.b.1 = getelementptr i16, i16* %b, i32 1
  %ld.a.0 = load i16, i16* %a
  %sext.a.0 = sext i16 %ld.a.0 to i32
  %ld.b.0 = load i16, i16* %b
  %ld.a.1 = load i16, i16* %addr.a.1
  %ld.b.1 = load i16, i16* %addr.b.1
  %sext.a.1 = sext i16 %ld.a.1 to i32
  %sext.b.1 = sext i16 %ld.b.1 to i32
  %sext.b.0 = sext i16 %ld.b.0 to i32
  %mul.0 = mul i32 %sext.b.0, %sext.a.1
  %mul.1 = mul i32 %sext.b.1, %sext.a.0
  %add = add i32 %mul.1, %mul.0
  %res = add i32 %add, %acc
  ret i32 %res
}