; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=armv8 -mattr=+v8.1a | FileCheck %s

;-----------------------------------------------------------------------------
; RDMA Vector

; Saturating rounding doubling multiply returning the high half.
declare <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16>, <4 x i16>)
declare <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16>, <8 x i16>)
declare <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32>, <2 x i32>)
declare <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32>, <4 x i32>)

; Generic signed saturating add.
declare <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16>, <4 x i16>)
declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>)
declare <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32>, <2 x i32>)
declare <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32>, <4 x i32>)

; Generic signed saturating subtract.
declare <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16>, <4 x i16>)
declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>)
declare <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32>, <2 x i32>)
declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>)

; v8.1a RDMA accumulate (vqrdmlah) and subtract (vqrdmlsh) intrinsics.
declare <4 x i16> @llvm.arm.neon.vqrdmlah.v4i16(<4 x i16>, <4 x i16>, <4 x i16>)
declare <8 x i16> @llvm.arm.neon.vqrdmlah.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
declare <2 x i32> @llvm.arm.neon.vqrdmlah.v2i32(<2 x i32>, <2 x i32>, <2 x i32>)
declare <4 x i32> @llvm.arm.neon.vqrdmlah.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
declare <4 x i16> @llvm.arm.neon.vqrdmlsh.v4i16(<4 x i16>, <4 x i16>, <4 x i16>)
declare <8 x i16> @llvm.arm.neon.vqrdmlsh.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
declare <2 x i32> @llvm.arm.neon.vqrdmlsh.v2i32(<2 x i32>, <2 x i32>, <2 x i32>)
declare <4 x i32> @llvm.arm.neon.vqrdmlsh.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

; The sadd.sat/ssub.sat intrinsics in this file were previously combined with
; vqrdmulh into vqrdmlah/vqrdmlsh where they shouldn't be. They should produce
; vqrdmulh followed by vqadd (or vqsub).
; vqrdmulh feeding a generic saturating add/sub on full vectors: the expected
; output keeps the multiply and the vqadd/vqsub as two separate instructions.

define arm_aapcs_vfpcc <4 x i16> @test_vqrdmulah_v4i16(<4 x i16> %acc, <4 x i16> %mhs, <4 x i16> %rhs) {
; CHECK-LABEL: test_vqrdmulah_v4i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vqrdmulh.s16 d16, d1, d2
; CHECK-NEXT:    vqadd.s16 d0, d0, d16
; CHECK-NEXT:    bx lr
  %prod = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %mhs, <4 x i16> %rhs)
  %retval = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> %acc, <4 x i16> %prod)
  ret <4 x i16> %retval
}

define arm_aapcs_vfpcc <8 x i16> @test_vqrdmulah_v8i16(<8 x i16> %acc, <8 x i16> %mhs, <8 x i16> %rhs) {
; CHECK-LABEL: test_vqrdmulah_v8i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vqrdmulh.s16 q8, q1, q2
; CHECK-NEXT:    vqadd.s16 q0, q0, q8
; CHECK-NEXT:    bx lr
  %prod = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %mhs, <8 x i16> %rhs)
  %retval = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %acc, <8 x i16> %prod)
  ret <8 x i16> %retval
}

define arm_aapcs_vfpcc <2 x i32> @test_vqrdmulah_v2i32(<2 x i32> %acc, <2 x i32> %mhs, <2 x i32> %rhs) {
; CHECK-LABEL: test_vqrdmulah_v2i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vqrdmulh.s32 d16, d1, d2
; CHECK-NEXT:    vqadd.s32 d0, d0, d16
; CHECK-NEXT:    bx lr
  %prod = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %mhs, <2 x i32> %rhs)
  %retval = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> %acc, <2 x i32> %prod)
  ret <2 x i32> %retval
}

define arm_aapcs_vfpcc <4 x i32> @test_vqrdmulah_v4i32(<4 x i32> %acc, <4 x i32> %mhs, <4 x i32> %rhs) {
; CHECK-LABEL: test_vqrdmulah_v4i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vqrdmulh.s32 q8, q1, q2
; CHECK-NEXT:    vqadd.s32 q0, q0, q8
; CHECK-NEXT:    bx lr
  %prod = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %mhs, <4 x i32> %rhs)
  %retval = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %acc, <4 x i32> %prod)
  ret <4 x i32> %retval
}

define arm_aapcs_vfpcc <4 x i16> @test_vqrdmulsh_v4i16(<4 x i16> %acc, <4 x i16> %mhs, <4 x i16> %rhs) {
; CHECK-LABEL: test_vqrdmulsh_v4i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vqrdmulh.s16 d16, d1, d2
; CHECK-NEXT:    vqsub.s16 d0, d0, d16
; CHECK-NEXT:    bx lr
  %prod = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %mhs, <4 x i16> %rhs)
  %retval = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> %acc, <4 x i16> %prod)
  ret <4 x i16> %retval
}

define arm_aapcs_vfpcc <8 x i16> @test_vqrdmulsh_v8i16(<8 x i16> %acc, <8 x i16> %mhs, <8 x i16> %rhs) {
; CHECK-LABEL: test_vqrdmulsh_v8i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vqrdmulh.s16 q8, q1, q2
; CHECK-NEXT:    vqsub.s16 q0, q0, q8
; CHECK-NEXT:    bx lr
  %prod = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %mhs, <8 x i16> %rhs)
  %retval = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %acc, <8 x i16> %prod)
  ret <8 x i16> %retval
}

define arm_aapcs_vfpcc <2 x i32> @test_vqrdmulsh_v2i32(<2 x i32> %acc, <2 x i32> %mhs, <2 x i32> %rhs) {
; CHECK-LABEL: test_vqrdmulsh_v2i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vqrdmulh.s32 d16, d1, d2
; CHECK-NEXT:    vqsub.s32 d0, d0, d16
; CHECK-NEXT:    bx lr
  %prod = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %mhs, <2 x i32> %rhs)
  %retval = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %acc, <2 x i32> %prod)
  ret <2 x i32> %retval
}

define arm_aapcs_vfpcc <4 x i32> @test_vqrdmulsh_v4i32(<4 x i32> %acc, <4 x i32> %mhs, <4 x i32> %rhs) {
; CHECK-LABEL: test_vqrdmulsh_v4i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vqrdmulh.s32 q8, q1, q2
; CHECK-NEXT:    vqsub.s32 q0, q0, q8
; CHECK-NEXT:    bx lr
  %prod = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %mhs, <4 x i32> %rhs)
  %retval = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %acc, <4 x i32> %prod)
  ret <4 x i32> %retval
}

;-----------------------------------------------------------------------------
; RDMA Scalar

; Same pattern with one multiplicand splatted from a single lane. The splat is
; expected to fold into the lane-indexed operand of vqrdmulh, while the
; saturating add/sub still stays a separate vqadd/vqsub.

define arm_aapcs_vfpcc <4 x i16> @test_vqrdmulah_lane_s16(<4 x i16> %acc, <4 x i16> %x, <4 x i16> %v) {
; CHECK-LABEL: test_vqrdmulah_lane_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmulh.s16 d16, d1, d2[3]
; CHECK-NEXT:    vqadd.s16 d0, d0, d16
; CHECK-NEXT:    bx lr
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %prod = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %x, <4 x i16> %shuffle)
  %retval = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> %acc, <4 x i16> %prod)
  ret <4 x i16> %retval
}

define arm_aapcs_vfpcc <8 x i16> @test_vqrdmulahq_lane_s16(<8 x i16> %acc, <8 x i16> %x, <4 x i16> %v) {
; CHECK-LABEL: test_vqrdmulahq_lane_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    @ kill: def $d4 killed $d4 def $q2
; CHECK-NEXT:    vqrdmulh.s16 q8, q1, d4[2]
; CHECK-NEXT:    vqadd.s16 q0, q0, q8
; CHECK-NEXT:    bx lr
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  %prod = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %x, <8 x i16> %shuffle)
  %retval = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %acc, <8 x i16> %prod)
  ret <8 x i16> %retval
}

define arm_aapcs_vfpcc <2 x i32> @test_vqrdmulah_lane_s32(<2 x i32> %acc, <2 x i32> %x, <2 x i32> %v) {
; CHECK-LABEL: test_vqrdmulah_lane_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmulh.s32 d16, d1, d2[1]
; CHECK-NEXT:    vqadd.s32 d0, d0, d16
; CHECK-NEXT:    bx lr
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
  %prod = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %x, <2 x i32> %shuffle)
  %retval = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> %acc, <2 x i32> %prod)
  ret <2 x i32> %retval
}

define arm_aapcs_vfpcc <4 x i32> @test_vqrdmulahq_lane_s32(<4 x i32> %acc, <4 x i32> %x, <2 x i32> %v) {
; CHECK-LABEL: test_vqrdmulahq_lane_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    @ kill: def $d4 killed $d4 def $q2
; CHECK-NEXT:    vqrdmulh.s32 q8, q1, d4[0]
; CHECK-NEXT:    vqadd.s32 q0, q0, q8
; CHECK-NEXT:    bx lr
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> poison, <4 x i32> zeroinitializer
  %prod = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %x, <4 x i32> %shuffle)
  %retval = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %acc, <4 x i32> %prod)
  ret <4 x i32> %retval
}

define arm_aapcs_vfpcc <4 x i16> @test_vqrdmulsh_lane_s16(<4 x i16> %acc, <4 x i16> %x, <4 x i16> %v) {
; CHECK-LABEL: test_vqrdmulsh_lane_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmulh.s16 d16, d1, d2[3]
; CHECK-NEXT:    vqsub.s16 d0, d0, d16
; CHECK-NEXT:    bx lr
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %prod = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %x, <4 x i16> %shuffle)
  %retval = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> %acc, <4 x i16> %prod)
  ret <4 x i16> %retval
}

define arm_aapcs_vfpcc <8 x i16> @test_vqrdmulshq_lane_s16(<8 x i16> %acc, <8 x i16> %x, <4 x i16> %v) {
; CHECK-LABEL: test_vqrdmulshq_lane_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    @ kill: def $d4 killed $d4 def $q2
; CHECK-NEXT:    vqrdmulh.s16 q8, q1, d4[2]
; CHECK-NEXT:    vqsub.s16 q0, q0, q8
; CHECK-NEXT:    bx lr
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  %prod = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %x, <8 x i16> %shuffle)
  %retval = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %acc, <8 x i16> %prod)
  ret <8 x i16> %retval
}

define arm_aapcs_vfpcc <2 x i32> @test_vqrdmulsh_lane_s32(<2 x i32> %acc, <2 x i32> %x, <2 x i32> %v) {
; CHECK-LABEL: test_vqrdmulsh_lane_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmulh.s32 d16, d1, d2[1]
; CHECK-NEXT:    vqsub.s32 d0, d0, d16
; CHECK-NEXT:    bx lr
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
  %prod = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %x, <2 x i32> %shuffle)
  %retval = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %acc, <2 x i32> %prod)
  ret <2 x i32> %retval
}

define arm_aapcs_vfpcc <4 x i32> @test_vqrdmulshq_lane_s32(<4 x i32> %acc, <4 x i32> %x, <2 x i32> %v) {
; CHECK-LABEL: test_vqrdmulshq_lane_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    @ kill: def $d4 killed $d4 def $q2
; CHECK-NEXT:    vqrdmulh.s32 q8, q1, d4[0]
; CHECK-NEXT:    vqsub.s32 q0, q0, q8
; CHECK-NEXT:    bx lr
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> poison, <4 x i32> zeroinitializer
  %prod = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %x, <4 x i32> %shuffle)
  %retval = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %acc, <4 x i32> %prod)
  ret <4 x i32> %retval
}

;-----------------------------------------------------------------------------
; RDMA intrinsics

; Direct calls to the vqrdmlah/vqrdmlsh intrinsics, in plain-vector and
; lane-splat forms. Each is expected to select to a single vqrdmlah/vqrdmlsh
; instruction, with the splat folded into a lane-indexed operand.

define arm_aapcs_vfpcc <4 x i16> @test_vqrdmlah_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_vqrdmlah_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmlah.s16 d0, d1, d2
; CHECK-NEXT:    bx lr
entry:
  %vqrdmlah_v3.i = tail call <4 x i16> @llvm.arm.neon.vqrdmlah.v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c)
  ret <4 x i16> %vqrdmlah_v3.i
}

define arm_aapcs_vfpcc <2 x i32> @test_vqrdmlah_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) {
; CHECK-LABEL: test_vqrdmlah_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmlah.s32 d0, d1, d2
; CHECK-NEXT:    bx lr
entry:
  %vqrdmlah_v3.i = tail call <2 x i32> @llvm.arm.neon.vqrdmlah.v2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c)
  ret <2 x i32> %vqrdmlah_v3.i
}

define arm_aapcs_vfpcc <8 x i16> @test_vqrdmlahq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_vqrdmlahq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmlah.s16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %vqrdmlahq_v3.i = tail call <8 x i16> @llvm.arm.neon.vqrdmlah.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c)
  ret <8 x i16> %vqrdmlahq_v3.i
}

define arm_aapcs_vfpcc <4 x i32> @test_vqrdmlahq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: test_vqrdmlahq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmlah.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %vqrdmlahq_v3.i = tail call <4 x i32> @llvm.arm.neon.vqrdmlah.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
  ret <4 x i32> %vqrdmlahq_v3.i
}

define arm_aapcs_vfpcc <4 x i16> @test_vqrdmlah_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_vqrdmlah_lane_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmlah.s16 d0, d1, d2[3]
; CHECK-NEXT:    bx lr
entry:
  %lane = shufflevector <4 x i16> %c, <4 x i16> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vqrdmlah_v3.i = tail call <4 x i16> @llvm.arm.neon.vqrdmlah.v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %lane)
  ret <4 x i16> %vqrdmlah_v3.i
}

define arm_aapcs_vfpcc <2 x i32> @test_vqrdmlah_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) {
; CHECK-LABEL: test_vqrdmlah_lane_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmlah.s32 d0, d1, d2[1]
; CHECK-NEXT:    bx lr
entry:
  %lane = shufflevector <2 x i32> %c, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
  %vqrdmlah_v3.i = tail call <2 x i32> @llvm.arm.neon.vqrdmlah.v2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %lane)
  ret <2 x i32> %vqrdmlah_v3.i
}

define arm_aapcs_vfpcc <8 x i16> @test_vqrdmlahq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_vqrdmlahq_lane_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    @ kill: def $d4 killed $d4 def $q2
; CHECK-NEXT:    vqrdmlah.s16 q0, q1, d4[3]
; CHECK-NEXT:    bx lr
entry:
  %lane = shufflevector <4 x i16> %c, <4 x i16> poison, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %vqrdmlahq_v3.i = tail call <8 x i16> @llvm.arm.neon.vqrdmlah.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %lane)
  ret <8 x i16> %vqrdmlahq_v3.i
}

define arm_aapcs_vfpcc <4 x i32> @test_vqrdmlahq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %c) {
; CHECK-LABEL: test_vqrdmlahq_lane_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    @ kill: def $d4 killed $d4 def $q2
; CHECK-NEXT:    vqrdmlah.s32 q0, q1, d4[1]
; CHECK-NEXT:    bx lr
entry:
  %lane = shufflevector <2 x i32> %c, <2 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %vqrdmlahq_v3.i = tail call <4 x i32> @llvm.arm.neon.vqrdmlah.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %lane)
  ret <4 x i32> %vqrdmlahq_v3.i
}

define arm_aapcs_vfpcc <4 x i16> @test_vqrdmlsh_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_vqrdmlsh_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmlsh.s16 d0, d1, d2
; CHECK-NEXT:    bx lr
entry:
  %vqrdmlsh_v3.i = tail call <4 x i16> @llvm.arm.neon.vqrdmlsh.v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c)
  ret <4 x i16> %vqrdmlsh_v3.i
}

define arm_aapcs_vfpcc <2 x i32> @test_vqrdmlsh_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) {
; CHECK-LABEL: test_vqrdmlsh_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmlsh.s32 d0, d1, d2
; CHECK-NEXT:    bx lr
entry:
  %vqrdmlsh_v3.i = tail call <2 x i32> @llvm.arm.neon.vqrdmlsh.v2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c)
  ret <2 x i32> %vqrdmlsh_v3.i
}

define arm_aapcs_vfpcc <8 x i16> @test_vqrdmlshq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_vqrdmlshq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmlsh.s16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %vqrdmlshq_v3.i = tail call <8 x i16> @llvm.arm.neon.vqrdmlsh.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c)
  ret <8 x i16> %vqrdmlshq_v3.i
}

define arm_aapcs_vfpcc <4 x i32> @test_vqrdmlshq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: test_vqrdmlshq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmlsh.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %vqrdmlshq_v3.i = tail call <4 x i32> @llvm.arm.neon.vqrdmlsh.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
  ret <4 x i32> %vqrdmlshq_v3.i
}

define arm_aapcs_vfpcc <4 x i16> @test_vqrdmlsh_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_vqrdmlsh_lane_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmlsh.s16 d0, d1, d2[3]
; CHECK-NEXT:    bx lr
entry:
  %lane = shufflevector <4 x i16> %c, <4 x i16> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vqrdmlsh_v3.i = tail call <4 x i16> @llvm.arm.neon.vqrdmlsh.v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %lane)
  ret <4 x i16> %vqrdmlsh_v3.i
}

define arm_aapcs_vfpcc <2 x i32> @test_vqrdmlsh_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) {
; CHECK-LABEL: test_vqrdmlsh_lane_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmlsh.s32 d0, d1, d2[1]
; CHECK-NEXT:    bx lr
entry:
  %lane = shufflevector <2 x i32> %c, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
  %vqrdmlsh_v3.i = tail call <2 x i32> @llvm.arm.neon.vqrdmlsh.v2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %lane)
  ret <2 x i32> %vqrdmlsh_v3.i
}

define arm_aapcs_vfpcc <8 x i16> @test_vqrdmlshq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_vqrdmlshq_lane_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    @ kill: def $d4 killed $d4 def $q2
; CHECK-NEXT:    vqrdmlsh.s16 q0, q1, d4[3]
; CHECK-NEXT:    bx lr
entry:
  %lane = shufflevector <4 x i16> %c, <4 x i16> poison, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %vqrdmlshq_v3.i = tail call <8 x i16> @llvm.arm.neon.vqrdmlsh.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %lane)
  ret <8 x i16> %vqrdmlshq_v3.i
}

define arm_aapcs_vfpcc <4 x i32> @test_vqrdmlshq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %c) {
; CHECK-LABEL: test_vqrdmlshq_lane_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    @ kill: def $d4 killed $d4 def $q2
; CHECK-NEXT:    vqrdmlsh.s32 q0, q1, d4[1]
; CHECK-NEXT:    bx lr
entry:
  %lane = shufflevector <2 x i32> %c, <2 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %vqrdmlshq_v3.i = tail call <4 x i32> @llvm.arm.neon.vqrdmlsh.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %lane)
  ret <4 x i32> %vqrdmlshq_v3.i
}