; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64 -mattr=+v8.2a,+fullfp16 | FileCheck %s

; Half-precision multiply / fused multiply-add tests where one multiplicand
; comes from a vector lane. Built for aarch64 with +v8.2a,+fullfp16.

; Intrinsics used by the functions in this file.
declare half @llvm.aarch64.neon.fmulx.f16(half, half)
declare <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half>, <4 x half>)
declare <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half>, <8 x half>)
declare <4 x half> @llvm.fma.v4f16(<4 x half>, <4 x half>, <4 x half>)
declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>)
declare half @llvm.fma.f16(half, half, half) #1

; Vector fma where one multiplicand is lane 0 of %c, broadcast through a
; shufflevector with a zeroinitializer mask. The expected output is the
; by-element fmla with index 0.

; 4 x half accumulator, 4 x half lane source.
define <4 x half> @t_vfma_lane_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfma_lane_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmla v0.4h, v1.4h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %lane1 = shufflevector <4 x half> %c, <4 x half> undef, <4 x i32> zeroinitializer
  %fmla3 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> %lane1, <4 x half> %a)
  ret <4 x half> %fmla3
}

; 8 x half accumulator, 4 x half lane source (the shuffle widens the splat).
define <8 x half> @t_vfmaq_lane_f16(<8 x half> %a, <8 x half> %b, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmaq_lane_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmla v0.8h, v1.8h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %lane1 = shufflevector <4 x half> %c, <4 x half> undef, <8 x i32> zeroinitializer
  %fmla3 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %lane1, <8 x half> %a)
  ret <8 x half> %fmla3
}

; 4 x half accumulator, 8 x half lane source. The splat is the first fma
; operand here instead of the second.
define <4 x half> @t_vfma_laneq_f16(<4 x half> %a, <4 x half> %b, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfma_laneq_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmla v0.4h, v1.4h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %lane1 = shufflevector <8 x half> %c, <8 x half> undef, <4 x i32> zeroinitializer
  %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %lane1, <4 x half> %b, <4 x half> %a)
  ret <4 x half> %0
}

; 8 x half accumulator, 8 x half lane source; lane 0 of %c is splatted and
; passed as the first fma operand.
define <8 x half> @t_vfmaq_laneq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmaq_laneq_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmla v0.8h, v1.8h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %lane1 = shufflevector <8 x half> %c, <8 x half> undef, <8 x i32> zeroinitializer
  %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %lane1, <8 x half> %b, <8 x half> %a)
  ret <8 x half> %0
}

; Vector fma with a scalar multiplicand: %c is inserted at element 0 and then
; splatted. The expected output is the by-element fmla with index 0.

define <4 x half> @t_vfma_n_f16(<4 x half> %a, <4 x half> %b, half %c) {
; CHECK-LABEL: t_vfma_n_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $h2 killed $h2 def $q2
; CHECK-NEXT:    fmla v0.4h, v1.4h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %vecinit = insertelement <4 x half> undef, half %c, i32 0
  %vecinit3 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer
  %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> %vecinit3, <4 x half> %a) #4
  ret <4 x half> %0
}

define <8 x half> @t_vfmaq_n_f16(<8 x half> %a, <8 x half> %b, half %c) {
; CHECK-LABEL: t_vfmaq_n_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $h2 killed $h2 def $q2
; CHECK-NEXT:    fmla v0.8h, v1.8h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %vecinit = insertelement <8 x half> undef, half %c, i32 0
  %vecinit7 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer
  %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %vecinit7, <8 x half> %a) #4
  ret <8 x half> %0
}

; Scalar fma with one multiplicand extracted from a vector. Extracting lane 0
; is expected to give a plain fmadd on h registers; extracting a non-zero lane
; is expected to give the indexed fmla form.

define half @t_vfmah_lane_f16_0(half %a, half %b, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmah_lane_f16_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmadd h0, h1, h2, h0
; CHECK-NEXT:    ret
entry:
  %extract = extractelement <4 x half> %c, i32 0
  %0 = tail call half @llvm.fma.f16(half %b, half %extract, half %a)
  ret half %0
}

; Same as above with the two multiplicands swapped in the fma call.
define half @t_vfmah_lane_f16_0_swap(half %a, half %b, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmah_lane_f16_0_swap:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmadd h0, h2, h1, h0
; CHECK-NEXT:    ret
entry:
  %extract = extractelement <4 x half> %c, i32 0
  %0 = tail call half @llvm.fma.f16(half %extract, half %b, half %a)
  ret half %0
}

define half @t_vfmah_lane_f16_3(half %a, half %b, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmah_lane_f16_3:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmla h0, h1, v2.h[3]
; CHECK-NEXT:    ret
entry:
  %extract = extractelement <4 x half> %c, i32 3
  %0 = tail call half @llvm.fma.f16(half %b, half %extract, half %a)
  ret half %0
}

; Both multiplicands come from the same vector: lane 0 and lane 3 of %c.
define half @t_vfmah_lane_f16_3_0(half %a, <4 x half> %c) {
; CHECK-LABEL: t_vfmah_lane_f16_3_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    fmla h0, h1, v1.h[3]
; CHECK-NEXT:    ret
entry:
  %b = extractelement <4 x half> %c, i32 0
  %extract = extractelement <4 x half> %c, i32 3
  %0 = tail call half @llvm.fma.f16(half %b, half %extract, half %a)
  ret half %0
}

; Both multiplicands are lane 0 of two different vectors.
define half @t_vfmah_lane_f16_0_0(half %a, <4 x half> %b, <4 x half> %c) {
; CHECK-LABEL: t_vfmah_lane_f16_0_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    fmadd h0, h1, h2, h0
; CHECK-NEXT:    ret
entry:
  %b0 = extractelement <4 x half> %b, i32 0
  %c0 = extractelement <4 x half> %c, i32 0
  %0 = tail call half @llvm.fma.f16(half %b0, half %c0, half %a)
  ret half %0
}

define half @t_vfmah_laneq_f16_0(half %a, half %b, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmah_laneq_f16_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmadd h0, h1, h2, h0
; CHECK-NEXT:    ret
entry:
  %extract = extractelement <8 x half> %c, i32 0
  %0 = tail call half @llvm.fma.f16(half %b, half %extract, half %a)
  ret half %0
}

define half @t_vfmah_laneq_f16_0_swap(half %a, half %b, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmah_laneq_f16_0_swap:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmadd h0, h2, h1, h0
; CHECK-NEXT:    ret
entry:
  %extract = extractelement <8 x half> %c, i32 0
  %0 = tail call half @llvm.fma.f16(half %extract, half %b, half %a)
  ret half %0
}

define half @t_vfmah_laneq_f16_7(half %a, half %b, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmah_laneq_f16_7:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmla h0, h1, v2.h[7]
; CHECK-NEXT:    ret
entry:
  %extract = extractelement <8 x half> %c, i32 7
  %0 = tail call half @llvm.fma.f16(half %b, half %extract, half %a)
  ret half %0
}

; Vector fused multiply-subtract: %b is negated with an fsub from a splat of
; 0xH8000 (half -0.0) before the fma. The expected output is the by-element
; fmls with index 0.

define <4 x half> @t_vfms_lane_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfms_lane_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmls v0.4h, v1.4h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %sub = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
  %lane1 = shufflevector <4 x half> %c, <4 x half> undef, <4 x i32> zeroinitializer
  %fmla3 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %sub, <4 x half> %lane1, <4 x half> %a)
  ret <4 x half> %fmla3
}

define <8 x half> @t_vfmsq_lane_f16(<8 x half> %a, <8 x half> %b, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmsq_lane_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmls v0.8h, v1.8h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %sub = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
  %lane1 = shufflevector <4 x half> %c, <4 x half> undef, <8 x i32> zeroinitializer
  %fmla3 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %sub, <8 x half> %lane1, <8 x half> %a)
  ret <8 x half> %fmla3
}

; The splat is the first fma operand and the negated %b is the second.
define <4 x half> @t_vfms_laneq_f16(<4 x half> %a, <4 x half> %b, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfms_laneq_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmls v0.4h, v1.4h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %sub = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
  %lane1 = shufflevector <8 x half> %c, <8 x half> undef, <4 x i32> zeroinitializer
  %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %lane1, <4 x half> %sub, <4 x half> %a)
  ret <4 x half> %0
}

define <8 x half> @t_vfmsq_laneq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmsq_laneq_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmls v0.8h, v1.8h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %sub = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
  %lane1 = shufflevector <8 x half> %c, <8 x half> undef, <8 x i32> zeroinitializer
  %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %lane1, <8 x half> %sub, <8 x half> %a)
  ret <8 x half> %0
}

; Fused multiply-subtract with a scalar multiplicand splatted from %c.

define <4 x half> @t_vfms_n_f16(<4 x half> %a, <4 x half> %b, half %c) {
; CHECK-LABEL: t_vfms_n_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $h2 killed $h2 def $q2
; CHECK-NEXT:    fmls v0.4h, v1.4h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %sub = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
  %vecinit = insertelement <4 x half> undef, half %c, i32 0
  %vecinit3 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer
  %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %sub, <4 x half> %vecinit3, <4 x half> %a) #4
  ret <4 x half> %0
}

define <8 x half> @t_vfmsq_n_f16(<8 x half> %a, <8 x half> %b, half %c) {
; CHECK-LABEL: t_vfmsq_n_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $h2 killed $h2 def $q2
; CHECK-NEXT:    fmls v0.8h, v1.8h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %sub = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
  %vecinit = insertelement <8 x half> undef, half %c, i32 0
  %vecinit7 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer
  %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %sub, <8 x half> %vecinit7, <8 x half> %a) #4
  ret <8 x half> %0
}

; Scalar fused multiply-subtract with an extracted lane. Lane 0 is expected to
; give fmsub on h registers; a non-zero lane is expected to give the indexed
; fmls form.

define half @t_vfmsh_lane_f16_0(half %a, half %b, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmsh_lane_f16_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmsub h0, h2, h1, h0
; CHECK-NEXT:    ret
entry:
  %0 = fsub half 0xH8000, %b
  %extract = extractelement <4 x half> %c, i32 0
  %1 = tail call half @llvm.fma.f16(half %0, half %extract, half %a)
  ret half %1
}

define half @t_vfmsh_lane_f16_0_swap(half %a, half %b, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmsh_lane_f16_0_swap:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmsub h0, h2, h1, h0
; CHECK-NEXT:    ret
entry:
  %0 = fsub half 0xH8000, %b
  %extract = extractelement <4 x half> %c, i32 0
  %1 = tail call half @llvm.fma.f16(half %extract, half %0, half %a)
  ret half %1
}

define half @t_vfmsh_lane_f16_3(half %a, half %b, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmsh_lane_f16_3:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmls h0, h1, v2.h[3]
; CHECK-NEXT:    ret
entry:
  %0 = fsub half 0xH8000, %b
  %extract = extractelement <4 x half> %c, i32 3
  %1 = tail call half @llvm.fma.f16(half %0, half %extract, half %a)
  ret half %1
}

define half @t_vfmsh_laneq_f16_0(half %a, half %b, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmsh_laneq_f16_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmsub h0, h2, h1, h0
; CHECK-NEXT:    ret
entry:
  %0 = fsub half 0xH8000, %b
  %extract = extractelement <8 x half> %c, i32 0
  %1 = tail call half @llvm.fma.f16(half %0, half %extract, half %a)
  ret half %1
}

; Lane 0 of %c is negated and multiplied by lane 3 of the same vector.
define half @t_vfmsh_lane_f16_0_3(half %a, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmsh_lane_f16_0_3:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    fmls h0, h1, v1.h[3]
; CHECK-NEXT:    ret
entry:
  %b = extractelement <4 x half> %c, i32 0
  %0 = fsub half 0xH8000, %b
  %extract = extractelement <4 x half> %c, i32 3
  %1 = tail call half @llvm.fma.f16(half %0, half %extract, half %a)
  ret half %1
}

define half @t_vfmsh_laneq_f16_0_swap(half %a, half %b, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmsh_laneq_f16_0_swap:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmsub h0, h2, h1, h0
; CHECK-NEXT:    ret
entry:
  %0 = fsub half 0xH8000, %b
  %extract = extractelement <8 x half> %c, i32 0
  %1 = tail call half @llvm.fma.f16(half %extract, half %0, half %a)
  ret half %1
}

define half @t_vfmsh_laneq_f16_7(half %a, half %b, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmsh_laneq_f16_7:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmls h0, h1, v2.h[7]
; CHECK-NEXT:    ret
entry:
  %0 = fsub half 0xH8000, %b
  %extract = extractelement <8 x half> %c, i32 7
  %1 = tail call half @llvm.fma.f16(half %0, half %extract, half %a)
  ret half %1
}

; Plain fmul by a lane. The splat or extracted element is the first fmul
; operand in the IR.

define <4 x half> @t_vmul_laneq_f16(<4 x half> %a, <8 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmul_laneq_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmul v0.4h, v0.4h, v1.h[0]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <8 x half> %b, <8 x half> undef, <4 x i32> zeroinitializer
  %mul = fmul <4 x half> %shuffle, %a
  ret <4 x half> %mul
}

define <8 x half> @t_vmulq_laneq_f16(<8 x half> %a, <8 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmulq_laneq_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmul v0.8h, v0.8h, v1.h[0]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <8 x half> %b, <8 x half> undef, <8 x i32> zeroinitializer
  %mul = fmul <8 x half> %shuffle, %a
  ret <8 x half> %mul
}

define half @t_vmulh_lane0_f16(half %a, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vmulh_lane0_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    fmul h0, h0, h1
; CHECK-NEXT:    ret
entry:
  %0 = extractelement <4 x half> %c, i32 0
  %1 = fmul half %0, %a
  ret half %1
}

define half @t_vmulh_lane3_f16(half %a, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vmulh_lane3_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    fmul h0, h0, v1.h[3]
; CHECK-NEXT:    ret
entry:
  %0 = extractelement <4 x half> %c, i32 3
  %1 = fmul half %0, %a
  ret half %1
}

define half @t_vmulh_laneq0_f16(half %a, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vmulh_laneq0_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmul h0, h0, h1
; CHECK-NEXT:    ret
entry:
  %0 = extractelement <8 x half> %c, i32 0
  %1 = fmul half %0, %a
  ret half %1
}

define half @t_vmulh_laneq7_f16(half %a, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vmulh_laneq7_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmul h0, h0, v1.h[7]
; CHECK-NEXT:    ret
entry:
  %0 = extractelement <8 x half> %c, i32 7
  %1 = fmul half %0, %a
  ret half %1
}

; fmulx intrinsic tests: scalar, extracted lane, splatted lane, and splatted
; scalar.

define half @t_vmulx_f16(half %a, half %b) {
; CHECK-LABEL: t_vmulx_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmulx h0, h0, h1
; CHECK-NEXT:    ret
entry:
  %fmulx.i = tail call half @llvm.aarch64.neon.fmulx.f16(half %a, half %b)
  ret half %fmulx.i
}

define half @t_vmulxh_lane0_f16(half %a, <4 x half> %b) {
; CHECK-LABEL: t_vmulxh_lane0_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    fmulx h0, h0, h1
; CHECK-NEXT:    ret
entry:
  %extract = extractelement <4 x half> %b, i32 0
  %fmulx.i = tail call half @llvm.aarch64.neon.fmulx.f16(half %a, half %extract)
  ret half %fmulx.i
}

define half @t_vmulxh_lane3_f16(half %a, <4 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmulxh_lane3_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    fmulx h0, h0, v1.h[3]
; CHECK-NEXT:    ret
entry:
  %extract = extractelement <4 x half> %b, i32 3
  %fmulx.i = tail call half @llvm.aarch64.neon.fmulx.f16(half %a, half %extract)
  ret half %fmulx.i
}

define <4 x half> @t_vmulx_lane_f16(<4 x half> %a, <4 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmulx_lane_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    fmulx v0.4h, v0.4h, v1.h[0]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <4 x half> %b, <4 x half> undef, <4 x i32> zeroinitializer
  %vmulx2.i = tail call <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half> %a, <4 x half> %shuffle) #4
  ret <4 x half> %vmulx2.i
}

define <8 x half> @t_vmulxq_lane_f16(<8 x half> %a, <4 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmulxq_lane_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    fmulx v0.8h, v0.8h, v1.h[0]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <4 x half> %b, <4 x half> undef, <8 x i32> zeroinitializer
  %vmulx2.i = tail call <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half> %a, <8 x half> %shuffle) #4
  ret <8 x half> %vmulx2.i
}

define <4 x half> @t_vmulx_laneq_f16(<4 x half> %a, <8 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmulx_laneq_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmulx v0.4h, v0.4h, v1.h[0]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <8 x half> %b, <8 x half> undef, <4 x i32> zeroinitializer
  %vmulx2.i = tail call <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half> %a, <4 x half> %shuffle) #4
  ret <4 x half> %vmulx2.i
}

define <8 x half> @t_vmulxq_laneq_f16(<8 x half> %a, <8 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmulxq_laneq_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmulx v0.8h, v0.8h, v1.h[0]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <8 x half> %b, <8 x half> undef, <8 x i32> zeroinitializer
  %vmulx2.i = tail call <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half> %a, <8 x half> %shuffle) #4
  ret <8 x half> %vmulx2.i
}

define half @t_vmulxh_laneq0_f16(half %a, <8 x half> %b) {
; CHECK-LABEL: t_vmulxh_laneq0_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmulx h0, h0, h1
; CHECK-NEXT:    ret
entry:
  %extract = extractelement <8 x half> %b, i32 0
  %fmulx.i = tail call half @llvm.aarch64.neon.fmulx.f16(half %a, half %extract)
  ret half %fmulx.i
}

define half @t_vmulxh_laneq7_f16(half %a, <8 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmulxh_laneq7_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmulx h0, h0, v1.h[7]
; CHECK-NEXT:    ret
entry:
  %extract = extractelement <8 x half> %b, i32 7
  %fmulx.i = tail call half @llvm.aarch64.neon.fmulx.f16(half %a, half %extract)
  ret half %fmulx.i
}

; fmulx with a splatted scalar: the expected output here is an explicit dup
; followed by a full-vector fmulx, not a by-element form.

define <4 x half> @t_vmulx_n_f16(<4 x half> %a, half %c) {
; CHECK-LABEL: t_vmulx_n_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $h1 killed $h1 def $q1
; CHECK-NEXT:    dup v1.4h, v1.h[0]
; CHECK-NEXT:    fmulx v0.4h, v0.4h, v1.4h
; CHECK-NEXT:    ret
entry:
  %vecinit = insertelement <4 x half> undef, half %c, i32 0
  %vecinit3 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer
  %vmulx2.i = tail call <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half> %a, <4 x half> %vecinit3) #4
  ret <4 x half> %vmulx2.i
}

define <8 x half> @t_vmulxq_n_f16(<8 x half> %a, half %c) {
; CHECK-LABEL: t_vmulxq_n_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $h1 killed $h1 def $q1
; CHECK-NEXT:    dup v1.8h, v1.h[0]
; CHECK-NEXT:    fmulx v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
entry:
  %vecinit = insertelement <8 x half> undef, half %c, i32 0
  %vecinit7 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer
  %vmulx2.i = tail call <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half> %a, <8 x half> %vecinit7) #4
  ret <8 x half> %vmulx2.i
}

; Scalar fma / fms with the highest lane of each vector width, declared
; without the extra i32 parameter.

define half @t_vfmah_lane3_f16(half %a, half %b, <4 x half> %c) {
; CHECK-LABEL: t_vfmah_lane3_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmla h0, h1, v2.h[3]
; CHECK-NEXT:    ret
entry:
  %extract = extractelement <4 x half> %c, i32 3
  %0 = tail call half @llvm.fma.f16(half %b, half %extract, half %a)
  ret half %0
}

define half @t_vfmah_laneq7_f16(half %a, half %b, <8 x half> %c) {
; CHECK-LABEL: t_vfmah_laneq7_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmla h0, h1, v2.h[7]
; CHECK-NEXT:    ret
entry:
  %extract = extractelement <8 x half> %c, i32 7
  %0 = tail call half @llvm.fma.f16(half %b, half %extract, half %a)
  ret half %0
}

define half @t_vfmsh_lane3_f16(half %a, half %b, <4 x half> %c) {
; CHECK-LABEL: t_vfmsh_lane3_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmls h0, h1, v2.h[3]
; CHECK-NEXT:    ret
entry:
  %0 = fsub half 0xH8000, %b
  %extract = extractelement <4 x half> %c, i32 3
  %1 = tail call half @llvm.fma.f16(half %0, half %extract, half %a)
  ret half %1
}

define half @t_vfmsh_laneq7_f16(half %a, half %b, <8 x half> %c) {
; CHECK-LABEL: t_vfmsh_laneq7_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmls h0, h1, v2.h[7]
; CHECK-NEXT:    ret
entry:
  %0 = fsub half 0xH8000, %b
  %extract = extractelement <8 x half> %c, i32 7
  %1 = tail call half @llvm.fma.f16(half %0, half %extract, half %a)
  ret half %1
}

; The lane is extracted from the result of a vector fadd rather than directly
; from an argument.
define half @t_fadd_vfmah_f16(half %a, half %b, <4 x half> %c, <4 x half> %d) {
; CHECK-LABEL: t_fadd_vfmah_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fadd v2.4h, v2.4h, v3.4h
; CHECK-NEXT:    fmla h0, h1, v2.h[3]
; CHECK-NEXT:    ret
entry:
  %0 = fadd <4 x half> %c, %d
  %extract = extractelement <4 x half> %0, i32 3
  %1 = tail call half @llvm.fma.f16(half %b, half %extract, half %a)
  ret half %1
}

; fmulx of element 0 and element 1 of the same <2 x half> vector.
define half @test_fmulx_horizontal_f16(<2 x half> %v) {
; CHECK-LABEL: test_fmulx_horizontal_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fmulx h0, h0, v0.h[1]
; CHECK-NEXT:    ret
entry:
  %0 = extractelement <2 x half> %v, i32 0
  %1 = extractelement <2 x half> %v, i32 1
  %2 = call half @llvm.aarch64.neon.fmulx.f16(half %0, half %1)
  ret half %2
}