; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s --mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl | FileCheck %s

; Operand-order coverage for the llvm.fma intrinsic on half-precision types.
; With %x = 1, %y = 2 and %z = 3, the digits in each function name spell the
; order in which those values are handed to the intrinsic call in its body.

declare half @llvm.fma.f16(half, half, half)
declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>)
declare <16 x half> @llvm.fma.v16f16(<16 x half>, <16 x half>, <16 x half>)
declare <32 x half> @llvm.fma.v32f16(<32 x half>, <32 x half>, <32 x half>)

; Scalar half: all three operands arrive as function arguments.

define half @fma_123_f16(half %x, half %y, half %z) {
; CHECK-LABEL: fma_123_f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd213sh %xmm2, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %fma = call half @llvm.fma.f16(half %x, half %y, half %z)
  ret half %fma
}

define half @fma_213_f16(half %x, half %y, half %z) {
; CHECK-LABEL: fma_213_f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd213sh %xmm2, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %fma = call half @llvm.fma.f16(half %y, half %x, half %z)
  ret half %fma
}

define half @fma_231_f16(half %x, half %y, half %z) {
; CHECK-LABEL: fma_231_f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd231sh %xmm1, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %fma = call half @llvm.fma.f16(half %y, half %z, half %x)
  ret half %fma
}

define half @fma_321_f16(half %x, half %y, half %z) {
; CHECK-LABEL: fma_321_f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd231sh %xmm1, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %fma = call half @llvm.fma.f16(half %z, half %y, half %x)
  ret half %fma
}

define half @fma_132_f16(half %x, half %y, half %z) {
; CHECK-LABEL: fma_132_f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd213sh %xmm1, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %fma = call half @llvm.fma.f16(half %x, half %z, half %y)
  ret half %fma
}

define half @fma_312_f16(half %x, half %y, half %z) {
; CHECK-LABEL: fma_312_f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd213sh %xmm1, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %fma = call half @llvm.fma.f16(half %z, half %x, half %y)
  ret half %fma
}

; Scalar half: %z is loaded through %zp before the call.

define half @fma_load_123_f16(half %x, half %y, ptr %zp) {
; CHECK-LABEL: fma_load_123_f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd213sh (%rdi), %xmm1, %xmm0
; CHECK-NEXT:    retq
  %z = load half, ptr %zp
  %fma = call half @llvm.fma.f16(half %x, half %y, half %z)
  ret half %fma
}

define half @fma_load_213_f16(half %x, half %y, ptr %zp) {
; CHECK-LABEL: fma_load_213_f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd213sh (%rdi), %xmm1, %xmm0
; CHECK-NEXT:    retq
  %z = load half, ptr %zp
  %fma = call half @llvm.fma.f16(half %y, half %x, half %z)
  ret half %fma
}

define half @fma_load_231_f16(half %x, half %y, ptr %zp) {
; CHECK-LABEL: fma_load_231_f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd231sh (%rdi), %xmm1, %xmm0
; CHECK-NEXT:    retq
  %z = load half, ptr %zp
  %fma = call half @llvm.fma.f16(half %y, half %z, half %x)
  ret half %fma
}

define half @fma_load_321_f16(half %x, half %y, ptr %zp) {
; CHECK-LABEL: fma_load_321_f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd231sh (%rdi), %xmm1, %xmm0
; CHECK-NEXT:    retq
  %z = load half, ptr %zp
  %fma = call half @llvm.fma.f16(half %z, half %y, half %x)
  ret half %fma
}

define half @fma_load_132_f16(half %x, half %y, ptr %zp) {
; CHECK-LABEL: fma_load_132_f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd132sh (%rdi), %xmm1, %xmm0
; CHECK-NEXT:    retq
  %z = load half, ptr %zp
  %fma = call half @llvm.fma.f16(half %x, half %z, half %y)
  ret half %fma
}

define half @fma_load_312_f16(half %x, half %y, ptr %zp) {
; CHECK-LABEL: fma_load_312_f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd132sh (%rdi), %xmm1, %xmm0
; CHECK-NEXT:    retq
  %z = load half, ptr %zp
  %fma = call half @llvm.fma.f16(half %z, half %x, half %y)
  ret half %fma
}

; <8 x half>: all three operands arrive as function arguments.

define <8 x half> @fma_123_v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z) {
; CHECK-LABEL: fma_123_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd213ph %xmm2, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %fma = call <8 x half> @llvm.fma.v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z)
  ret <8 x half> %fma
}

define <8 x half> @fma_213_v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z) {
; CHECK-LABEL: fma_213_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd213ph %xmm2, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %fma = call <8 x half> @llvm.fma.v8f16(<8 x half> %y, <8 x half> %x, <8 x half> %z)
  ret <8 x half> %fma
}

define <8 x half> @fma_231_v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z) {
; CHECK-LABEL: fma_231_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd231ph %xmm1, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %fma = call <8 x half> @llvm.fma.v8f16(<8 x half> %y, <8 x half> %z, <8 x half> %x)
  ret <8 x half> %fma
}

define <8 x half> @fma_321_v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z) {
; CHECK-LABEL: fma_321_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd231ph %xmm1, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %fma = call <8 x half> @llvm.fma.v8f16(<8 x half> %z, <8 x half> %y, <8 x half> %x)
  ret <8 x half> %fma
}

define <8 x half> @fma_132_v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z) {
; CHECK-LABEL: fma_132_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd213ph %xmm1, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %fma = call <8 x half> @llvm.fma.v8f16(<8 x half> %x, <8 x half> %z, <8 x half> %y)
  ret <8 x half> %fma
}

define <8 x half> @fma_312_v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z) {
; CHECK-LABEL: fma_312_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd213ph %xmm1, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %fma = call <8 x half> @llvm.fma.v8f16(<8 x half> %z, <8 x half> %x, <8 x half> %y)
  ret <8 x half> %fma
}

; <8 x half>: %z is loaded through %zp before the call.

define <8 x half> @fma_load_123_v8f16(<8 x half> %x, <8 x half> %y, ptr %zp) {
; CHECK-LABEL: fma_load_123_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd213ph (%rdi), %xmm1, %xmm0
; CHECK-NEXT:    retq
  %z = load <8 x half>, ptr %zp
  %fma = call <8 x half> @llvm.fma.v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z)
  ret <8 x half> %fma
}

define <8 x half> @fma_load_213_v8f16(<8 x half> %x, <8 x half> %y, ptr %zp) {
; CHECK-LABEL: fma_load_213_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd213ph (%rdi), %xmm1, %xmm0
; CHECK-NEXT:    retq
  %z = load <8 x half>, ptr %zp
  %fma = call <8 x half> @llvm.fma.v8f16(<8 x half> %y, <8 x half> %x, <8 x half> %z)
  ret <8 x half> %fma
}

define <8 x half> @fma_load_231_v8f16(<8 x half> %x, <8 x half> %y, ptr %zp) {
; CHECK-LABEL: fma_load_231_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd231ph (%rdi), %xmm1, %xmm0
; CHECK-NEXT:    retq
  %z = load <8 x half>, ptr %zp
  %fma = call <8 x half> @llvm.fma.v8f16(<8 x half> %y, <8 x half> %z, <8 x half> %x)
  ret <8 x half> %fma
}

define <8 x half> @fma_load_321_v8f16(<8 x half> %x, <8 x half> %y, ptr %zp) {
; CHECK-LABEL: fma_load_321_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd231ph (%rdi), %xmm1, %xmm0
; CHECK-NEXT:    retq
  %z = load <8 x half>, ptr %zp
  %fma = call <8 x half> @llvm.fma.v8f16(<8 x half> %z, <8 x half> %y, <8 x half> %x)
  ret <8 x half> %fma
}

define <8 x half> @fma_load_132_v8f16(<8 x half> %x, <8 x half> %y, ptr %zp) {
; CHECK-LABEL: fma_load_132_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd132ph (%rdi), %xmm1, %xmm0
; CHECK-NEXT:    retq
  %z = load <8 x half>, ptr %zp
  %fma = call <8 x half> @llvm.fma.v8f16(<8 x half> %x, <8 x half> %z, <8 x half> %y)
  ret <8 x half> %fma
}

define <8 x half> @fma_load_312_v8f16(<8 x half> %x, <8 x half> %y, ptr %zp) {
; CHECK-LABEL: fma_load_312_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd132ph (%rdi), %xmm1, %xmm0
; CHECK-NEXT:    retq
  %z = load <8 x half>, ptr %zp
  %fma = call <8 x half> @llvm.fma.v8f16(<8 x half> %z, <8 x half> %x, <8 x half> %y)
  ret <8 x half> %fma
}

; <8 x half>, merge masking: lanes whose %mask bit is clear take %x instead of
; the fma result.

define <8 x half> @fma_mask_123_v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z, i8 %mask) {
; CHECK-LABEL: fma_mask_123_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfmadd132ph %xmm1, %xmm2, %xmm0 {%k1}
; CHECK-NEXT:    retq
  %fma = call <8 x half> @llvm.fma.v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z)
  %k = bitcast i8 %mask to <8 x i1>
  %sel = select <8 x i1> %k, <8 x half> %fma, <8 x half> %x
  ret <8 x half> %sel
}

define <8 x half> @fma_mask_213_v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z, i8 %mask) {
; CHECK-LABEL: fma_mask_213_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfmadd213ph %xmm2, %xmm1, %xmm0 {%k1}
; CHECK-NEXT:    retq
  %fma = call <8 x half> @llvm.fma.v8f16(<8 x half> %y, <8 x half> %x, <8 x half> %z)
  %k = bitcast i8 %mask to <8 x i1>
  %sel = select <8 x i1> %k, <8 x half> %fma, <8 x half> %x
  ret <8 x half> %sel
}

define <8 x half> @fma_mask_231_v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z, i8 %mask) {
; CHECK-LABEL: fma_mask_231_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfmadd231ph %xmm2, %xmm1, %xmm0 {%k1}
; CHECK-NEXT:    retq
  %fma = call <8 x half> @llvm.fma.v8f16(<8 x half> %y, <8 x half> %z, <8 x half> %x)
  %k = bitcast i8 %mask to <8 x i1>
  %sel = select <8 x i1> %k, <8 x half> %fma, <8 x half> %x
  ret <8 x half> %sel
}

define <8 x half> @fma_mask_321_v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z, i8 %mask) {
; CHECK-LABEL: fma_mask_321_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfmadd231ph %xmm1, %xmm2, %xmm0 {%k1}
; CHECK-NEXT:    retq
  %fma = call <8 x half> @llvm.fma.v8f16(<8 x half> %z, <8 x half> %y, <8 x half> %x)
  %k = bitcast i8 %mask to <8 x i1>
  %sel = select <8 x i1> %k, <8 x half> %fma, <8 x half> %x
  ret <8 x half> %sel
}

define <8 x half> @fma_mask_132_v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z, i8 %mask) {
; CHECK-LABEL: fma_mask_132_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfmadd132ph %xmm2, %xmm1, %xmm0 {%k1}
; CHECK-NEXT:    retq
  %fma = call <8 x half> @llvm.fma.v8f16(<8 x half> %x, <8 x half> %z, <8 x half> %y)
  %k = bitcast i8 %mask to <8 x i1>
  %sel = select <8 x i1> %k, <8 x half> %fma, <8 x half> %x
  ret <8 x half> %sel
}

define <8 x half> @fma_mask_312_v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z, i8 %mask) {
; CHECK-LABEL: fma_mask_312_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfmadd213ph %xmm1, %xmm2, %xmm0 {%k1}
; CHECK-NEXT:    retq
  %fma = call <8 x half> @llvm.fma.v8f16(<8 x half> %z, <8 x half> %x, <8 x half> %y)
  %k = bitcast i8 %mask to <8 x i1>
  %sel = select <8 x i1> %k, <8 x half> %fma, <8 x half> %x
  ret <8 x half> %sel
}

; <8 x half>, zero masking: lanes whose %mask bit is clear become zero.

define <8 x half> @fma_maskz_123_v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z, i8 %mask) {
; CHECK-LABEL: fma_maskz_123_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfmadd213ph %xmm2, %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %fma = call <8 x half> @llvm.fma.v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z)
  %k = bitcast i8 %mask to <8 x i1>
  %sel = select <8 x i1> %k, <8 x half> %fma, <8 x half> zeroinitializer
  ret <8 x half> %sel
}

define <8 x half> @fma_maskz_213_v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z, i8 %mask) {
; CHECK-LABEL: fma_maskz_213_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfmadd213ph %xmm2, %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %fma = call <8 x half> @llvm.fma.v8f16(<8 x half> %y, <8 x half> %x, <8 x half> %z)
  %k = bitcast i8 %mask to <8 x i1>
  %sel = select <8 x i1> %k, <8 x half> %fma, <8 x half> zeroinitializer
  ret <8 x half> %sel
}

define <8 x half> @fma_maskz_231_v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z, i8 %mask) {
; CHECK-LABEL: fma_maskz_231_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfmadd231ph %xmm1, %xmm2, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %fma = call <8 x half> @llvm.fma.v8f16(<8 x half> %y, <8 x half> %z, <8 x half> %x)
  %k = bitcast i8 %mask to <8 x i1>
  %sel = select <8 x i1> %k, <8 x half> %fma, <8 x half> zeroinitializer
  ret <8 x half> %sel
}

define <8 x half> @fma_maskz_321_v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z, i8 %mask) {
; CHECK-LABEL: fma_maskz_321_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfmadd231ph %xmm1, %xmm2, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %fma = call <8 x half> @llvm.fma.v8f16(<8 x half> %z, <8 x half> %y, <8 x half> %x)
  %k = bitcast i8 %mask to <8 x i1>
  %sel = select <8 x i1> %k, <8 x half> %fma, <8 x half> zeroinitializer
  ret <8 x half> %sel
}

define <8 x half> @fma_maskz_132_v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z, i8 %mask) {
; CHECK-LABEL: fma_maskz_132_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfmadd213ph %xmm1, %xmm2, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %fma = call <8 x half> @llvm.fma.v8f16(<8 x half> %x, <8 x half> %z, <8 x half> %y)
  %k = bitcast i8 %mask to <8 x i1>
  %sel = select <8 x i1> %k, <8 x half> %fma, <8 x half> zeroinitializer
  ret <8 x half> %sel
}

define <8 x half> @fma_maskz_312_v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z, i8 %mask) {
; CHECK-LABEL: fma_maskz_312_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfmadd213ph %xmm1, %xmm2, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %fma = call <8 x half> @llvm.fma.v8f16(<8 x half> %z, <8 x half> %x, <8 x half> %y)
  %k = bitcast i8 %mask to <8 x i1>
  %sel = select <8 x i1> %k, <8 x half> %fma, <8 x half> zeroinitializer
  ret <8 x half> %sel
}

; <8 x half>, merge masking with %z loaded through %zp.

define <8 x half> @fma_mask_load_123_v8f16(<8 x half> %x, <8 x half> %y, ptr %zp, i8 %mask) {
; CHECK-LABEL: fma_mask_load_123_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vfmadd213ph (%rdi), %xmm1, %xmm0 {%k1}
; CHECK-NEXT:    retq
  %z = load <8 x half>, ptr %zp
  %fma = call <8 x half> @llvm.fma.v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z)
  %k = bitcast i8 %mask to <8 x i1>
  %sel = select <8 x i1> %k, <8 x half> %fma, <8 x half> %x
  ret <8 x half> %sel
}

define <8 x half> @fma_mask_load_213_v8f16(<8 x half> %x, <8 x half> %y, ptr %zp, i8 %mask) {
; CHECK-LABEL: fma_mask_load_213_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vfmadd213ph (%rdi), %xmm1, %xmm0 {%k1}
; CHECK-NEXT:    retq
  %z = load <8 x half>, ptr %zp
  %fma = call <8 x half> @llvm.fma.v8f16(<8 x half> %y, <8 x half> %x, <8 x half> %z)
  %k = bitcast i8 %mask to <8 x i1>
  %sel = select <8 x i1> %k, <8 x half> %fma, <8 x half> %x
  ret <8 x half> %sel
}

define <8 x half> @fma_mask_load_231_v8f16(<8 x half> %x, <8 x half> %y, ptr %zp, i8 %mask) {
; CHECK-LABEL: fma_mask_load_231_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vfmadd231ph (%rdi), %xmm1, %xmm0 {%k1}
; CHECK-NEXT:    retq
  %z = load <8 x half>, ptr %zp
  %fma = call <8 x half> @llvm.fma.v8f16(<8 x half> %y, <8 x half> %z, <8 x half> %x)
  %k = bitcast i8 %mask to <8 x i1>
  %sel = select <8 x i1> %k, <8 x half> %fma, <8 x half> %x
  ret <8 x half> %sel
}

define <8 x half> @fma_mask_load_321_v8f16(<8 x half> %x, <8 x half> %y, ptr %zp, i8 %mask) {
; CHECK-LABEL: fma_mask_load_321_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vfmadd231ph (%rdi), %xmm1, %xmm0 {%k1}
; CHECK-NEXT:    retq
  %z = load <8 x half>, ptr %zp
  %fma = call <8 x half> @llvm.fma.v8f16(<8 x half> %z, <8 x half> %y, <8 x half> %x)
  %k = bitcast i8 %mask to <8 x i1>
  %sel = select <8 x i1> %k, <8 x half> %fma, <8 x half> %x
  ret <8 x half> %sel
}

define <8 x half> @fma_mask_load_132_v8f16(<8 x half> %x, <8 x half> %y, ptr %zp, i8 %mask) {
; CHECK-LABEL: fma_mask_load_132_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vfmadd132ph (%rdi), %xmm1, %xmm0 {%k1}
; CHECK-NEXT:    retq
  %z = load <8 x half>, ptr %zp
  %fma = call <8 x half> @llvm.fma.v8f16(<8 x half> %x, <8 x half> %z, <8 x half> %y)
  %k = bitcast i8 %mask to <8 x i1>
  %sel = select <8 x i1> %k, <8 x half> %fma, <8 x half> %x
  ret <8 x half> %sel
}

define <8 x half> @fma_mask_load_312_v8f16(<8 x half> %x, <8 x half> %y, ptr %zp, i8 %mask) {
; CHECK-LABEL: fma_mask_load_312_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vfmadd132ph (%rdi), %xmm1, %xmm0 {%k1}
; CHECK-NEXT:    retq
  %z = load <8 x half>, ptr %zp
  %fma = call <8 x half> @llvm.fma.v8f16(<8 x half> %z, <8 x half> %x, <8 x half> %y)
  %k = bitcast i8 %mask to <8 x i1>
  %sel = select <8 x i1> %k, <8 x half> %fma, <8 x half> %x
  ret <8 x half> %sel
}

; <8 x half>, zero masking with %z loaded through %zp.

define <8 x half> @fma_maskz_load_123_v8f16(<8 x half> %x, <8 x half> %y, ptr %zp, i8 %mask) {
; CHECK-LABEL: fma_maskz_load_123_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vfmadd213ph (%rdi), %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %z = load <8 x half>, ptr %zp
  %fma = call <8 x half> @llvm.fma.v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z)
  %k = bitcast i8 %mask to <8 x i1>
  %sel = select <8 x i1> %k, <8 x half> %fma, <8 x half> zeroinitializer
  ret <8 x half> %sel
}

define <8 x half> @fma_maskz_load_213_v8f16(<8 x half> %x, <8 x half> %y, ptr %zp, i8 %mask) {
; CHECK-LABEL: fma_maskz_load_213_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vfmadd213ph (%rdi), %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %z = load <8 x half>, ptr %zp
  %fma = call <8 x half> @llvm.fma.v8f16(<8 x half> %y, <8 x half> %x, <8 x half> %z)
  %k = bitcast i8 %mask to <8 x i1>
  %sel = select <8 x i1> %k, <8 x half> %fma, <8 x half> zeroinitializer
  ret <8 x half> %sel
}

; <8 x half>, zero masking with %z loaded through %zp (remaining operand orders).

define <8 x half> @fma_maskz_load_231_v8f16(<8 x half> %x, <8 x half> %y, ptr %zp, i8 %mask) {
; CHECK-LABEL: fma_maskz_load_231_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vfmadd231ph (%rdi), %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %z = load <8 x half>, ptr %zp
  %fma = call <8 x half> @llvm.fma.v8f16(<8 x half> %y, <8 x half> %z, <8 x half> %x)
  %k = bitcast i8 %mask to <8 x i1>
  %sel = select <8 x i1> %k, <8 x half> %fma, <8 x half> zeroinitializer
  ret <8 x half> %sel
}

define <8 x half> @fma_maskz_load_321_v8f16(<8 x half> %x, <8 x half> %y, ptr %zp, i8 %mask) {
; CHECK-LABEL: fma_maskz_load_321_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vfmadd231ph (%rdi), %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %z = load <8 x half>, ptr %zp
  %fma = call <8 x half> @llvm.fma.v8f16(<8 x half> %z, <8 x half> %y, <8 x half> %x)
  %k = bitcast i8 %mask to <8 x i1>
  %sel = select <8 x i1> %k, <8 x half> %fma, <8 x half> zeroinitializer
  ret <8 x half> %sel
}

define <8 x half> @fma_maskz_load_132_v8f16(<8 x half> %x, <8 x half> %y, ptr %zp, i8 %mask) {
; CHECK-LABEL: fma_maskz_load_132_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vfmadd132ph (%rdi), %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %z = load <8 x half>, ptr %zp
  %fma = call <8 x half> @llvm.fma.v8f16(<8 x half> %x, <8 x half> %z, <8 x half> %y)
  %k = bitcast i8 %mask to <8 x i1>
  %sel = select <8 x i1> %k, <8 x half> %fma, <8 x half> zeroinitializer
  ret <8 x half> %sel
}

define <8 x half> @fma_maskz_load_312_v8f16(<8 x half> %x, <8 x half> %y, ptr %zp, i8 %mask) {
; CHECK-LABEL: fma_maskz_load_312_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vfmadd132ph (%rdi), %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %z = load <8 x half>, ptr %zp
  %fma = call <8 x half> @llvm.fma.v8f16(<8 x half> %z, <8 x half> %x, <8 x half> %y)
  %k = bitcast i8 %mask to <8 x i1>
  %sel = select <8 x i1> %k, <8 x half> %fma, <8 x half> zeroinitializer
  ret <8 x half> %sel
}

; <16 x half>: all three operands arrive as function arguments.

define <16 x half> @fma_123_v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z) {
; CHECK-LABEL: fma_123_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd213ph %ymm2, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %fma = call <16 x half> @llvm.fma.v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z)
  ret <16 x half> %fma
}

define <16 x half> @fma_213_v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z) {
; CHECK-LABEL: fma_213_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd213ph %ymm2, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %fma = call <16 x half> @llvm.fma.v16f16(<16 x half> %y, <16 x half> %x, <16 x half> %z)
  ret <16 x half> %fma
}

define <16 x half> @fma_231_v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z) {
; CHECK-LABEL: fma_231_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd231ph %ymm1, %ymm2, %ymm0
; CHECK-NEXT:    retq
  %fma = call <16 x half> @llvm.fma.v16f16(<16 x half> %y, <16 x half> %z, <16 x half> %x)
  ret <16 x half> %fma
}

define <16 x half> @fma_321_v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z) {
; CHECK-LABEL: fma_321_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd231ph %ymm1, %ymm2, %ymm0
; CHECK-NEXT:    retq
  %fma = call <16 x half> @llvm.fma.v16f16(<16 x half> %z, <16 x half> %y, <16 x half> %x)
  ret <16 x half> %fma
}

define <16 x half> @fma_132_v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z) {
; CHECK-LABEL: fma_132_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd213ph %ymm1, %ymm2, %ymm0
; CHECK-NEXT:    retq
  %fma = call <16 x half> @llvm.fma.v16f16(<16 x half> %x, <16 x half> %z, <16 x half> %y)
  ret <16 x half> %fma
}

define <16 x half> @fma_312_v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z) {
; CHECK-LABEL: fma_312_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd213ph %ymm1, %ymm2, %ymm0
; CHECK-NEXT:    retq
  %fma = call <16 x half> @llvm.fma.v16f16(<16 x half> %z, <16 x half> %x, <16 x half> %y)
  ret <16 x half> %fma
}

; <16 x half>: %z is loaded through %zp before the call.

define <16 x half> @fma_load_123_v16f16(<16 x half> %x, <16 x half> %y, ptr %zp) {
; CHECK-LABEL: fma_load_123_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd213ph (%rdi), %ymm1, %ymm0
; CHECK-NEXT:    retq
  %z = load <16 x half>, ptr %zp
  %fma = call <16 x half> @llvm.fma.v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z)
  ret <16 x half> %fma
}

define <16 x half> @fma_load_213_v16f16(<16 x half> %x, <16 x half> %y, ptr %zp) {
; CHECK-LABEL: fma_load_213_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd213ph (%rdi), %ymm1, %ymm0
; CHECK-NEXT:    retq
  %z = load <16 x half>, ptr %zp
  %fma = call <16 x half> @llvm.fma.v16f16(<16 x half> %y, <16 x half> %x, <16 x half> %z)
  ret <16 x half> %fma
}

define <16 x half> @fma_load_231_v16f16(<16 x half> %x, <16 x half> %y, ptr %zp) {
; CHECK-LABEL: fma_load_231_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd231ph (%rdi), %ymm1, %ymm0
; CHECK-NEXT:    retq
  %z = load <16 x half>, ptr %zp
  %fma = call <16 x half> @llvm.fma.v16f16(<16 x half> %y, <16 x half> %z, <16 x half> %x)
  ret <16 x half> %fma
}

define <16 x half> @fma_load_321_v16f16(<16 x half> %x, <16 x half> %y, ptr %zp) {
; CHECK-LABEL: fma_load_321_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd231ph (%rdi), %ymm1, %ymm0
; CHECK-NEXT:    retq
  %z = load <16 x half>, ptr %zp
  %fma = call <16 x half> @llvm.fma.v16f16(<16 x half> %z, <16 x half> %y, <16 x half> %x)
  ret <16 x half> %fma
}

define <16 x half> @fma_load_132_v16f16(<16 x half> %x, <16 x half> %y, ptr %zp) {
; CHECK-LABEL: fma_load_132_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd132ph (%rdi), %ymm1, %ymm0
; CHECK-NEXT:    retq
  %z = load <16 x half>, ptr %zp
  %fma = call <16 x half> @llvm.fma.v16f16(<16 x half> %x, <16 x half> %z, <16 x half> %y)
  ret <16 x half> %fma
}

define <16 x half> @fma_load_312_v16f16(<16 x half> %x, <16 x half> %y, ptr %zp) {
; CHECK-LABEL: fma_load_312_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd132ph (%rdi), %ymm1, %ymm0
; CHECK-NEXT:    retq
  %z = load <16 x half>, ptr %zp
  %fma = call <16 x half> @llvm.fma.v16f16(<16 x half> %z, <16 x half> %x, <16 x half> %y)
  ret <16 x half> %fma
}

; <16 x half>, merge masking: lanes whose %mask bit is clear take %x instead of
; the fma result.

define <16 x half> @fma_mask_123_v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z, i16 %mask) {
; CHECK-LABEL: fma_mask_123_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfmadd132ph %ymm1, %ymm2, %ymm0 {%k1}
; CHECK-NEXT:    retq
  %fma = call <16 x half> @llvm.fma.v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z)
  %k = bitcast i16 %mask to <16 x i1>
  %sel = select <16 x i1> %k, <16 x half> %fma, <16 x half> %x
  ret <16 x half> %sel
}

define <16 x half> @fma_mask_213_v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z, i16 %mask) {
; CHECK-LABEL: fma_mask_213_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfmadd213ph %ymm2, %ymm1, %ymm0 {%k1}
; CHECK-NEXT:    retq
  %fma = call <16 x half> @llvm.fma.v16f16(<16 x half> %y, <16 x half> %x, <16 x half> %z)
  %k = bitcast i16 %mask to <16 x i1>
  %sel = select <16 x i1> %k, <16 x half> %fma, <16 x half> %x
  ret <16 x half> %sel
}

define <16 x half> @fma_mask_231_v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z, i16 %mask) {
; CHECK-LABEL: fma_mask_231_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfmadd231ph %ymm2, %ymm1, %ymm0 {%k1}
; CHECK-NEXT:    retq
  %fma = call <16 x half> @llvm.fma.v16f16(<16 x half> %y, <16 x half> %z, <16 x half> %x)
  %k = bitcast i16 %mask to <16 x i1>
  %sel = select <16 x i1> %k, <16 x half> %fma, <16 x half> %x
  ret <16 x half> %sel
}

define <16 x half> @fma_mask_321_v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z, i16 %mask) {
; CHECK-LABEL: fma_mask_321_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfmadd231ph %ymm1, %ymm2, %ymm0 {%k1}
; CHECK-NEXT:    retq
  %fma = call <16 x half> @llvm.fma.v16f16(<16 x half> %z, <16 x half> %y, <16 x half> %x)
  %k = bitcast i16 %mask to <16 x i1>
  %sel = select <16 x i1> %k, <16 x half> %fma, <16 x half> %x
  ret <16 x half> %sel
}

define <16 x half> @fma_mask_132_v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z, i16 %mask) {
; CHECK-LABEL: fma_mask_132_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfmadd132ph %ymm2, %ymm1, %ymm0 {%k1}
; CHECK-NEXT:    retq
  %fma = call <16 x half> @llvm.fma.v16f16(<16 x half> %x, <16 x half> %z, <16 x half> %y)
  %k = bitcast i16 %mask to <16 x i1>
  %sel = select <16 x i1> %k, <16 x half> %fma, <16 x half> %x
  ret <16 x half> %sel
}

define <16 x half> @fma_mask_312_v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z, i16 %mask) {
; CHECK-LABEL: fma_mask_312_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfmadd213ph %ymm1, %ymm2, %ymm0 {%k1}
; CHECK-NEXT:    retq
  %fma = call <16 x half> @llvm.fma.v16f16(<16 x half> %z, <16 x half> %x, <16 x half> %y)
  %k = bitcast i16 %mask to <16 x i1>
  %sel = select <16 x i1> %k, <16 x half> %fma, <16 x half> %x
  ret <16 x half> %sel
}

; <16 x half>, zero masking: lanes whose %mask bit is clear become zero.

define <16 x half> @fma_maskz_123_v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z, i16 %mask) {
; CHECK-LABEL: fma_maskz_123_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfmadd213ph %ymm2, %ymm1, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %fma = call <16 x half> @llvm.fma.v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z)
  %k = bitcast i16 %mask to <16 x i1>
  %sel = select <16 x i1> %k, <16 x half> %fma, <16 x half> zeroinitializer
  ret <16 x half> %sel
}

define <16 x half> @fma_maskz_213_v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z, i16 %mask) {
; CHECK-LABEL: fma_maskz_213_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfmadd213ph %ymm2, %ymm1, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %fma = call <16 x half> @llvm.fma.v16f16(<16 x half> %y, <16 x half> %x, <16 x half> %z)
  %k = bitcast i16 %mask to <16 x i1>
  %sel = select <16 x i1> %k, <16 x half> %fma, <16 x half> zeroinitializer
  ret <16 x half> %sel
}

define <16 x half> @fma_maskz_231_v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z, i16 %mask) {
; CHECK-LABEL: fma_maskz_231_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfmadd231ph %ymm1, %ymm2, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %fma = call <16 x half> @llvm.fma.v16f16(<16 x half> %y, <16 x half> %z, <16 x half> %x)
  %k = bitcast i16 %mask to <16 x i1>
  %sel = select <16 x i1> %k, <16 x half> %fma, <16 x half> zeroinitializer
  ret <16 x half> %sel
}

define <16 x half> @fma_maskz_321_v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z, i16 %mask) {
; CHECK-LABEL: fma_maskz_321_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfmadd231ph %ymm1, %ymm2, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %fma = call <16 x half> @llvm.fma.v16f16(<16 x half> %z, <16 x half> %y, <16 x half> %x)
  %k = bitcast i16 %mask to <16 x i1>
  %sel = select <16 x i1> %k, <16 x half> %fma, <16 x half> zeroinitializer
  ret <16 x half> %sel
}

define <16 x half> @fma_maskz_132_v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z, i16 %mask) {
; CHECK-LABEL: fma_maskz_132_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfmadd213ph %ymm1, %ymm2, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %fma = call <16 x half> @llvm.fma.v16f16(<16 x half> %x, <16 x half> %z, <16 x half> %y)
  %k = bitcast i16 %mask to <16 x i1>
  %sel = select <16 x i1> %k, <16 x half> %fma, <16 x half> zeroinitializer
  ret <16 x half> %sel
}

define <16 x half> @fma_maskz_312_v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z, i16 %mask) {
; CHECK-LABEL: fma_maskz_312_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfmadd213ph %ymm1, %ymm2, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %fma = call <16 x half> @llvm.fma.v16f16(<16 x half> %z, <16 x half> %x, <16 x half> %y)
  %k = bitcast i16 %mask to <16 x i1>
  %sel = select <16 x i1> %k, <16 x half> %fma, <16 x half> zeroinitializer
  ret <16 x half> %sel
}

; <16 x half>, merge masking with %z loaded through %zp.

define <16 x half> @fma_mask_load_123_v16f16(<16 x half> %x, <16 x half> %y, ptr %zp, i16 %mask) {
; CHECK-LABEL: fma_mask_load_123_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vfmadd213ph (%rdi), %ymm1, %ymm0 {%k1}
; CHECK-NEXT:    retq
  %z = load <16 x half>, ptr %zp
  %fma = call <16 x half> @llvm.fma.v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z)
  %k = bitcast i16 %mask to <16 x i1>
  %sel = select <16 x i1> %k, <16 x half> %fma, <16 x half> %x
  ret <16 x half> %sel
}

define <16 x half> @fma_mask_load_213_v16f16(<16 x half> %x, <16 x half> %y, ptr %zp, i16 %mask) {
; CHECK-LABEL: fma_mask_load_213_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vfmadd213ph (%rdi), %ymm1, %ymm0 {%k1}
; CHECK-NEXT:    retq
  %z = load <16 x half>, ptr %zp
  %fma = call <16 x half> @llvm.fma.v16f16(<16 x half> %y, <16 x half> %x, <16 x half> %z)
  %k = bitcast i16 %mask to <16 x i1>
  %sel = select <16 x i1> %k, <16 x half> %fma, <16 x half> %x
  ret <16 x half> %sel
}

define <16 x half> @fma_mask_load_231_v16f16(<16 x half> %x, <16 x half> %y, ptr %zp, i16 %mask) {
; CHECK-LABEL: fma_mask_load_231_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vfmadd231ph (%rdi), %ymm1, %ymm0 {%k1}
; CHECK-NEXT:    retq
  %z = load <16 x half>, ptr %zp
  %fma = call <16 x half> @llvm.fma.v16f16(<16 x half> %y, <16 x half> %z, <16 x half> %x)
  %k = bitcast i16 %mask to <16 x i1>
  %sel = select <16 x i1> %k, <16 x half> %fma, <16 x half> %x
  ret <16 x half> %sel
}

define <16 x half> @fma_mask_load_321_v16f16(<16 x half> %x, <16 x half> %y, ptr %zp, i16 %mask) {
; CHECK-LABEL: fma_mask_load_321_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vfmadd231ph (%rdi), %ymm1, %ymm0 {%k1}
; CHECK-NEXT:    retq
  %z = load <16 x half>, ptr %zp
  %fma = call <16 x half> @llvm.fma.v16f16(<16 x half> %z, <16 x half> %y, <16 x half> %x)
  %k = bitcast i16 %mask to <16 x i1>
  %sel = select <16 x i1> %k, <16 x half> %fma, <16 x half> %x
  ret <16 x half> %sel
}

define <16 x half> @fma_mask_load_132_v16f16(<16 x half> %x, <16 x half> %y, ptr %zp, i16 %mask) {
; CHECK-LABEL: fma_mask_load_132_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vfmadd132ph (%rdi), %ymm1, %ymm0 {%k1}
; CHECK-NEXT:    retq
  %z = load <16 x half>, ptr %zp
  %fma = call <16 x half> @llvm.fma.v16f16(<16 x half> %x, <16 x half> %z, <16 x half> %y)
  %k = bitcast i16 %mask to <16 x i1>
  %sel = select <16 x i1> %k, <16 x half> %fma, <16 x half> %x
  ret <16 x half> %sel
}

define <16 x half> @fma_mask_load_312_v16f16(<16 x half> %x, <16 x half> %y, ptr %zp, i16 %mask) {
; CHECK-LABEL: fma_mask_load_312_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vfmadd132ph (%rdi), %ymm1, %ymm0 {%k1}
; CHECK-NEXT:    retq
  %z = load <16 x half>, ptr %zp
  %fma = call <16 x half> @llvm.fma.v16f16(<16 x half> %z, <16 x half> %x, <16 x half> %y)
  %k = bitcast i16 %mask to <16 x i1>
  %sel = select <16 x i1> %k, <16 x half> %fma, <16 x half> %x
  ret <16 x half> %sel
}

; <16 x half>, zero masking with %z loaded through %zp.

define <16 x half> @fma_maskz_load_123_v16f16(<16 x half> %x, <16 x half> %y, ptr %zp, i16 %mask) {
; CHECK-LABEL: fma_maskz_load_123_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vfmadd213ph (%rdi), %ymm1, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %z = load <16 x half>, ptr %zp
  %fma = call <16 x half> @llvm.fma.v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z)
  %k = bitcast i16 %mask to <16 x i1>
  %sel = select <16 x i1> %k, <16 x half> %fma, <16 x half> zeroinitializer
  ret <16 x half> %sel
}

define <16 x half> @fma_maskz_load_213_v16f16(<16 x half> %x, <16 x half> %y, ptr %zp, i16 %mask) {
; CHECK-LABEL: fma_maskz_load_213_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vfmadd213ph (%rdi), %ymm1, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %z = load <16 x half>, ptr %zp
  %fma = call <16 x half> @llvm.fma.v16f16(<16 x half> %y, <16 x half> %x, <16 x half> %z)
  %k = bitcast i16 %mask to <16 x i1>
  %sel = select <16 x i1> %k, <16 x half> %fma, <16 x half> zeroinitializer
  ret <16 x half> %sel
}

define <16 x half> @fma_maskz_load_231_v16f16(<16 x half> %x, <16 x half> %y, ptr %zp, i16 %mask) {
; CHECK-LABEL: fma_maskz_load_231_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vfmadd231ph (%rdi), %ymm1, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %z = load <16 x half>, ptr %zp
  %fma = call <16 x half> @llvm.fma.v16f16(<16 x half> %y, <16 x half> %z, <16 x half> %x)
  %k = bitcast i16 %mask to <16 x i1>
  %sel = select <16 x i1> %k, <16 x half> %fma, <16 x half> zeroinitializer
  ret <16 x half> %sel
}

define <16 x half> @fma_maskz_load_321_v16f16(<16 x half> %x, <16 x half> %y, ptr %zp, i16 %mask) {
; CHECK-LABEL: fma_maskz_load_321_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vfmadd231ph (%rdi), %ymm1, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %z = load <16 x half>, ptr %zp
  %fma = call <16 x half> @llvm.fma.v16f16(<16 x half> %z, <16 x half> %y, <16 x half> %x)
  %k = bitcast i16 %mask to <16 x i1>
  %sel = select <16 x i1> %k, <16 x half> %fma, <16 x half> zeroinitializer
  ret <16 x half> %sel
}

define <16 x half> @fma_maskz_load_132_v16f16(<16 x half> %x, <16 x half> %y, ptr %zp, i16 %mask) {
; CHECK-LABEL: fma_maskz_load_132_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vfmadd132ph (%rdi), %ymm1, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %z = load <16 x half>, ptr %zp
  %fma = call <16 x half> @llvm.fma.v16f16(<16 x half> %x, <16 x half> %z, <16 x half> %y)
  %k = bitcast i16 %mask to <16 x i1>
  %sel = select <16 x i1> %k, <16 x half> %fma, <16 x half> zeroinitializer
  ret <16 x half> %sel
}

define <16 x half> @fma_maskz_load_312_v16f16(<16 x half> %x, <16 x half> %y, ptr %zp, i16 %mask) {
; CHECK-LABEL: fma_maskz_load_312_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vfmadd132ph (%rdi), %ymm1, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %z = load <16 x half>, ptr %zp
  %fma = call <16 x half> @llvm.fma.v16f16(<16 x half> %z, <16 x half> %x, <16 x half> %y)
  %k = bitcast i16 %mask to <16 x i1>
  %sel = select <16 x i1> %k, <16 x half> %fma, <16 x half> zeroinitializer
  ret <16 x half> %sel
}

; <32 x half>: all three operands arrive as function arguments.

define <32 x half> @fma_123_v32f16(<32 x half> %x, <32 x half> %y, <32 x half> %z) {
; CHECK-LABEL: fma_123_v32f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd213ph %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %fma = call <32 x half> @llvm.fma.v32f16(<32 x half> %x, <32 x half> %y, <32 x half> %z)
  ret <32 x half> %fma
}

define <32 x half> @fma_213_v32f16(<32 x half> %x, <32 x half> %y, <32 x half> %z) {
; CHECK-LABEL: fma_213_v32f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd213ph %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %fma = call <32 x half> @llvm.fma.v32f16(<32 x half> %y, <32 x half> %x, <32 x half> %z)
  ret <32 x half> %fma
}

define <32 x half> @fma_231_v32f16(<32 x half> %x, <32 x half> %y, <32 x half> %z) {
; CHECK-LABEL: fma_231_v32f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd231ph %zmm1, %zmm2, %zmm0
; CHECK-NEXT:    retq
  %fma = call <32 x half> @llvm.fma.v32f16(<32 x half> %y, <32 x half> %z, <32 x half> %x)
  ret <32 x half> %fma
}

define <32 x half> @fma_321_v32f16(<32 x half> %x, <32 x half> %y, <32 x half> %z) {
; CHECK-LABEL: fma_321_v32f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd231ph %zmm1, %zmm2, %zmm0
; CHECK-NEXT:    retq
  %fma = call <32 x half> @llvm.fma.v32f16(<32 x half> %z, <32 x half> %y, <32 x half> %x)
  ret <32 x half> %fma
}

define <32 x half> @fma_132_v32f16(<32 x half> %x, <32 x half> %y, <32 x half> %z) {
; CHECK-LABEL: fma_132_v32f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd213ph %zmm1, %zmm2, %zmm0
; CHECK-NEXT:    retq
  %fma = call <32 x half> @llvm.fma.v32f16(<32 x half> %x, <32 x half> %z, <32 x half> %y)
  ret <32 x half> %fma
}

define <32 x half> @fma_312_v32f16(<32 x half> %x, <32 x half> %y, <32 x half> %z) {
; CHECK-LABEL: fma_312_v32f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd213ph %zmm1, %zmm2, %zmm0
; CHECK-NEXT:    retq
  %fma = call <32 x half> @llvm.fma.v32f16(<32 x half> %z, <32 x half> %x, <32 x half> %y)
  ret <32 x half> %fma
}

; <32 x half>: %z is loaded through %zp before the call.

define <32 x half> @fma_load_123_v32f16(<32 x half> %x, <32 x half> %y, ptr %zp) {
; CHECK-LABEL: fma_load_123_v32f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd213ph (%rdi), %zmm1, %zmm0
; CHECK-NEXT:    retq
  %z = load <32 x half>, ptr %zp
  %fma = call <32 x half> @llvm.fma.v32f16(<32 x half> %x, <32 x half> %y, <32 x half> %z)
  ret <32 x half> %fma
}

define <32 x half> @fma_load_213_v32f16(<32 x half> %x, <32 x half> %y, ptr %zp) {
; CHECK-LABEL: fma_load_213_v32f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd213ph (%rdi), %zmm1, %zmm0
; CHECK-NEXT:    retq
  %z = load <32 x half>, ptr %zp
  %fma = call <32 x half> @llvm.fma.v32f16(<32 x half> %y, <32 x half> %x, <32 x half> %z)
  ret <32 x half> %fma
}

define <32 x half> @fma_load_231_v32f16(<32 x half> %x, <32 x half> %y, ptr %zp) {
; CHECK-LABEL: 
fma_load_231_v32f16: 1027; CHECK: # %bb.0: 1028; CHECK-NEXT: vfmadd231ph (%rdi), %zmm1, %zmm0 1029; CHECK-NEXT: retq 1030 %z = load <32 x half>, ptr %zp 1031 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %y, <32 x half> %z, <32 x half> %x) 1032 ret <32 x half> %a 1033} 1034 1035define <32 x half> @fma_load_321_v32f16(<32 x half> %x, <32 x half> %y, ptr %zp) { 1036; CHECK-LABEL: fma_load_321_v32f16: 1037; CHECK: # %bb.0: 1038; CHECK-NEXT: vfmadd231ph (%rdi), %zmm1, %zmm0 1039; CHECK-NEXT: retq 1040 %z = load <32 x half>, ptr %zp 1041 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %z, <32 x half> %y, <32 x half> %x) 1042 ret <32 x half> %a 1043} 1044 1045define <32 x half> @fma_load_132_v32f16(<32 x half> %x, <32 x half> %y, ptr %zp) { 1046; CHECK-LABEL: fma_load_132_v32f16: 1047; CHECK: # %bb.0: 1048; CHECK-NEXT: vfmadd132ph (%rdi), %zmm1, %zmm0 1049; CHECK-NEXT: retq 1050 %z = load <32 x half>, ptr %zp 1051 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %x, <32 x half> %z, <32 x half> %y) 1052 ret <32 x half> %a 1053} 1054 1055define <32 x half> @fma_load_312_v32f16(<32 x half> %x, <32 x half> %y, ptr %zp) { 1056; CHECK-LABEL: fma_load_312_v32f16: 1057; CHECK: # %bb.0: 1058; CHECK-NEXT: vfmadd132ph (%rdi), %zmm1, %zmm0 1059; CHECK-NEXT: retq 1060 %z = load <32 x half>, ptr %zp 1061 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %z, <32 x half> %x, <32 x half> %y) 1062 ret <32 x half> %a 1063} 1064 1065define <32 x half> @fma_mask_123_v32f16(<32 x half> %x, <32 x half> %y, <32 x half> %z, i32 %mask) { 1066; CHECK-LABEL: fma_mask_123_v32f16: 1067; CHECK: # %bb.0: 1068; CHECK-NEXT: kmovd %edi, %k1 1069; CHECK-NEXT: vfmadd132ph %zmm1, %zmm2, %zmm0 {%k1} 1070; CHECK-NEXT: retq 1071 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %x, <32 x half> %y, <32 x half> %z) 1072 %b = bitcast i32 %mask to <32 x i1> 1073 %c = select <32 x i1> %b, <32 x half> %a, <32 x half> %x 1074 ret <32 x half> %c 1075} 1076 1077define <32 x half> @fma_mask_213_v32f16(<32 x 
half> %x, <32 x half> %y, <32 x half> %z, i32 %mask) { 1078; CHECK-LABEL: fma_mask_213_v32f16: 1079; CHECK: # %bb.0: 1080; CHECK-NEXT: kmovd %edi, %k1 1081; CHECK-NEXT: vfmadd213ph %zmm2, %zmm1, %zmm0 {%k1} 1082; CHECK-NEXT: retq 1083 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %y, <32 x half> %x, <32 x half> %z) 1084 %b = bitcast i32 %mask to <32 x i1> 1085 %c = select <32 x i1> %b, <32 x half> %a, <32 x half> %x 1086 ret <32 x half> %c 1087} 1088 1089define <32 x half> @fma_mask_231_v32f16(<32 x half> %x, <32 x half> %y, <32 x half> %z, i32 %mask) { 1090; CHECK-LABEL: fma_mask_231_v32f16: 1091; CHECK: # %bb.0: 1092; CHECK-NEXT: kmovd %edi, %k1 1093; CHECK-NEXT: vfmadd231ph %zmm2, %zmm1, %zmm0 {%k1} 1094; CHECK-NEXT: retq 1095 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %y, <32 x half> %z, <32 x half> %x) 1096 %b = bitcast i32 %mask to <32 x i1> 1097 %c = select <32 x i1> %b, <32 x half> %a, <32 x half> %x 1098 ret <32 x half> %c 1099} 1100 1101define <32 x half> @fma_mask_321_v32f16(<32 x half> %x, <32 x half> %y, <32 x half> %z, i32 %mask) { 1102; CHECK-LABEL: fma_mask_321_v32f16: 1103; CHECK: # %bb.0: 1104; CHECK-NEXT: kmovd %edi, %k1 1105; CHECK-NEXT: vfmadd231ph %zmm1, %zmm2, %zmm0 {%k1} 1106; CHECK-NEXT: retq 1107 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %z, <32 x half> %y, <32 x half> %x) 1108 %b = bitcast i32 %mask to <32 x i1> 1109 %c = select <32 x i1> %b, <32 x half> %a, <32 x half> %x 1110 ret <32 x half> %c 1111} 1112 1113define <32 x half> @fma_mask_132_v32f16(<32 x half> %x, <32 x half> %y, <32 x half> %z, i32 %mask) { 1114; CHECK-LABEL: fma_mask_132_v32f16: 1115; CHECK: # %bb.0: 1116; CHECK-NEXT: kmovd %edi, %k1 1117; CHECK-NEXT: vfmadd132ph %zmm2, %zmm1, %zmm0 {%k1} 1118; CHECK-NEXT: retq 1119 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %x, <32 x half> %z, <32 x half> %y) 1120 %b = bitcast i32 %mask to <32 x i1> 1121 %c = select <32 x i1> %b, <32 x half> %a, <32 x half> %x 1122 ret <32 x half> %c 1123} 1124 
1125define <32 x half> @fma_mask_312_v32f16(<32 x half> %x, <32 x half> %y, <32 x half> %z, i32 %mask) { 1126; CHECK-LABEL: fma_mask_312_v32f16: 1127; CHECK: # %bb.0: 1128; CHECK-NEXT: kmovd %edi, %k1 1129; CHECK-NEXT: vfmadd213ph %zmm1, %zmm2, %zmm0 {%k1} 1130; CHECK-NEXT: retq 1131 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %z, <32 x half> %x, <32 x half> %y) 1132 %b = bitcast i32 %mask to <32 x i1> 1133 %c = select <32 x i1> %b, <32 x half> %a, <32 x half> %x 1134 ret <32 x half> %c 1135} 1136 1137define <32 x half> @fma_maskz_123_v32f16(<32 x half> %x, <32 x half> %y, <32 x half> %z, i32 %mask) { 1138; CHECK-LABEL: fma_maskz_123_v32f16: 1139; CHECK: # %bb.0: 1140; CHECK-NEXT: kmovd %edi, %k1 1141; CHECK-NEXT: vfmadd213ph %zmm2, %zmm1, %zmm0 {%k1} {z} 1142; CHECK-NEXT: retq 1143 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %x, <32 x half> %y, <32 x half> %z) 1144 %b = bitcast i32 %mask to <32 x i1> 1145 %c = select <32 x i1> %b, <32 x half> %a, <32 x half> zeroinitializer 1146 ret <32 x half> %c 1147} 1148 1149define <32 x half> @fma_maskz_213_v32f16(<32 x half> %x, <32 x half> %y, <32 x half> %z, i32 %mask) { 1150; CHECK-LABEL: fma_maskz_213_v32f16: 1151; CHECK: # %bb.0: 1152; CHECK-NEXT: kmovd %edi, %k1 1153; CHECK-NEXT: vfmadd213ph %zmm2, %zmm1, %zmm0 {%k1} {z} 1154; CHECK-NEXT: retq 1155 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %y, <32 x half> %x, <32 x half> %z) 1156 %b = bitcast i32 %mask to <32 x i1> 1157 %c = select <32 x i1> %b, <32 x half> %a, <32 x half> zeroinitializer 1158 ret <32 x half> %c 1159} 1160 1161define <32 x half> @fma_maskz_231_v32f16(<32 x half> %x, <32 x half> %y, <32 x half> %z, i32 %mask) { 1162; CHECK-LABEL: fma_maskz_231_v32f16: 1163; CHECK: # %bb.0: 1164; CHECK-NEXT: kmovd %edi, %k1 1165; CHECK-NEXT: vfmadd231ph %zmm1, %zmm2, %zmm0 {%k1} {z} 1166; CHECK-NEXT: retq 1167 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %y, <32 x half> %z, <32 x half> %x) 1168 %b = bitcast i32 %mask to <32 x i1> 1169 %c = 
select <32 x i1> %b, <32 x half> %a, <32 x half> zeroinitializer 1170 ret <32 x half> %c 1171} 1172 1173define <32 x half> @fma_maskz_321_v32f16(<32 x half> %x, <32 x half> %y, <32 x half> %z, i32 %mask) { 1174; CHECK-LABEL: fma_maskz_321_v32f16: 1175; CHECK: # %bb.0: 1176; CHECK-NEXT: kmovd %edi, %k1 1177; CHECK-NEXT: vfmadd231ph %zmm1, %zmm2, %zmm0 {%k1} {z} 1178; CHECK-NEXT: retq 1179 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %z, <32 x half> %y, <32 x half> %x) 1180 %b = bitcast i32 %mask to <32 x i1> 1181 %c = select <32 x i1> %b, <32 x half> %a, <32 x half> zeroinitializer 1182 ret <32 x half> %c 1183} 1184 1185define <32 x half> @fma_maskz_132_v32f16(<32 x half> %x, <32 x half> %y, <32 x half> %z, i32 %mask) { 1186; CHECK-LABEL: fma_maskz_132_v32f16: 1187; CHECK: # %bb.0: 1188; CHECK-NEXT: kmovd %edi, %k1 1189; CHECK-NEXT: vfmadd213ph %zmm1, %zmm2, %zmm0 {%k1} {z} 1190; CHECK-NEXT: retq 1191 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %x, <32 x half> %z, <32 x half> %y) 1192 %b = bitcast i32 %mask to <32 x i1> 1193 %c = select <32 x i1> %b, <32 x half> %a, <32 x half> zeroinitializer 1194 ret <32 x half> %c 1195} 1196 1197define <32 x half> @fma_maskz_312_v32f16(<32 x half> %x, <32 x half> %y, <32 x half> %z, i32 %mask) { 1198; CHECK-LABEL: fma_maskz_312_v32f16: 1199; CHECK: # %bb.0: 1200; CHECK-NEXT: kmovd %edi, %k1 1201; CHECK-NEXT: vfmadd213ph %zmm1, %zmm2, %zmm0 {%k1} {z} 1202; CHECK-NEXT: retq 1203 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %z, <32 x half> %x, <32 x half> %y) 1204 %b = bitcast i32 %mask to <32 x i1> 1205 %c = select <32 x i1> %b, <32 x half> %a, <32 x half> zeroinitializer 1206 ret <32 x half> %c 1207} 1208 1209define <32 x half> @fma_mask_load_123_v32f16(<32 x half> %x, <32 x half> %y, ptr %zp, i32 %mask) { 1210; CHECK-LABEL: fma_mask_load_123_v32f16: 1211; CHECK: # %bb.0: 1212; CHECK-NEXT: kmovd %esi, %k1 1213; CHECK-NEXT: vfmadd213ph (%rdi), %zmm1, %zmm0 {%k1} 1214; CHECK-NEXT: retq 1215 %z = load <32 x half>, 
ptr %zp 1216 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %x, <32 x half> %y, <32 x half> %z) 1217 %b = bitcast i32 %mask to <32 x i1> 1218 %c = select <32 x i1> %b, <32 x half> %a, <32 x half> %x 1219 ret <32 x half> %c 1220} 1221 1222define <32 x half> @fma_mask_load_213_v32f16(<32 x half> %x, <32 x half> %y, ptr %zp, i32 %mask) { 1223; CHECK-LABEL: fma_mask_load_213_v32f16: 1224; CHECK: # %bb.0: 1225; CHECK-NEXT: kmovd %esi, %k1 1226; CHECK-NEXT: vfmadd213ph (%rdi), %zmm1, %zmm0 {%k1} 1227; CHECK-NEXT: retq 1228 %z = load <32 x half>, ptr %zp 1229 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %y, <32 x half> %x, <32 x half> %z) 1230 %b = bitcast i32 %mask to <32 x i1> 1231 %c = select <32 x i1> %b, <32 x half> %a, <32 x half> %x 1232 ret <32 x half> %c 1233} 1234 1235define <32 x half> @fma_mask_load_231_v32f16(<32 x half> %x, <32 x half> %y, ptr %zp, i32 %mask) { 1236; CHECK-LABEL: fma_mask_load_231_v32f16: 1237; CHECK: # %bb.0: 1238; CHECK-NEXT: kmovd %esi, %k1 1239; CHECK-NEXT: vfmadd231ph (%rdi), %zmm1, %zmm0 {%k1} 1240; CHECK-NEXT: retq 1241 %z = load <32 x half>, ptr %zp 1242 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %y, <32 x half> %z, <32 x half> %x) 1243 %b = bitcast i32 %mask to <32 x i1> 1244 %c = select <32 x i1> %b, <32 x half> %a, <32 x half> %x 1245 ret <32 x half> %c 1246} 1247 1248define <32 x half> @fma_mask_load_321_v32f16(<32 x half> %x, <32 x half> %y, ptr %zp, i32 %mask) { 1249; CHECK-LABEL: fma_mask_load_321_v32f16: 1250; CHECK: # %bb.0: 1251; CHECK-NEXT: kmovd %esi, %k1 1252; CHECK-NEXT: vfmadd231ph (%rdi), %zmm1, %zmm0 {%k1} 1253; CHECK-NEXT: retq 1254 %z = load <32 x half>, ptr %zp 1255 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %z, <32 x half> %y, <32 x half> %x) 1256 %b = bitcast i32 %mask to <32 x i1> 1257 %c = select <32 x i1> %b, <32 x half> %a, <32 x half> %x 1258 ret <32 x half> %c 1259} 1260 1261define <32 x half> @fma_mask_load_132_v32f16(<32 x half> %x, <32 x half> %y, ptr %zp, i32 %mask) { 1262; 
CHECK-LABEL: fma_mask_load_132_v32f16: 1263; CHECK: # %bb.0: 1264; CHECK-NEXT: kmovd %esi, %k1 1265; CHECK-NEXT: vfmadd132ph (%rdi), %zmm1, %zmm0 {%k1} 1266; CHECK-NEXT: retq 1267 %z = load <32 x half>, ptr %zp 1268 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %x, <32 x half> %z, <32 x half> %y) 1269 %b = bitcast i32 %mask to <32 x i1> 1270 %c = select <32 x i1> %b, <32 x half> %a, <32 x half> %x 1271 ret <32 x half> %c 1272} 1273 1274define <32 x half> @fma_mask_load_312_v32f16(<32 x half> %x, <32 x half> %y, ptr %zp, i32 %mask) { 1275; CHECK-LABEL: fma_mask_load_312_v32f16: 1276; CHECK: # %bb.0: 1277; CHECK-NEXT: kmovd %esi, %k1 1278; CHECK-NEXT: vfmadd132ph (%rdi), %zmm1, %zmm0 {%k1} 1279; CHECK-NEXT: retq 1280 %z = load <32 x half>, ptr %zp 1281 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %z, <32 x half> %x, <32 x half> %y) 1282 %b = bitcast i32 %mask to <32 x i1> 1283 %c = select <32 x i1> %b, <32 x half> %a, <32 x half> %x 1284 ret <32 x half> %c 1285} 1286 1287define <32 x half> @fma_maskz_load_123_v32f16(<32 x half> %x, <32 x half> %y, ptr %zp, i32 %mask) { 1288; CHECK-LABEL: fma_maskz_load_123_v32f16: 1289; CHECK: # %bb.0: 1290; CHECK-NEXT: kmovd %esi, %k1 1291; CHECK-NEXT: vfmadd213ph (%rdi), %zmm1, %zmm0 {%k1} {z} 1292; CHECK-NEXT: retq 1293 %z = load <32 x half>, ptr %zp 1294 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %x, <32 x half> %y, <32 x half> %z) 1295 %b = bitcast i32 %mask to <32 x i1> 1296 %c = select <32 x i1> %b, <32 x half> %a, <32 x half> zeroinitializer 1297 ret <32 x half> %c 1298} 1299 1300define <32 x half> @fma_maskz_load_213_v32f16(<32 x half> %x, <32 x half> %y, ptr %zp, i32 %mask) { 1301; CHECK-LABEL: fma_maskz_load_213_v32f16: 1302; CHECK: # %bb.0: 1303; CHECK-NEXT: kmovd %esi, %k1 1304; CHECK-NEXT: vfmadd213ph (%rdi), %zmm1, %zmm0 {%k1} {z} 1305; CHECK-NEXT: retq 1306 %z = load <32 x half>, ptr %zp 1307 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %y, <32 x half> %x, <32 x half> %z) 1308 %b = bitcast i32 
%mask to <32 x i1> 1309 %c = select <32 x i1> %b, <32 x half> %a, <32 x half> zeroinitializer 1310 ret <32 x half> %c 1311} 1312 1313define <32 x half> @fma_maskz_load_231_v32f16(<32 x half> %x, <32 x half> %y, ptr %zp, i32 %mask) { 1314; CHECK-LABEL: fma_maskz_load_231_v32f16: 1315; CHECK: # %bb.0: 1316; CHECK-NEXT: kmovd %esi, %k1 1317; CHECK-NEXT: vfmadd231ph (%rdi), %zmm1, %zmm0 {%k1} {z} 1318; CHECK-NEXT: retq 1319 %z = load <32 x half>, ptr %zp 1320 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %y, <32 x half> %z, <32 x half> %x) 1321 %b = bitcast i32 %mask to <32 x i1> 1322 %c = select <32 x i1> %b, <32 x half> %a, <32 x half> zeroinitializer 1323 ret <32 x half> %c 1324} 1325 1326define <32 x half> @fma_maskz_load_321_v32f16(<32 x half> %x, <32 x half> %y, ptr %zp, i32 %mask) { 1327; CHECK-LABEL: fma_maskz_load_321_v32f16: 1328; CHECK: # %bb.0: 1329; CHECK-NEXT: kmovd %esi, %k1 1330; CHECK-NEXT: vfmadd231ph (%rdi), %zmm1, %zmm0 {%k1} {z} 1331; CHECK-NEXT: retq 1332 %z = load <32 x half>, ptr %zp 1333 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %z, <32 x half> %y, <32 x half> %x) 1334 %b = bitcast i32 %mask to <32 x i1> 1335 %c = select <32 x i1> %b, <32 x half> %a, <32 x half> zeroinitializer 1336 ret <32 x half> %c 1337} 1338 1339define <32 x half> @fma_maskz_load_132_v32f16(<32 x half> %x, <32 x half> %y, ptr %zp, i32 %mask) { 1340; CHECK-LABEL: fma_maskz_load_132_v32f16: 1341; CHECK: # %bb.0: 1342; CHECK-NEXT: kmovd %esi, %k1 1343; CHECK-NEXT: vfmadd132ph (%rdi), %zmm1, %zmm0 {%k1} {z} 1344; CHECK-NEXT: retq 1345 %z = load <32 x half>, ptr %zp 1346 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %x, <32 x half> %z, <32 x half> %y) 1347 %b = bitcast i32 %mask to <32 x i1> 1348 %c = select <32 x i1> %b, <32 x half> %a, <32 x half> zeroinitializer 1349 ret <32 x half> %c 1350} 1351 1352define <32 x half> @fma_maskz_load_312_v32f16(<32 x half> %x, <32 x half> %y, ptr %zp, i32 %mask) { 1353; CHECK-LABEL: fma_maskz_load_312_v32f16: 1354; CHECK: 
# %bb.0: 1355; CHECK-NEXT: kmovd %esi, %k1 1356; CHECK-NEXT: vfmadd132ph (%rdi), %zmm1, %zmm0 {%k1} {z} 1357; CHECK-NEXT: retq 1358 %z = load <32 x half>, ptr %zp 1359 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %z, <32 x half> %x, <32 x half> %y) 1360 %b = bitcast i32 %mask to <32 x i1> 1361 %c = select <32 x i1> %b, <32 x half> %a, <32 x half> zeroinitializer 1362 ret <32 x half> %c 1363} 1364