1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -verify-machineinstrs -mcpu=sapphirerapids -mattr=+false-deps-mulc -mtriple=x86_64-unknown-unknown < %s | FileCheck %s --check-prefixes=ENABLE 3; RUN: llc -verify-machineinstrs -mcpu=sapphirerapids -mattr=-false-deps-mulc -mtriple=x86_64-unknown-unknown < %s | FileCheck %s --check-prefixes=DISABLE 4 5define <16 x float> @fmulcph(<16 x float> %a0, <16 x float> %a1) { 6; ENABLE-LABEL: fmulcph: 7; ENABLE: # %bb.0: 8; ENABLE-NEXT: vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill 9; ENABLE-NEXT: #APP 10; ENABLE-NEXT: nop 11; ENABLE-NEXT: #NO_APP 12; ENABLE-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload 13; ENABLE-NEXT: vpxor %xmm2, %xmm2, %xmm2 14; ENABLE-NEXT: vfmulcph %zmm1, %zmm0, %zmm2 15; ENABLE-NEXT: vmovaps %zmm2, %zmm0 16; ENABLE-NEXT: retq 17; 18; DISABLE-LABEL: fmulcph: 19; DISABLE: # %bb.0: 20; DISABLE-NEXT: vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill 21; DISABLE-NEXT: #APP 22; DISABLE-NEXT: nop 23; DISABLE-NEXT: #NO_APP 24; DISABLE-NEXT: vfmulcph {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm2 # 64-byte Folded Reload 25; DISABLE-NEXT: vmovaps %zmm2, %zmm0 26; DISABLE-NEXT: retq 27 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() 28 %2 = call <16 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.512(<16 x float> %a0, <16 x float> %a1, <16 x float> undef, i16 -1, i32 4) 29 ret <16 x float> %2 30} 31 32define <16 x float> @fmulcph_mem(<16 x float> %a0, ptr %p1) { 33; ENABLE-LABEL: fmulcph_mem: 34; ENABLE: # %bb.0: 35; ENABLE-NEXT: vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill 36; ENABLE-NEXT: #APP 37; ENABLE-NEXT: nop 38; ENABLE-NEXT: #NO_APP 39; ENABLE-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload 40; ENABLE-NEXT: vpxor %xmm1, %xmm1, %xmm1 41; ENABLE-NEXT: vfmulcph (%rdi), %zmm0, %zmm1 42; ENABLE-NEXT: vmovaps %zmm1, %zmm0 43; ENABLE-NEXT: retq 44; 45; DISABLE-LABEL: fmulcph_mem: 46; DISABLE: # %bb.0: 47; DISABLE-NEXT: vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill 48; DISABLE-NEXT: #APP 49; DISABLE-NEXT: nop 50; DISABLE-NEXT: #NO_APP 51; DISABLE-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload 52; DISABLE-NEXT: vfmulcph (%rdi), %zmm0, %zmm1 53; DISABLE-NEXT: vmovaps %zmm1, %zmm0 54; DISABLE-NEXT: retq 55 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() 56 %a1 = load <16 x float>, ptr %p1, align 64 57 %2 = call <16 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.512(<16 x float> %a0, <16 x float> %a1, <16 x float> undef, i16 -1, i32 4) 58 ret <16 x float> %2 59} 60 61define <16 x float> @fmulcph_broadcast(<16 x float> %a0, ptr %p1) { 62; ENABLE-LABEL: fmulcph_broadcast: 63; ENABLE: # %bb.0: 64; ENABLE-NEXT: vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill 65; ENABLE-NEXT: #APP 66; ENABLE-NEXT: nop 67; ENABLE-NEXT: #NO_APP 68; ENABLE-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload 69; ENABLE-NEXT: vpxor %xmm1, %xmm1, %xmm1 70; ENABLE-NEXT: vfmulcph (%rdi){1to16}, %zmm0, %zmm1 71; ENABLE-NEXT: vmovaps %zmm1, %zmm0 72; ENABLE-NEXT: retq 73; 74; DISABLE-LABEL: fmulcph_broadcast: 75; DISABLE: # %bb.0: 76; DISABLE-NEXT: vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill 77; DISABLE-NEXT: #APP 78; DISABLE-NEXT: nop 79; DISABLE-NEXT: #NO_APP 80; DISABLE-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload 81; DISABLE-NEXT: vfmulcph (%rdi){1to16}, %zmm0, %zmm1 82; DISABLE-NEXT: vmovaps %zmm1, %zmm0 83; DISABLE-NEXT: retq 84 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() 85 %v1 = load float, ptr %p1, align 4 86 %t0 = insertelement <16 x float> undef, float %v1, i64 0 87 %a1 = shufflevector <16 x float> %t0, <16 x float> undef, <16 x i32> zeroinitializer 88 %2 = call <16 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.512(<16 x float> %a0, <16 x float> %a1, <16 x float> undef, i16 -1, i32 4) 89 ret <16 x float> %2 90} 91 92define <16 x float> @fmulcph_maskz(<16 x float> %a0, <16 x float> %a1, ptr %mask) { 93; ENABLE-LABEL: fmulcph_maskz: 94; ENABLE: # %bb.0: 95; ENABLE-NEXT: vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill 96; ENABLE-NEXT: #APP 97; ENABLE-NEXT: nop 98; ENABLE-NEXT: #NO_APP 99; ENABLE-NEXT: kmovw (%rdi), %k1 100; ENABLE-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload 101; ENABLE-NEXT: vpxor %xmm2, %xmm2, %xmm2 102; ENABLE-NEXT: vfmulcph %zmm1, %zmm0, %zmm2 {%k1} {z} 103; ENABLE-NEXT: vmovaps %zmm2, %zmm0 104; ENABLE-NEXT: retq 105; 106; DISABLE-LABEL: fmulcph_maskz: 107; DISABLE: # %bb.0: 108; DISABLE-NEXT: vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill 109; DISABLE-NEXT: #APP 110; DISABLE-NEXT: nop 111; DISABLE-NEXT: #NO_APP 112; DISABLE-NEXT: kmovw (%rdi), %k1 113; DISABLE-NEXT: vfmulcph {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm2 {%k1} {z} # 64-byte Folded Reload 114; DISABLE-NEXT: vmovaps %zmm2, %zmm0 115; DISABLE-NEXT: retq 116 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() 117 %2 = load i16, ptr %mask 118 %3 = call <16 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 %2, i32 4) 119 ret <16 x float> %3 120} 121 122define <16 x float> @fcmulcph(<16 x float> %a0, <16 x float> %a1) { 123; ENABLE-LABEL: fcmulcph: 124; ENABLE: # %bb.0: 125; ENABLE-NEXT: vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill 126; ENABLE-NEXT: #APP 127; ENABLE-NEXT: nop 128; ENABLE-NEXT: #NO_APP 129; ENABLE-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload 130; ENABLE-NEXT: vpxor %xmm2, %xmm2, %xmm2 131; ENABLE-NEXT: vfcmulcph %zmm1, %zmm0, %zmm2 132; ENABLE-NEXT: vmovaps %zmm2, %zmm0 133; ENABLE-NEXT: retq 134; 135; DISABLE-LABEL: fcmulcph: 136; DISABLE: # %bb.0: 137; DISABLE-NEXT: vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill 138; DISABLE-NEXT: #APP 139; DISABLE-NEXT: nop 140; DISABLE-NEXT: #NO_APP 141; DISABLE-NEXT: vfcmulcph {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm2 # 64-byte Folded Reload 142; DISABLE-NEXT: vmovaps %zmm2, %zmm0 143; DISABLE-NEXT: retq 144 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() 145 %2 = call <16 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.512(<16 x float> %a0, <16 x float> %a1, <16 x float> undef, i16 -1, i32 4) 146 ret <16 x float> %2 147} 148 149define <16 x float> @fcmulcph_mem(<16 x float> %a0, ptr %p1) { 150; ENABLE-LABEL: fcmulcph_mem: 151; ENABLE: # %bb.0: 152; ENABLE-NEXT: vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill 153; ENABLE-NEXT: #APP 154; ENABLE-NEXT: nop 155; ENABLE-NEXT: #NO_APP 156; ENABLE-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload 157; ENABLE-NEXT: vpxor %xmm1, %xmm1, %xmm1 158; ENABLE-NEXT: vfcmulcph (%rdi), %zmm0, %zmm1 159; ENABLE-NEXT: vmovaps %zmm1, %zmm0 160; ENABLE-NEXT: retq 161; 162; DISABLE-LABEL: fcmulcph_mem: 163; DISABLE: # %bb.0: 164; DISABLE-NEXT: vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill 165; DISABLE-NEXT: #APP 166; DISABLE-NEXT: nop 167; DISABLE-NEXT: #NO_APP 168; DISABLE-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload 169; DISABLE-NEXT: vfcmulcph (%rdi), %zmm0, %zmm1 170; DISABLE-NEXT: vmovaps %zmm1, %zmm0 171; DISABLE-NEXT: retq 172 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() 173 %a1 = load <16 x float>, ptr %p1, align 64 174 %2 = call <16 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.512(<16 x float> %a0, <16 x float> %a1, <16 x float> undef, i16 -1, i32 4) 175 ret <16 x float> %2 176} 177 178define <16 x float> @fcmulcph_broadcast(<16 x float> %a0, ptr %p1) { 179; ENABLE-LABEL: fcmulcph_broadcast: 180; ENABLE: # %bb.0: 181; ENABLE-NEXT: vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill 182; ENABLE-NEXT: #APP 183; ENABLE-NEXT: nop 184; ENABLE-NEXT: #NO_APP 185; ENABLE-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload 186; ENABLE-NEXT: vpxor %xmm1, %xmm1, %xmm1 187; ENABLE-NEXT: vfcmulcph (%rdi){1to16}, %zmm0, %zmm1 188; ENABLE-NEXT: vmovaps %zmm1, %zmm0 189; ENABLE-NEXT: retq 190; 191; DISABLE-LABEL: fcmulcph_broadcast: 192; DISABLE: # %bb.0: 193; DISABLE-NEXT: vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill 194; DISABLE-NEXT: #APP 195; DISABLE-NEXT: nop 196; DISABLE-NEXT: #NO_APP 197; DISABLE-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload 198; DISABLE-NEXT: vfcmulcph (%rdi){1to16}, %zmm0, %zmm1 199; DISABLE-NEXT: vmovaps %zmm1, %zmm0 200; DISABLE-NEXT: retq 201 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() 202 %v1 = load float, ptr %p1, align 4 203 %t0 = insertelement <16 x float> undef, float %v1, i64 0 204 %a1 = shufflevector <16 x float> %t0, <16 x float> undef, <16 x i32> zeroinitializer 205 %2 = call <16 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.512(<16 x float> %a0, <16 x float> %a1, <16 x float> undef, i16 -1, i32 4) 206 ret <16 x float> %2 207} 208 209define <16 x float> @fcmulcph_maskz(<16 x float> %a0, <16 x float> %a1, ptr %mask) { 210; ENABLE-LABEL: fcmulcph_maskz: 211; ENABLE: # %bb.0: 212; ENABLE-NEXT: vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill 213; ENABLE-NEXT: #APP 214; ENABLE-NEXT: nop 215; ENABLE-NEXT: #NO_APP 216; ENABLE-NEXT: kmovw (%rdi), %k1 217; ENABLE-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload 218; ENABLE-NEXT: vpxor %xmm2, %xmm2, %xmm2 219; ENABLE-NEXT: vfcmulcph %zmm1, %zmm0, %zmm2 {%k1} {z} 220; ENABLE-NEXT: vmovaps %zmm2, %zmm0 221; ENABLE-NEXT: retq 222; 223; DISABLE-LABEL: fcmulcph_maskz: 224; DISABLE: # %bb.0: 225; DISABLE-NEXT: vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill 226; DISABLE-NEXT: #APP 227; DISABLE-NEXT: nop 228; DISABLE-NEXT: #NO_APP 229; DISABLE-NEXT: kmovw (%rdi), %k1 230; DISABLE-NEXT: vfcmulcph {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm2 {%k1} {z} # 64-byte Folded Reload 231; DISABLE-NEXT: vmovaps %zmm2, %zmm0 232; DISABLE-NEXT: retq 233 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() 234 %2 = load i16, ptr %mask 235 %3 = call <16 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 %2, i32 4) 236 ret <16 x float> %3 237} 238 239define <4 x float> @fmulc(<4 x float> %a0, <4 x float> %a1) { 240; ENABLE-LABEL: fmulc: 241; ENABLE: # %bb.0: 242; ENABLE-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 243; ENABLE-NEXT: #APP 244; ENABLE-NEXT: nop 245; ENABLE-NEXT: #NO_APP 246; ENABLE-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 247; ENABLE-NEXT: vxorps %xmm2, %xmm2, %xmm2 248; ENABLE-NEXT: vfmulcph %xmm1, %xmm0, %xmm2 249; ENABLE-NEXT: vmovaps %xmm2, %xmm0 250; ENABLE-NEXT: retq 251; 252; DISABLE-LABEL: fmulc: 253; DISABLE: # %bb.0: 254; DISABLE-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 255; DISABLE-NEXT: #APP 256; DISABLE-NEXT: nop 257; DISABLE-NEXT: #NO_APP 258; DISABLE-NEXT: vfmulcph {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm2 # 16-byte Folded Reload 259; DISABLE-NEXT: vmovaps %xmm2, %xmm0 260; DISABLE-NEXT: retq 261 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() 262 %2 = call <4 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.128(<4 x float> %a0, <4 x float> %a1, <4 x float> undef, i8 -1) 263 ret <4 x float> %2 264} 265 266define <4 x float> @fmulc_mem(<4 x float> %a0, ptr %p1) { 267; ENABLE-LABEL: fmulc_mem: 268; ENABLE: # %bb.0: 269; ENABLE-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 270; ENABLE-NEXT: #APP 271; ENABLE-NEXT: nop 272; ENABLE-NEXT: #NO_APP 273; ENABLE-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 274; ENABLE-NEXT: vxorps %xmm1, %xmm1, %xmm1 275; ENABLE-NEXT: vfmulcph (%rdi), %xmm0, %xmm1 276; ENABLE-NEXT: vmovaps %xmm1, %xmm0 277; ENABLE-NEXT: retq 278; 279; DISABLE-LABEL: fmulc_mem: 280; DISABLE: # %bb.0: 281; DISABLE-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 282; DISABLE-NEXT: #APP 283; DISABLE-NEXT: nop 284; DISABLE-NEXT: #NO_APP 285; DISABLE-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 286; DISABLE-NEXT: vfmulcph (%rdi), %xmm0, %xmm1 287; DISABLE-NEXT: vmovaps %xmm1, %xmm0 288; DISABLE-NEXT: retq 289 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() 290 %a1 = load <4 x float>, ptr %p1, align 64 291 %2 = call <4 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.128(<4 x float> %a0, <4 x float> %a1, <4 x float> undef, i8 -1) 292 ret <4 x float> %2 293} 294 295define <4 x float> @fmulc_broadcast(<4 x float> %a0, ptr %p1) { 296; ENABLE-LABEL: fmulc_broadcast: 297; ENABLE: # %bb.0: 298; ENABLE-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 299; ENABLE-NEXT: #APP 300; ENABLE-NEXT: nop 301; ENABLE-NEXT: #NO_APP 302; ENABLE-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 303; ENABLE-NEXT: vxorps %xmm1, %xmm1, %xmm1 304; ENABLE-NEXT: vfmulcph (%rdi){1to4}, %xmm0, %xmm1 305; ENABLE-NEXT: vmovaps %xmm1, %xmm0 306; ENABLE-NEXT: retq 307; 308; DISABLE-LABEL: fmulc_broadcast: 309; DISABLE: # %bb.0: 310; DISABLE-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 311; DISABLE-NEXT: #APP 312; DISABLE-NEXT: nop 313; DISABLE-NEXT: #NO_APP 314; DISABLE-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 315; DISABLE-NEXT: vfmulcph (%rdi){1to4}, %xmm0, %xmm1 316; DISABLE-NEXT: vmovaps %xmm1, %xmm0 317; DISABLE-NEXT: retq 318 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() 319 %v1 = load float, ptr %p1, align 4 320 %t0 = insertelement <4 x float> undef, float %v1, i64 0 321 %a1 = shufflevector <4 x float> %t0, <4 x float> undef, <4 x i32> zeroinitializer 322 %2 = call <4 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.128(<4 x float> %a0, <4 x float> %a1, <4 x float> undef, i8 -1) 323 ret <4 x float> %2 324} 325 326define <4 x float> @fmulc_maskz(<4 x float> %a0, <4 x float> %a1, ptr %mask) { 327; ENABLE-LABEL: fmulc_maskz: 328; ENABLE: # %bb.0: 329; ENABLE-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 330; ENABLE-NEXT: #APP 331; ENABLE-NEXT: nop 332; ENABLE-NEXT: #NO_APP 333; ENABLE-NEXT: kmovb (%rdi), %k1 334; ENABLE-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 335; ENABLE-NEXT: vxorps %xmm2, %xmm2, %xmm2 336; ENABLE-NEXT: vfmulcph %xmm1, %xmm0, %xmm2 {%k1} {z} 337; ENABLE-NEXT: vmovaps %xmm2, %xmm0 338; ENABLE-NEXT: retq 339; 340; DISABLE-LABEL: fmulc_maskz: 341; DISABLE: # %bb.0: 342; DISABLE-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 343; DISABLE-NEXT: #APP 344; DISABLE-NEXT: nop 345; DISABLE-NEXT: #NO_APP 346; DISABLE-NEXT: kmovb (%rdi), %k1 347; DISABLE-NEXT: vfmulcph {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm2 {%k1} {z} # 16-byte Folded Reload 348; DISABLE-NEXT: vmovaps %xmm2, %xmm0 349; DISABLE-NEXT: retq 350 351 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() 352 %2 = load i8, ptr %mask 353 %3 = call <4 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.128(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %2) 354 ret <4 x float> %3 355} 356 357define <4 x float> @fcmulc(<4 x float> %a0, <4 x float> %a1) { 358; ENABLE-LABEL: fcmulc: 359; ENABLE: # %bb.0: 360; ENABLE-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 361; ENABLE-NEXT: #APP 362; ENABLE-NEXT: nop 363; ENABLE-NEXT: #NO_APP 364; ENABLE-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 365; ENABLE-NEXT: vxorps %xmm2, %xmm2, %xmm2 366; ENABLE-NEXT: vfcmulcph %xmm1, %xmm0, %xmm2 367; ENABLE-NEXT: vmovaps %xmm2, %xmm0 368; ENABLE-NEXT: retq 369; 370; DISABLE-LABEL: fcmulc: 371; DISABLE: # %bb.0: 372; DISABLE-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 373; DISABLE-NEXT: #APP 374; DISABLE-NEXT: nop 375; DISABLE-NEXT: #NO_APP 376; DISABLE-NEXT: vfcmulcph {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm2 # 16-byte Folded Reload 377; DISABLE-NEXT: vmovaps %xmm2, %xmm0 378; DISABLE-NEXT: retq 379 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() 380 %2 = call <4 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.128(<4 x float> %a0, <4 x float> %a1, <4 x float> undef, i8 -1) 381 ret <4 x float> %2 382} 383 384define <4 x float> @fcmulc_mem(<4 x float> %a0, ptr %p1) { 385; ENABLE-LABEL: fcmulc_mem: 386; ENABLE: # %bb.0: 387; ENABLE-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 388; ENABLE-NEXT: #APP 389; ENABLE-NEXT: nop 390; ENABLE-NEXT: #NO_APP 391; ENABLE-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 392; ENABLE-NEXT: vxorps %xmm1, %xmm1, %xmm1 393; ENABLE-NEXT: vfcmulcph (%rdi), %xmm0, %xmm1 394; ENABLE-NEXT: vmovaps %xmm1, %xmm0 395; ENABLE-NEXT: retq 396; 397; DISABLE-LABEL: fcmulc_mem: 398; DISABLE: # %bb.0: 399; DISABLE-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 400; DISABLE-NEXT: #APP 401; DISABLE-NEXT: nop 402; DISABLE-NEXT: #NO_APP 403; DISABLE-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 404; DISABLE-NEXT: vfcmulcph (%rdi), %xmm0, %xmm1 405; DISABLE-NEXT: vmovaps %xmm1, %xmm0 406; DISABLE-NEXT: retq 407 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() 408 %a1 = load <4 x float>, ptr %p1, align 64 409 %2 = call <4 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.128(<4 x float> %a0, <4 x float> %a1, <4 x float> undef, i8 -1) 410 ret <4 x float> %2 411} 412 413define <4 x float> @fcmulc_broadcast(<4 x float> %a0, ptr %p1) { 414; ENABLE-LABEL: fcmulc_broadcast: 415; ENABLE: # %bb.0: 416; ENABLE-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 417; ENABLE-NEXT: #APP 418; ENABLE-NEXT: nop 419; ENABLE-NEXT: #NO_APP 420; ENABLE-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 421; ENABLE-NEXT: vxorps %xmm1, %xmm1, %xmm1 422; ENABLE-NEXT: vfcmulcph (%rdi){1to4}, %xmm0, %xmm1 423; ENABLE-NEXT: vmovaps %xmm1, %xmm0 424; ENABLE-NEXT: retq 425; 426; DISABLE-LABEL: fcmulc_broadcast: 427; DISABLE: # %bb.0: 428; DISABLE-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 429; DISABLE-NEXT: #APP 430; DISABLE-NEXT: nop 431; DISABLE-NEXT: #NO_APP 432; DISABLE-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 433; DISABLE-NEXT: vfcmulcph (%rdi){1to4}, %xmm0, %xmm1 434; DISABLE-NEXT: vmovaps %xmm1, %xmm0 435; DISABLE-NEXT: retq 436 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() 437 %v1 = load float, ptr %p1, align 4 438 %t0 = insertelement <4 x float> undef, float %v1, i64 0 439 %a1 = shufflevector <4 x float> %t0, <4 x float> undef, <4 x i32> zeroinitializer 440 %2 = call <4 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.128(<4 x float> %a0, <4 x float> %a1, <4 x float> undef, i8 -1) 441 ret <4 x float> %2 442} 443 444define <4 x float> @fcmulc_maskz(<4 x float> %a0, <4 x float> %a1, ptr %mask) { 445; ENABLE-LABEL: fcmulc_maskz: 446; ENABLE: # %bb.0: 447; ENABLE-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 448; ENABLE-NEXT: #APP 449; ENABLE-NEXT: nop 450; ENABLE-NEXT: #NO_APP 451; ENABLE-NEXT: kmovb (%rdi), %k1 452; ENABLE-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 453; ENABLE-NEXT: vxorps %xmm2, %xmm2, %xmm2 454; ENABLE-NEXT: vfcmulcph %xmm1, %xmm0, %xmm2 {%k1} {z} 455; ENABLE-NEXT: vmovaps %xmm2, %xmm0 456; ENABLE-NEXT: retq 457; 458; DISABLE-LABEL: fcmulc_maskz: 459; DISABLE: # %bb.0: 460; DISABLE-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 461; DISABLE-NEXT: #APP 462; DISABLE-NEXT: nop 463; DISABLE-NEXT: #NO_APP 464; DISABLE-NEXT: kmovb (%rdi), %k1 465; DISABLE-NEXT: vfcmulcph {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm2 {%k1} {z} # 16-byte Folded Reload 466; DISABLE-NEXT: vmovaps %xmm2, %xmm0 467; DISABLE-NEXT: retq 468 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() 469 %2 = load i8, ptr %mask 470 %3 = call <4 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.128(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %2) 471 ret <4 x float> %3 472} 473 474define <8 x float> @fmulc_ymm(<8 x float> %a0, <8 x float> %a1) { 475; ENABLE-LABEL: fmulc_ymm: 476; ENABLE: # %bb.0: 477; ENABLE-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 478; ENABLE-NEXT: #APP 479; ENABLE-NEXT: nop 480; ENABLE-NEXT: #NO_APP 481; ENABLE-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload 482; ENABLE-NEXT: vxorps %xmm2, %xmm2, %xmm2 483; ENABLE-NEXT: vfmulcph %ymm1, %ymm0, %ymm2 484; ENABLE-NEXT: vmovaps %ymm2, %ymm0 485; ENABLE-NEXT: retq 486; 487; DISABLE-LABEL: fmulc_ymm: 488; DISABLE: # %bb.0: 489; DISABLE-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 490; DISABLE-NEXT: #APP 491; DISABLE-NEXT: nop 492; DISABLE-NEXT: #NO_APP 493; DISABLE-NEXT: vfmulcph {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm2 # 32-byte Folded Reload 494; DISABLE-NEXT: vmovaps %ymm2, %ymm0 495; DISABLE-NEXT: retq 496 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() 497 %2 = call <8 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.256(<8 x float> %a0, <8 x float> %a1, <8 x float> undef, i8 -1) 498 ret <8 x float> %2 499} 500 501define <8 x float> @fmulc_ymm_mem(<8 x float> %a0, ptr %p1) { 502; ENABLE-LABEL: fmulc_ymm_mem: 503; ENABLE: # %bb.0: 504; ENABLE-NEXT: #APP 505; ENABLE-NEXT: nop 506; ENABLE-NEXT: #NO_APP 507; ENABLE-NEXT: vxorps %xmm1, %xmm1, %xmm1 508; ENABLE-NEXT: vfmulcph (%rdi), %ymm0, %ymm1 509; ENABLE-NEXT: vmovaps %ymm1, %ymm0 510; ENABLE-NEXT: retq 511; 512; DISABLE-LABEL: fmulc_ymm_mem: 513; DISABLE: # %bb.0: 514; DISABLE-NEXT: #APP 515; DISABLE-NEXT: nop 516; DISABLE-NEXT: #NO_APP 517; DISABLE-NEXT: vfmulcph (%rdi), %ymm0, %ymm1 518; DISABLE-NEXT: vmovaps %ymm1, %ymm0 519; DISABLE-NEXT: retq 520 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() 521 %a1 = load <8 x float>, ptr %p1, align 64 522 %2 = call <8 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.256(<8 x float> %a0, <8 x float> %a1, <8 x float> undef, i8 -1) 523 ret <8 x float> %2 524} 525 526define <8 x float> @fmulc_ymm_broadcast(<8 x float> %a0, ptr %p1) { 527; ENABLE-LABEL: fmulc_ymm_broadcast: 528; ENABLE: # %bb.0: 529; ENABLE-NEXT: #APP 530; ENABLE-NEXT: nop 531; ENABLE-NEXT: #NO_APP 532; ENABLE-NEXT: vxorps %xmm1, %xmm1, %xmm1 533; ENABLE-NEXT: vfmulcph (%rdi){1to8}, %ymm0, %ymm1 534; ENABLE-NEXT: vmovaps %ymm1, %ymm0 535; ENABLE-NEXT: retq 536; 537; DISABLE-LABEL: fmulc_ymm_broadcast: 538; DISABLE: # %bb.0: 539; DISABLE-NEXT: #APP 540; DISABLE-NEXT: nop 541; DISABLE-NEXT: #NO_APP 542; DISABLE-NEXT: vfmulcph (%rdi){1to8}, %ymm0, %ymm1 543; DISABLE-NEXT: vmovaps %ymm1, %ymm0 544; DISABLE-NEXT: retq 545 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() 546 %v1 = load float, ptr %p1, align 4 547 %t0 = insertelement <8 x float> undef, float %v1, i64 0 548 %a1 = shufflevector <8 x float> %t0, <8 x float> undef, <8 x i32> zeroinitializer 549 %2 = call <8 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.256(<8 x float> %a0, <8 x float> %a1, <8 x float> undef, i8 -1) 550 ret <8 x float> %2 551} 552 553define <8 x float> @fmulc_maskz_ymm(<8 x float> %a0, <8 x float> %a1, ptr %mask) { 554; ENABLE-LABEL: fmulc_maskz_ymm: 555; ENABLE: # %bb.0: 556; ENABLE-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 557; ENABLE-NEXT: #APP 558; ENABLE-NEXT: nop 559; ENABLE-NEXT: #NO_APP 560; ENABLE-NEXT: kmovb (%rdi), %k1 561; ENABLE-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload 562; ENABLE-NEXT: vxorps %xmm2, %xmm2, %xmm2 563; ENABLE-NEXT: vfmulcph %ymm1, %ymm0, %ymm2 {%k1} {z} 564; ENABLE-NEXT: vmovaps %ymm2, %ymm0 565; ENABLE-NEXT: retq 566; 567; DISABLE-LABEL: fmulc_maskz_ymm: 568; DISABLE: # %bb.0: 569; DISABLE-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 570; DISABLE-NEXT: #APP 571; DISABLE-NEXT: nop 572; DISABLE-NEXT: #NO_APP 573; DISABLE-NEXT: kmovb (%rdi), %k1 574; DISABLE-NEXT: vfmulcph {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm2 {%k1} {z} # 32-byte Folded Reload 575; DISABLE-NEXT: vmovaps %ymm2, %ymm0 576; DISABLE-NEXT: retq 577 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() 578 %2 = load i8, ptr %mask 579 %3 = call <8 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.256(<8 x float> %a0, <8 x float> %a1, <8 x float> zeroinitializer, i8 %2) 580 ret <8 x float> %3 581} 582 583define <8 x float> @fcmulc_ymm(<8 x float> %a0, <8 x float> %a1) { 584; ENABLE-LABEL: fcmulc_ymm: 585; ENABLE: # %bb.0: 586; ENABLE-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 587; ENABLE-NEXT: #APP 588; ENABLE-NEXT: nop 589; ENABLE-NEXT: #NO_APP 590; ENABLE-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload 591; ENABLE-NEXT: vxorps %xmm2, %xmm2, %xmm2 592; ENABLE-NEXT: vfcmulcph %ymm1, %ymm0, %ymm2 593; ENABLE-NEXT: vmovaps %ymm2, %ymm0 594; ENABLE-NEXT: retq 595; 596; DISABLE-LABEL: fcmulc_ymm: 597; DISABLE: # %bb.0: 598; DISABLE-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 599; DISABLE-NEXT: #APP 600; DISABLE-NEXT: nop 601; DISABLE-NEXT: #NO_APP 602; DISABLE-NEXT: vfcmulcph {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm2 # 32-byte Folded Reload 603; DISABLE-NEXT: vmovaps %ymm2, %ymm0 604; DISABLE-NEXT: retq 605 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() 606 %2 = call <8 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.256(<8 x float> %a0, <8 x float> %a1, <8 x float> undef, i8 -1) 607 ret <8 x float> %2 608} 609 610define <8 x float> @fcmulc_ymm_mem(<8 x float> %a0, ptr %p1) { 611; ENABLE-LABEL: fcmulc_ymm_mem: 612; ENABLE: # %bb.0: 613; ENABLE-NEXT: #APP 614; ENABLE-NEXT: nop 615; ENABLE-NEXT: #NO_APP 616; ENABLE-NEXT: vxorps %xmm1, %xmm1, %xmm1 617; ENABLE-NEXT: vfcmulcph (%rdi), %ymm0, %ymm1 618; ENABLE-NEXT: vmovaps %ymm1, %ymm0 619; ENABLE-NEXT: retq 620; 621; DISABLE-LABEL: fcmulc_ymm_mem: 622; DISABLE: # %bb.0: 623; DISABLE-NEXT: #APP 624; DISABLE-NEXT: nop 625; DISABLE-NEXT: #NO_APP 626; DISABLE-NEXT: vfcmulcph (%rdi), %ymm0, %ymm1 627; DISABLE-NEXT: vmovaps %ymm1, %ymm0 628; DISABLE-NEXT: retq 629 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() 630 %a1 = load <8 x float>, ptr %p1, align 64 631 %2 = call <8 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.256(<8 x float> %a0, <8 x float> %a1, <8 x float> undef, i8 -1) 632 ret <8 x float> %2 633} 634 635define <8 x float> @fcmulc_ymm_broadcast(<8 x float> %a0, ptr %p1) { 636; ENABLE-LABEL: fcmulc_ymm_broadcast: 637; ENABLE: # %bb.0: 638; ENABLE-NEXT: #APP 639; ENABLE-NEXT: nop 640; ENABLE-NEXT: #NO_APP 641; ENABLE-NEXT: vxorps %xmm1, %xmm1, %xmm1 642; ENABLE-NEXT: vfcmulcph (%rdi){1to8}, %ymm0, %ymm1 643; ENABLE-NEXT: vmovaps %ymm1, %ymm0 644; ENABLE-NEXT: retq 645; 646; DISABLE-LABEL: fcmulc_ymm_broadcast: 647; DISABLE: # %bb.0: 648; DISABLE-NEXT: #APP 649; DISABLE-NEXT: nop 650; DISABLE-NEXT: #NO_APP 651; DISABLE-NEXT: vfcmulcph (%rdi){1to8}, %ymm0, %ymm1 652; DISABLE-NEXT: vmovaps %ymm1, %ymm0 653; DISABLE-NEXT: retq 654 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() 655 %v1 = load float, ptr %p1, align 4 656 %t0 = insertelement <8 x float> undef, float %v1, i64 0 657 %a1 = shufflevector <8 x float> %t0, <8 x float> undef, <8 x i32> zeroinitializer 658 %2 = call <8 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.256(<8 x float> %a0, <8 x float> %a1, <8 x float> undef, i8 -1) 659 ret <8 x float> %2 660} 661 662define <8 x float> @fcmulc_maskz_ymm(<8 x float> %a0, <8 x float> %a1, ptr %mask) { 663; ENABLE-LABEL: fcmulc_maskz_ymm: 664; ENABLE: # %bb.0: 665; ENABLE-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 666; ENABLE-NEXT: #APP 667; ENABLE-NEXT: nop 668; ENABLE-NEXT: #NO_APP 669; ENABLE-NEXT: kmovb (%rdi), %k1 670; ENABLE-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload 671; ENABLE-NEXT: vxorps %xmm2, %xmm2, %xmm2 672; ENABLE-NEXT: vfcmulcph %ymm1, %ymm0, %ymm2 {%k1} {z} 673; ENABLE-NEXT: vmovaps %ymm2, %ymm0 674; ENABLE-NEXT: retq 675; 676; DISABLE-LABEL: fcmulc_maskz_ymm: 677; DISABLE: # %bb.0: 678; DISABLE-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 679; DISABLE-NEXT: #APP 680; DISABLE-NEXT: nop 681; DISABLE-NEXT: #NO_APP 682; DISABLE-NEXT: kmovb (%rdi), %k1 683; DISABLE-NEXT: vfcmulcph {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm2 {%k1} {z} # 32-byte Folded Reload 684; DISABLE-NEXT: vmovaps %ymm2, %ymm0 685; DISABLE-NEXT: retq 686 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() 687 %2 = load i8, ptr %mask 688 %3 = call <8 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.256(<8 x float> %a0, <8 x float> %a1, <8 x float> zeroinitializer, i8 %2) 689 ret <8 x float> %3 690} 691 692define <4 x float> @fmulcsh(<4 x float> %a0, <4 x float> %a1) { 693; ENABLE-LABEL: fmulcsh: 694; ENABLE: # %bb.0: 695; ENABLE-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 696; ENABLE-NEXT: #APP 697; ENABLE-NEXT: nop 698; ENABLE-NEXT: #NO_APP 699; ENABLE-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 700; ENABLE-NEXT: vxorps %xmm2, %xmm2, %xmm2 701; ENABLE-NEXT: vfmulcsh %xmm1, %xmm0, %xmm2 702; ENABLE-NEXT: vmovaps %xmm2, %xmm0 703; ENABLE-NEXT: retq 704; 705; DISABLE-LABEL: fmulcsh: 706; DISABLE: # %bb.0: 707; DISABLE-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 708; DISABLE-NEXT: #APP 709; DISABLE-NEXT: nop 710; DISABLE-NEXT: #NO_APP 711; DISABLE-NEXT: vfmulcsh {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm2 # 16-byte Folded Reload 712; DISABLE-NEXT: vmovaps %xmm2, %xmm0 713; DISABLE-NEXT: retq 714 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() 715 %2 = call <4 x float> @llvm.x86.avx512fp16.mask.vfmul.csh(<4 x float> %a0, <4 x float> %a1, <4 x float> undef, i8 -1, i32 4) 716 ret <4 x float> %2 717} 718 719define <4 x float> @fmulcsh_mem(<4 x float> %a0, ptr %p1) { 720; ENABLE-LABEL: fmulcsh_mem: 721; ENABLE: # %bb.0: 722; ENABLE-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 723; ENABLE-NEXT: #APP 724; ENABLE-NEXT: nop 725; ENABLE-NEXT: #NO_APP 726; ENABLE-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 727; ENABLE-NEXT: vxorps %xmm1, %xmm1, %xmm1 728; ENABLE-NEXT: vfmulcsh (%rdi), %xmm0, %xmm1 729; ENABLE-NEXT: vmovaps %xmm1, %xmm0 730; ENABLE-NEXT: retq 731; 732; DISABLE-LABEL: fmulcsh_mem: 733; DISABLE: # %bb.0: 734; DISABLE-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 735; DISABLE-NEXT: #APP 736; DISABLE-NEXT: nop 737; DISABLE-NEXT: #NO_APP 738; DISABLE-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 739; DISABLE-NEXT: vfmulcsh (%rdi), %xmm0, %xmm1 740; DISABLE-NEXT: vmovaps %xmm1, %xmm0 741; DISABLE-NEXT: retq 742 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() 743 %a1 = load <4 x float>, ptr %p1, align 64 744 %2 = call <4 x float> @llvm.x86.avx512fp16.mask.vfmul.csh(<4 x float> %a0, <4 x float> %a1, <4 x float> undef, i8 -1, i32 4) 745 ret <4 x float> %2 746} 747 748define <4 x float> @fmulcsh_maskz(<4 x float> %a0, <4 x float> %a1, ptr %mask) { 749; ENABLE-LABEL: fmulcsh_maskz: 750; ENABLE: # %bb.0: 751; ENABLE-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 752; ENABLE-NEXT: #APP 753; ENABLE-NEXT: nop 754; ENABLE-NEXT: #NO_APP 755; ENABLE-NEXT: kmovb (%rdi), %k1 756; ENABLE-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 757; ENABLE-NEXT: vxorps %xmm2, %xmm2, %xmm2 758; ENABLE-NEXT: vfmulcsh %xmm1, %xmm0, %xmm2 {%k1} {z} 759; ENABLE-NEXT: vmovaps %xmm2, %xmm0 760; ENABLE-NEXT: retq 761; 762; DISABLE-LABEL: fmulcsh_maskz: 763; DISABLE: # %bb.0: 764; DISABLE-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 765; DISABLE-NEXT: #APP 766; DISABLE-NEXT: nop 767; DISABLE-NEXT: #NO_APP 768; DISABLE-NEXT: kmovb (%rdi), %k1 769; DISABLE-NEXT: vfmulcsh {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm2 {%k1} {z} # 16-byte Folded Reload 770; DISABLE-NEXT: vmovaps %xmm2, %xmm0 771; DISABLE-NEXT: retq 772 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() 773 %2 = load i8, ptr %mask 774 %3 = call <4 x float> @llvm.x86.avx512fp16.mask.vfmul.csh(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %2, i32 4) 775 ret <4 x float> %3 776} 777 778define <4 x float> @fcmulcsh(<4 x float> %a0, <4 x float> %a1) { 779; ENABLE-LABEL: fcmulcsh: 780; ENABLE: # %bb.0: 781; ENABLE-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 782; ENABLE-NEXT: #APP 783; ENABLE-NEXT: nop 784; ENABLE-NEXT: #NO_APP 785; ENABLE-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 786; ENABLE-NEXT: vxorps %xmm2, %xmm2, %xmm2 787; ENABLE-NEXT: vfcmulcsh %xmm1, %xmm0, %xmm2 788; ENABLE-NEXT: vmovaps %xmm2, %xmm0 789; ENABLE-NEXT: retq 790; 791; DISABLE-LABEL: fcmulcsh: 792; DISABLE: # %bb.0: 793; DISABLE-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 794; DISABLE-NEXT: #APP 795; DISABLE-NEXT: nop 796; DISABLE-NEXT: #NO_APP 797; DISABLE-NEXT: vfcmulcsh {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm2 # 16-byte Folded Reload 798; DISABLE-NEXT: vmovaps %xmm2, %xmm0 799; DISABLE-NEXT: retq 800 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() 801 %2 = call <4 x float> @llvm.x86.avx512fp16.mask.vfcmul.csh(<4 x float> %a0, <4 x float> %a1, <4 x float> undef, i8 -1, i32 4) 802 ret <4 x float> %2 803} 804 805define <4 x float> @fcmulcsh_mem(<4 x float> %a0, ptr %p1) { 806; ENABLE-LABEL: fcmulcsh_mem: 807; ENABLE: # %bb.0: 808; ENABLE-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 809; ENABLE-NEXT: #APP 810; ENABLE-NEXT: nop 811; ENABLE-NEXT: #NO_APP 812; ENABLE-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 813; ENABLE-NEXT: vxorps %xmm1, %xmm1, %xmm1 814; ENABLE-NEXT: vfcmulcsh (%rdi), %xmm0, %xmm1 815; ENABLE-NEXT: vmovaps %xmm1, %xmm0 816; ENABLE-NEXT: retq 817; 818; DISABLE-LABEL: fcmulcsh_mem: 819; DISABLE: # %bb.0: 820; DISABLE-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 821; DISABLE-NEXT: #APP 822; DISABLE-NEXT: nop 823; DISABLE-NEXT: #NO_APP 824; DISABLE-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 825; DISABLE-NEXT: vfcmulcsh (%rdi), %xmm0, %xmm1 826; DISABLE-NEXT: vmovaps %xmm1, %xmm0 827; DISABLE-NEXT: retq 828 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() 829 %a1 = load <4 x float>, ptr %p1, align 64 830 %2 = call <4 x float> @llvm.x86.avx512fp16.mask.vfcmul.csh(<4 x float> %a0, <4 x float> %a1, <4 x float> undef, i8 -1, i32 4) 831 ret <4 x float> %2 832} 833 834define <4 x float> @fcmulcsh_maskz(<4 x float> %a0, <4 x float> %a1, ptr %mask) { 835; ENABLE-LABEL: fcmulcsh_maskz: 836; ENABLE: # %bb.0: 837; ENABLE-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 838; ENABLE-NEXT: #APP 839; ENABLE-NEXT: nop 840; ENABLE-NEXT: #NO_APP 841; ENABLE-NEXT: kmovb (%rdi), %k1 842; ENABLE-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 843; ENABLE-NEXT: vxorps %xmm2, %xmm2, %xmm2 844; ENABLE-NEXT: vfcmulcsh %xmm1, %xmm0, %xmm2 {%k1} {z} 845; ENABLE-NEXT: vmovaps %xmm2, %xmm0 846; ENABLE-NEXT: retq 847; 848; DISABLE-LABEL: fcmulcsh_maskz: 849; DISABLE: # %bb.0: 850; DISABLE-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 851; DISABLE-NEXT: #APP 852; DISABLE-NEXT: nop 853; DISABLE-NEXT: #NO_APP 854; DISABLE-NEXT: kmovb (%rdi), %k1 855; DISABLE-NEXT: vfcmulcsh {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm2 {%k1} {z} # 16-byte Folded Reload 856; DISABLE-NEXT: vmovaps %xmm2, %xmm0 857; DISABLE-NEXT: retq 858 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() 859 %2 = load i8, ptr %mask 860 %3 = call <4 x float> @llvm.x86.avx512fp16.mask.vfcmul.csh(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %2, i32 4) 861 ret <4 x float> %3 862} 863 864declare <4 x float> @llvm.x86.avx512fp16.mask.vfcmul.csh(<4 x float>, <4 x float>, <4 x float>, i8, i32) 865declare <4 x float> @llvm.x86.avx512fp16.mask.vfmul.csh(<4 x float>, <4 x float>, <4 x float>, i8, i32) 866declare <16 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) 867declare <16 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) 868declare <8 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.256(<8 x float>, <8 x float>, <8 x float>, i8) 869declare <8 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.256(<8 x float>, <8 x float>, <8 x float>, i8) 870declare <4 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.128(<4 x float>, <4 x float>, <4 x float>, i8) 871declare <4 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.128(<4 x float>, <4 x float>, <4 x float>, i8) 872 873