; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw -mattr=+avx512fp16 -mattr=+avx512vl | FileCheck %s

; Codegen tests for the AVX512-FP16 complex multiply intrinsics
; (llvm.x86.avx512fp16.mask.vfmul.cph.* and ...mask.vfcmul.cph.*).
; Each intrinsic takes two multiplicands, a passthru vector and a mask; the
; 512-bit forms take an additional i32 rounding operand.
;
; NOTE(review): the 128-bit tests are named "avx512fp8" although they call
; avx512fp16 intrinsics. The names are kept unchanged so test labels stay stable.

;
; vfmulcph, 128-bit
;

declare <4 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.128(<4 x float>, <4 x float>, <4 x float>, i8)

; Splat constant as the second multiplicand: the expected assembly folds it
; into a {1to4} broadcast memory operand.
define <4 x float> @test_int_x86_avx512fp8_mask_cfmul_ph_bst(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512fp8_mask_cfmul_ph_bst:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfmulcph {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %prod = call <4 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.128(<4 x float> %x0, <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> %x2, i8 %x3)
  ret <4 x float> %prod
}

; Splat constant as the first multiplicand: the expected assembly is the same
; as above, i.e. the operands are commuted so the broadcast can still be folded.
define <4 x float> @test_int_x86_avx512fp8_mask_cfmul_ph_bst2(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512fp8_mask_cfmul_ph_bst2:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfmulcph {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %prod = call <4 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.128(<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> %x0, <4 x float> %x2, i8 %x3)
  ret <4 x float> %prod
}

; Merge-masking: %x2 is the passthru and %x3 the mask.
define <4 x float> @test_int_x86_avx512fp8_mask_cfmul_ph_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512fp8_mask_cfmul_ph_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfmulcph %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %prod = call <4 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
  ret <4 x float> %prod
}

; Zero-masking: a zeroinitializer passthru yields the {z} form.
define <4 x float> @test_int_x86_avx512fp8_maskz_cfmul_ph_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512fp8_maskz_cfmul_ph_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfmulcph %xmm1, %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %prod = call <4 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.128(<4 x float> %x0, <4 x float> %x1, <4 x float> zeroinitializer, i8 %x3)
  ret <4 x float> %prod
}

; All-ones mask (i8 -1): the expected assembly uses no mask register.
define <4 x float> @test_int_x86_avx512fp8_cfmul_ph_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp8_cfmul_ph_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmulcph %xmm1, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %prod = call <4 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.128(<4 x float> %x2, <4 x float> %x1, <4 x float> %x0, i8 -1)
  ret <4 x float> %prod
}

;
; vfmulcph, 256-bit
;

declare <8 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.256(<8 x float>, <8 x float>, <8 x float>, i8)

; Merge-masking: %x2 is the passthru and %x3 the mask.
define <8 x float> @test_int_x86_avx512fp16_mask_cfmul_ph_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cfmul_ph_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfmulcph %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT:    vmovaps %ymm2, %ymm0
; CHECK-NEXT:    retq
  %prod = call <8 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
  ret <8 x float> %prod
}

; Zero-masking: a zeroinitializer passthru yields the {z} form.
define <8 x float> @test_int_x86_avx512fp16_maskz_cfmul_ph_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_cfmul_ph_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfmulcph %ymm1, %ymm0, %ymm2 {%k1} {z}
; CHECK-NEXT:    vmovaps %ymm2, %ymm0
; CHECK-NEXT:    retq
  %prod = call <8 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.256(<8 x float> %x0, <8 x float> %x1, <8 x float> zeroinitializer, i8 %x3)
  ret <8 x float> %prod
}

; All-ones mask (i8 -1): the expected assembly uses no mask register.
define <8 x float> @test_int_x86_avx512fp16_cfmul_ph_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_cfmul_ph_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmulcph %ymm1, %ymm2, %ymm0
; CHECK-NEXT:    retq
  %prod = call <8 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.256(<8 x float> %x2, <8 x float> %x1, <8 x float> %x0, i8 -1)
  ret <8 x float> %prod
}

;
; vfmulcph, 512-bit (extra i32 rounding operand)
;

declare <16 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

; Merge-masking; rounding operand 4 produces no embedded-rounding annotation.
define <16 x float> @test_int_x86_avx512fp16_mask_cfmul_ph_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cfmul_ph_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfmulcph %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %prod = call <16 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  ret <16 x float> %prod
}

; Zero-masking: a zeroinitializer passthru yields the {z} form.
define <16 x float> @test_int_x86_avx512fp16_maskz_cfmul_ph_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_cfmul_ph_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfmulcph %zmm1, %zmm0, %zmm2 {%k1} {z}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %prod = call <16 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.512(<16 x float> %x0, <16 x float> %x1, <16 x float> zeroinitializer, i16 %x3, i32 4)
  ret <16 x float> %prod
}

; Unmasked with an explicit rounding operand.
; NOTE(review): despite the "_rn" suffix, rounding operand 11 is expected to
; print as {rz-sae}.
define <16 x float> @test_int_x86_avx512fp16_cfmul_ph_512_rn(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512fp16_cfmul_ph_512_rn:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmulcph {rz-sae}, %zmm1, %zmm2, %zmm0
; CHECK-NEXT:    retq
  %prod = call <16 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.512(<16 x float> %x2, <16 x float> %x1, <16 x float> %x0, i16 -1, i32 11)
  ret <16 x float> %prod
}

; Unmasked, rounding operand 4: plain register form.
define <16 x float> @test_int_x86_avx512fp16_cfmul_ph_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512fp16_cfmul_ph_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmulcph %zmm1, %zmm2, %zmm0
; CHECK-NEXT:    retq
  %prod = call <16 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.512(<16 x float> %x2, <16 x float> %x1, <16 x float> %x0, i16 -1, i32 4)
  ret <16 x float> %prod
}

;
; vfcmulcph (conjugate), 128-bit
;

declare <4 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.128(<4 x float>, <4 x float>, <4 x float>, i8)

; Splat constant as the second multiplicand: the expected assembly folds it
; into a {1to4} broadcast memory operand.
define <4 x float> @test_int_x86_avx512fp8_mask_cfcmul_ph_bst(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512fp8_mask_cfcmul_ph_bst:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfcmulcph {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %prod = call <4 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.128(<4 x float> %x0, <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> %x2, i8 %x3)
  ret <4 x float> %prod
}

; Check that the conjugate complex FMUL is not commutable.
; The splat constant is the first multiplicand here. The expected assembly
; loads it into a register with vbroadcastss and keeps the operand order,
; instead of folding it as a {1to4} memory operand.
define <4 x float> @test_int_x86_avx512fp8_mask_cfcmul_ph_bst2(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512fp8_mask_cfcmul_ph_bst2:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vbroadcastss {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; CHECK-NEXT:    vfcmulcph %xmm0, %xmm1, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %prod = call <4 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.128(<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> %x0, <4 x float> %x2, i8 %x3)
  ret <4 x float> %prod
}

; Merge-masking: %x2 is the passthru and %x3 the mask.
; NOTE(review): the 128-bit tests are named "avx512fp8" although they call
; avx512fp16 intrinsics. The names are kept unchanged so test labels stay stable.
define <4 x float> @test_int_x86_avx512fp8_mask_cfcmul_ph_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512fp8_mask_cfcmul_ph_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfcmulcph %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %prod = call <4 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
  ret <4 x float> %prod
}

; Zero-masking: a zeroinitializer passthru yields the {z} form.
define <4 x float> @test_int_x86_avx512fp8_maskz_cfcmul_ph_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512fp8_maskz_cfcmul_ph_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfcmulcph %xmm1, %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %prod = call <4 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.128(<4 x float> %x0, <4 x float> %x1, <4 x float> zeroinitializer, i8 %x3)
  ret <4 x float> %prod
}

; All-ones mask (i8 -1): the expected assembly uses no mask register.
define <4 x float> @test_int_x86_avx512fp8_cfcmul_ph_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp8_cfcmul_ph_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfcmulcph %xmm1, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %prod = call <4 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.128(<4 x float> %x2, <4 x float> %x1, <4 x float> %x0, i8 -1)
  ret <4 x float> %prod
}

;
; vfcmulcph (conjugate), 256-bit
;

declare <8 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.256(<8 x float>, <8 x float>, <8 x float>, i8)

; Merge-masking: %x2 is the passthru and %x3 the mask.
define <8 x float> @test_int_x86_avx512fp16_mask_cfcmul_ph_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cfcmul_ph_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfcmulcph %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT:    vmovaps %ymm2, %ymm0
; CHECK-NEXT:    retq
  %prod = call <8 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
  ret <8 x float> %prod
}

; Zero-masking: a zeroinitializer passthru yields the {z} form.
define <8 x float> @test_int_x86_avx512fp16_maskz_cfcmul_ph_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_cfcmul_ph_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfcmulcph %ymm1, %ymm0, %ymm2 {%k1} {z}
; CHECK-NEXT:    vmovaps %ymm2, %ymm0
; CHECK-NEXT:    retq
  %prod = call <8 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.256(<8 x float> %x0, <8 x float> %x1, <8 x float> zeroinitializer, i8 %x3)
  ret <8 x float> %prod
}

; All-ones mask (i8 -1): the expected assembly uses no mask register.
define <8 x float> @test_int_x86_avx512fp16_cfcmul_ph_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_cfcmul_ph_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfcmulcph %ymm1, %ymm2, %ymm0
; CHECK-NEXT:    retq
  %prod = call <8 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.256(<8 x float> %x2, <8 x float> %x1, <8 x float> %x0, i8 -1)
  ret <8 x float> %prod
}

;
; vfcmulcph (conjugate), 512-bit (extra i32 rounding operand)
;

declare <16 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

; Merge-masking; rounding operand 4 produces no embedded-rounding annotation.
define <16 x float> @test_int_x86_avx512fp16_mask_cfcmul_ph_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cfcmul_ph_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfcmulcph %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %prod = call <16 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  ret <16 x float> %prod
}

; Zero-masking: a zeroinitializer passthru yields the {z} form.
define <16 x float> @test_int_x86_avx512fp16_maskz_cfcmul_ph_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_cfcmul_ph_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfcmulcph %zmm1, %zmm0, %zmm2 {%k1} {z}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %prod = call <16 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.512(<16 x float> %x0, <16 x float> %x1, <16 x float> zeroinitializer, i16 %x3, i32 4)
  ret <16 x float> %prod
}

; Unmasked with an explicit rounding operand.
; NOTE(review): despite the "_rn" suffix, rounding operand 11 is expected to
; print as {rz-sae}.
define <16 x float> @test_int_x86_avx512fp16_cfcmul_ph_512_rn(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512fp16_cfcmul_ph_512_rn:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfcmulcph {rz-sae}, %zmm1, %zmm2, %zmm0
; CHECK-NEXT:    retq
  %prod = call <16 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.512(<16 x float> %x2, <16 x float> %x1, <16 x float> %x0, i16 -1, i32 11)
  ret <16 x float> %prod
}

; Unmasked, rounding operand 4: plain register form.
define <16 x float> @test_int_x86_avx512fp16_cfcmul_ph_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512fp16_cfcmul_ph_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfcmulcph %zmm1, %zmm2, %zmm0
; CHECK-NEXT:    retq
  %prod = call <16 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.512(<16 x float> %x2, <16 x float> %x1, <16 x float> %x0, i16 -1, i32 4)
  ret <16 x float> %prod
}