; xref: /llvm-project/llvm/test/CodeGen/X86/avx512cfmul-intrinsics.ll (revision ebec077e07f5d35a870f075fb665c006978d49ea)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw -mattr=+avx512fp16 -mattr=+avx512vl | FileCheck %s

; 128-bit complex-multiply intrinsic (selected as vfmulcph in the tests below).
; As used in this file the operands are: two multiplicands, a pass-through
; vector, and an i8 mask.
declare <4 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.128(<4 x float>, <4 x float>, <4 x float>, i8)

; Masked 128-bit complex multiply whose second multiplicand is a constant
; splat of 1.0. The expected code folds that constant into an
; embedded-broadcast ({1to4}) constant-pool operand; %x2 is the pass-through
; value and %x3 the mask (moved into %k1).
; NOTE(review): "fp8" in the test name looks like a typo for "fp16" (the
; intrinsic is avx512fp16) - confirm before renaming; the label must match.
define <4 x float> @test_int_x86_avx512fp8_mask_cfmul_ph_bst(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512fp8_mask_cfmul_ph_bst:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfmulcph {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.128(<4 x float> %x0, <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> %x2, i8 %x3)
  ret <4 x float> %res
}

; Like the _bst test, but the splat-1.0 constant is the FIRST multiplicand and
; %x0 the second. The expected code is the same as for _bst: the operands are
; swapped so the constant is still folded as a {1to4} broadcast memory operand.
define <4 x float> @test_int_x86_avx512fp8_mask_cfmul_ph_bst2(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512fp8_mask_cfmul_ph_bst2:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfmulcph {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.128(<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> %x0, <4 x float> %x2, i8 %x3)
  ret <4 x float> %res
}

; Merge-masked 128-bit complex multiply of %x0 and %x1: the mask %x3 is moved
; into %k1, the result is written over the pass-through %x2 (%xmm2) under that
; mask, and then copied to the return register.
define <4 x float> @test_int_x86_avx512fp8_mask_cfmul_ph_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512fp8_mask_cfmul_ph_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfmulcph %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
  ret <4 x float> %res
}

; Zero-masked 128-bit complex multiply: passing zeroinitializer as the
; pass-through operand is expected to select the {z} form of vfmulcph.
; The %x2 argument is intentionally unused.
define <4 x float> @test_int_x86_avx512fp8_maskz_cfmul_ph_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512fp8_maskz_cfmul_ph_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfmulcph %xmm1, %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.128(<4 x float> %x0, <4 x float> %x1, <4 x float> zeroinitializer, i8 %x3)
  ret <4 x float> %res
}

; Unmasked 128-bit complex multiply: an all-ones mask (i8 -1) is expected to
; produce a plain vfmulcph with no mask register. The operands are passed as
; (%x2, %x1, %x0), so the instruction reads %xmm2/%xmm1 and writes %xmm0.
define <4 x float> @test_int_x86_avx512fp8_cfmul_ph_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2){
; CHECK-LABEL: test_int_x86_avx512fp8_cfmul_ph_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmulcph %xmm1, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.128(<4 x float> %x2, <4 x float> %x1, <4 x float> %x0, i8 -1)
  ret <4 x float> %res
}

; 256-bit complex-multiply intrinsic (selected as vfmulcph on %ymm registers
; in the tests below). As used in this file the operands are: two
; multiplicands, a pass-through vector, and an i8 mask.
declare <8 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.256(<8 x float>, <8 x float>, <8 x float>, i8)

; Merge-masked 256-bit complex multiply of %x0 and %x1: the mask %x3 is moved
; into %k1, the result is written over the pass-through %x2 (%ymm2) under that
; mask, and then copied to the return register.
define <8 x float> @test_int_x86_avx512fp16_mask_cfmul_ph_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cfmul_ph_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfmulcph %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT:    vmovaps %ymm2, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
  ret <8 x float> %res
}

; Zero-masked 256-bit complex multiply: passing zeroinitializer as the
; pass-through operand is expected to select the {z} form of vfmulcph.
; The %x2 argument is intentionally unused.
define <8 x float> @test_int_x86_avx512fp16_maskz_cfmul_ph_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_cfmul_ph_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfmulcph %ymm1, %ymm0, %ymm2 {%k1} {z}
; CHECK-NEXT:    vmovaps %ymm2, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.256(<8 x float> %x0, <8 x float> %x1, <8 x float> zeroinitializer, i8 %x3)
  ret <8 x float> %res
}

; Unmasked 256-bit complex multiply: an all-ones mask (i8 -1) is expected to
; produce a plain vfmulcph with no mask register. The operands are passed as
; (%x2, %x1, %x0), so the instruction reads %ymm2/%ymm1 and writes %ymm0.
define <8 x float> @test_int_x86_avx512fp16_cfmul_ph_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2){
; CHECK-LABEL: test_int_x86_avx512fp16_cfmul_ph_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmulcph %ymm1, %ymm2, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.256(<8 x float> %x2, <8 x float> %x1, <8 x float> %x0, i8 -1)
  ret <8 x float> %res
}

; 512-bit complex-multiply intrinsic (selected as vfmulcph on %zmm registers
; in the tests below). As used in this file the operands are: two
; multiplicands, a pass-through vector, an i16 mask, and a trailing i32
; rounding operand (the tests pass 4 and 11).
declare <16 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

; Merge-masked 512-bit complex multiply of %x0 and %x1 with rounding operand 4
; (no rounding annotation is expected in the output): the mask %x3 is moved
; into %k1, the result is written over the pass-through %x2 (%zmm2) under that
; mask, and then copied to the return register.
define <16 x float> @test_int_x86_avx512fp16_mask_cfmul_ph_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cfmul_ph_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfmulcph %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  ret <16 x float> %res
}

; Zero-masked 512-bit complex multiply with rounding operand 4: passing
; zeroinitializer as the pass-through operand is expected to select the {z}
; form of vfmulcph. The %x2 argument is intentionally unused.
define <16 x float> @test_int_x86_avx512fp16_maskz_cfmul_ph_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_cfmul_ph_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfmulcph %zmm1, %zmm0, %zmm2 {%k1} {z}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.512(<16 x float> %x0, <16 x float> %x1, <16 x float> zeroinitializer, i16 %x3, i32 4)
  ret <16 x float> %res
}

; Unmasked 512-bit complex multiply with an explicit rounding operand (i32 11),
; which is expected to print as {rz-sae} on the instruction.
; NOTE(review): the "_rn" suffix does not match the expected {rz-sae}
; annotation; the name is kept because the label must match it. The %x3
; argument is unused (the mask is the constant i16 -1).
define <16 x float> @test_int_x86_avx512fp16_cfmul_ph_512_rn(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512fp16_cfmul_ph_512_rn:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmulcph {rz-sae}, %zmm1, %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.512(<16 x float> %x2, <16 x float> %x1, <16 x float> %x0, i16 -1, i32 11)
  ret <16 x float> %res
}

; Unmasked 512-bit complex multiply with rounding operand 4: an all-ones mask
; (i16 -1) is expected to produce a plain vfmulcph with no mask register and
; no rounding annotation. Operands are passed as (%x2, %x1, %x0), so the
; instruction reads %zmm2/%zmm1 and writes %zmm0. %x3 is unused.
define <16 x float> @test_int_x86_avx512fp16_cfmul_ph_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512fp16_cfmul_ph_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmulcph %zmm1, %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.512(<16 x float> %x2, <16 x float> %x1, <16 x float> %x0, i16 -1, i32 4)
  ret <16 x float> %res
}

; 128-bit conjugate complex-multiply intrinsic (selected as vfcmulcph in the
; tests below). As used in this file the operands are: two multiplicands, a
; pass-through vector, and an i8 mask.
declare <4 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.128(<4 x float>, <4 x float>, <4 x float>, i8)

; Masked 128-bit conjugate complex multiply whose second multiplicand is a
; constant splat of 1.0. The expected code folds that constant into an
; embedded-broadcast ({1to4}) constant-pool operand; %x2 is the pass-through
; value and %x3 the mask (moved into %k1).
; NOTE(review): "fp8" in the test name looks like a typo for "fp16" (the
; intrinsic is avx512fp16) - confirm before renaming; the label must match.
define <4 x float> @test_int_x86_avx512fp8_mask_cfcmul_ph_bst(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512fp8_mask_cfcmul_ph_bst:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfcmulcph {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.128(<4 x float> %x0, <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> %x2, i8 %x3)
  ret <4 x float> %res
}

; Check conjugate complex FMUL is not commutable.
; The splat-1.0 constant is the FIRST multiplicand here. The expected code
; materializes it in a register (vbroadcastss into %xmm1) and keeps the
; operand order, rather than swapping the operands to fold the constant as a
; {1to4} broadcast memory operand.
define <4 x float> @test_int_x86_avx512fp8_mask_cfcmul_ph_bst2(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512fp8_mask_cfcmul_ph_bst2:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vbroadcastss {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; CHECK-NEXT:    vfcmulcph %xmm0, %xmm1, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.128(<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> %x0, <4 x float> %x2, i8 %x3)
  ret <4 x float> %res
}

; Merge-masked 128-bit conjugate complex multiply of %x0 and %x1: the mask %x3
; is moved into %k1, the result is written over the pass-through %x2 (%xmm2)
; under that mask, and then copied to the return register.
define <4 x float> @test_int_x86_avx512fp8_mask_cfcmul_ph_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512fp8_mask_cfcmul_ph_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfcmulcph %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
  ret <4 x float> %res
}

; Zero-masked 128-bit conjugate complex multiply: passing zeroinitializer as
; the pass-through operand is expected to select the {z} form of vfcmulcph.
; The %x2 argument is intentionally unused.
define <4 x float> @test_int_x86_avx512fp8_maskz_cfcmul_ph_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512fp8_maskz_cfcmul_ph_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfcmulcph %xmm1, %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.128(<4 x float> %x0, <4 x float> %x1, <4 x float> zeroinitializer, i8 %x3)
  ret <4 x float> %res
}

; Unmasked 128-bit conjugate complex multiply: an all-ones mask (i8 -1) is
; expected to produce a plain vfcmulcph with no mask register. The operands
; are passed as (%x2, %x1, %x0), so the instruction reads %xmm2/%xmm1 and
; writes %xmm0.
define <4 x float> @test_int_x86_avx512fp8_cfcmul_ph_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2){
; CHECK-LABEL: test_int_x86_avx512fp8_cfcmul_ph_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfcmulcph %xmm1, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.128(<4 x float> %x2, <4 x float> %x1, <4 x float> %x0, i8 -1)
  ret <4 x float> %res
}

; 256-bit conjugate complex-multiply intrinsic (selected as vfcmulcph on %ymm
; registers in the tests below). As used in this file the operands are: two
; multiplicands, a pass-through vector, and an i8 mask.
declare <8 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.256(<8 x float>, <8 x float>, <8 x float>, i8)

; Merge-masked 256-bit conjugate complex multiply of %x0 and %x1: the mask %x3
; is moved into %k1, the result is written over the pass-through %x2 (%ymm2)
; under that mask, and then copied to the return register.
define <8 x float> @test_int_x86_avx512fp16_mask_cfcmul_ph_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cfcmul_ph_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfcmulcph %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT:    vmovaps %ymm2, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
  ret <8 x float> %res
}

; Zero-masked 256-bit conjugate complex multiply: passing zeroinitializer as
; the pass-through operand is expected to select the {z} form of vfcmulcph.
; The %x2 argument is intentionally unused.
define <8 x float> @test_int_x86_avx512fp16_maskz_cfcmul_ph_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_cfcmul_ph_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfcmulcph %ymm1, %ymm0, %ymm2 {%k1} {z}
; CHECK-NEXT:    vmovaps %ymm2, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.256(<8 x float> %x0, <8 x float> %x1, <8 x float> zeroinitializer, i8 %x3)
  ret <8 x float> %res
}

; Unmasked 256-bit conjugate complex multiply: an all-ones mask (i8 -1) is
; expected to produce a plain vfcmulcph with no mask register. The operands
; are passed as (%x2, %x1, %x0), so the instruction reads %ymm2/%ymm1 and
; writes %ymm0.
define <8 x float> @test_int_x86_avx512fp16_cfcmul_ph_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2){
; CHECK-LABEL: test_int_x86_avx512fp16_cfcmul_ph_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfcmulcph %ymm1, %ymm2, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.256(<8 x float> %x2, <8 x float> %x1, <8 x float> %x0, i8 -1)
  ret <8 x float> %res
}

; 512-bit conjugate complex-multiply intrinsic (selected as vfcmulcph on %zmm
; registers in the tests below). As used in this file the operands are: two
; multiplicands, a pass-through vector, an i16 mask, and a trailing i32
; rounding operand (the tests pass 4 and 11).
declare <16 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

; Merge-masked 512-bit conjugate complex multiply of %x0 and %x1 with rounding
; operand 4 (no rounding annotation is expected in the output): the mask %x3
; is moved into %k1, the result is written over the pass-through %x2 (%zmm2)
; under that mask, and then copied to the return register.
define <16 x float> @test_int_x86_avx512fp16_mask_cfcmul_ph_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cfcmul_ph_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfcmulcph %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  ret <16 x float> %res
}

; Zero-masked 512-bit conjugate complex multiply with rounding operand 4:
; passing zeroinitializer as the pass-through operand is expected to select
; the {z} form of vfcmulcph. The %x2 argument is intentionally unused.
define <16 x float> @test_int_x86_avx512fp16_maskz_cfcmul_ph_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_cfcmul_ph_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vfcmulcph %zmm1, %zmm0, %zmm2 {%k1} {z}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.512(<16 x float> %x0, <16 x float> %x1, <16 x float> zeroinitializer, i16 %x3, i32 4)
  ret <16 x float> %res
}

; Unmasked 512-bit conjugate complex multiply with an explicit rounding
; operand (i32 11), which is expected to print as {rz-sae} on the instruction.
; NOTE(review): the "_rn" suffix does not match the expected {rz-sae}
; annotation; the name is kept because the label must match it. The %x3
; argument is unused (the mask is the constant i16 -1).
define <16 x float> @test_int_x86_avx512fp16_cfcmul_ph_512_rn(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512fp16_cfcmul_ph_512_rn:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfcmulcph {rz-sae}, %zmm1, %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.512(<16 x float> %x2, <16 x float> %x1, <16 x float> %x0, i16 -1, i32 11)
  ret <16 x float> %res
}

; Unmasked 512-bit conjugate complex multiply with rounding operand 4: an
; all-ones mask (i16 -1) is expected to produce a plain vfcmulcph with no mask
; register and no rounding annotation. Operands are passed as (%x2, %x1, %x0),
; so the instruction reads %zmm2/%zmm1 and writes %zmm0. %x3 is unused.
define <16 x float> @test_int_x86_avx512fp16_cfcmul_ph_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512fp16_cfcmul_ph_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfcmulcph %zmm1, %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.512(<16 x float> %x2, <16 x float> %x1, <16 x float> %x0, i16 -1, i32 4)
  ret <16 x float> %res
}
