// RUN: %clang_cc1 %s -O0 -triple=x86_64-apple-darwin -target-feature +avx512f -emit-llvm -o - -Werror | FileCheck %s

// Codegen test: the command above compiles this file to LLVM IR at -O0 with
// AVX-512F enabled and pipes the IR to FileCheck. Each function below wraps
// one intrinsic; the directive comments inside it describe the IR expected
// for that function. Warnings are errors (-Werror).

// Don't include mm_malloc.h, it's system specific.
#define __MM_MALLOC_H

#include <immintrin.h>

// Wraps _mm512_sqrt_pd on a __m512d argument. The directives expect the IR
// for this function to reference @llvm.x86.avx512.sqrt.pd.512.
__m512d test_mm512_sqrt_pd(__m512d a)
{
  // CHECK-LABEL: @test_mm512_sqrt_pd
  // CHECK: @llvm.x86.avx512.sqrt.pd.512
  return _mm512_sqrt_pd(a);
}

// Wraps _mm512_sqrt_ps on a __m512 argument. The directives expect the IR
// for this function to reference @llvm.x86.avx512.sqrt.ps.512.
__m512 test_mm512_sqrt_ps(__m512 a)
{
  // CHECK-LABEL: @test_mm512_sqrt_ps
  // CHECK: @llvm.x86.avx512.sqrt.ps.512
  return _mm512_sqrt_ps(a);
}

// Wraps _mm512_rsqrt14_pd on a __m512d argument. The directives expect the
// IR for this function to reference @llvm.x86.avx512.rsqrt14.pd.512.
__m512d test_mm512_rsqrt14_pd(__m512d a)
{
  // CHECK-LABEL: @test_mm512_rsqrt14_pd
  // CHECK: @llvm.x86.avx512.rsqrt14.pd.512
  return _mm512_rsqrt14_pd(a);
}

// Wraps _mm512_rsqrt14_ps on a __m512 argument. The directives expect the
// IR for this function to reference @llvm.x86.avx512.rsqrt14.ps.512.
__m512 test_mm512_rsqrt14_ps(__m512 a)
{
  // CHECK-LABEL: @test_mm512_rsqrt14_ps
  // CHECK: @llvm.x86.avx512.rsqrt14.ps.512
  return _mm512_rsqrt14_ps(a);
}

// Wraps _mm512_add_ps on two __m512 arguments. The directives expect a
// plain `fadd <16 x float>` instruction rather than an intrinsic call.
__m512 test_mm512_add_ps(__m512 a, __m512 b)
{
  // CHECK-LABEL: @test_mm512_add_ps
  // CHECK: fadd <16 x float>
  return _mm512_add_ps(a, b);
}

// Wraps _mm512_add_pd on two __m512d arguments. The directives expect a
// plain `fadd <8 x double>` instruction rather than an intrinsic call.
__m512d test_mm512_add_pd(__m512d a, __m512d b)
{
  // CHECK-LABEL: @test_mm512_add_pd
  // CHECK: fadd <8 x double>
  return _mm512_add_pd(a, b);
}

// Wraps _mm512_mul_ps on two __m512 arguments. The directives expect a
// plain `fmul <16 x float>` instruction rather than an intrinsic call.
__m512 test_mm512_mul_ps(__m512 a, __m512 b)
{
  // CHECK-LABEL: @test_mm512_mul_ps
  // CHECK: fmul <16 x float>
  return _mm512_mul_ps(a, b);
}

// Wraps _mm512_mul_pd on two __m512d arguments. The directives expect a
// plain `fmul <8 x double>` instruction rather than an intrinsic call.
__m512d test_mm512_mul_pd(__m512d a, __m512d b)
{
  // CHECK-LABEL: @test_mm512_mul_pd
  // CHECK: fmul <8 x double>
  return _mm512_mul_pd(a, b);
}

// Wraps _mm512_storeu_ps, writing the __m512 value `a` through `p`. The
// directives expect the IR for this function to reference
// @llvm.x86.avx512.mask.storeu.ps.512.
void test_mm512_storeu_ps(void *p, __m512 a)
{
  // CHECK-LABEL: @test_mm512_storeu_ps
  // CHECK: @llvm.x86.avx512.mask.storeu.ps.512
  _mm512_storeu_ps(p, a);
}

// Wraps _mm512_storeu_pd, writing the __m512d value `a` through `p`. The
// directives expect the IR for this function to reference
// @llvm.x86.avx512.mask.storeu.pd.512.
void test_mm512_storeu_pd(void *p, __m512d a)
{
  // CHECK-LABEL: @test_mm512_storeu_pd
  // CHECK: @llvm.x86.avx512.mask.storeu.pd.512
  _mm512_storeu_pd(p, a);
}

// Wraps _mm512_store_ps, writing the __m512 value `a` through `p`. The
// directives expect a plain `store <16 x float>` instruction.
void test_mm512_store_ps(void *p, __m512 a)
{
  // CHECK-LABEL: @test_mm512_store_ps
  // CHECK: store <16 x float>
  _mm512_store_ps(p, a);
}

// Wraps _mm512_store_pd, writing the __m512d value `a` through `p`. The
// directives expect a plain `store <8 x double>` instruction.
void test_mm512_store_pd(void *p, __m512d a)
{
  // CHECK-LABEL: @test_mm512_store_pd
  // CHECK: store <8 x double>
  _mm512_store_pd(p, a);
}

// Wraps _mm512_loadu_ps, reading a __m512 value through `p`. The directives
// expect a `load <16 x float>*` whose IR line ends in `align 1` (the trailing
// end-of-line anchor rejects any longer alignment value such as 16).
__m512 test_mm512_loadu_ps(void *p)
{
  // CHECK-LABEL: @test_mm512_loadu_ps
  // CHECK: load <16 x float>* {{.*}}, align 1{{$}}
  return _mm512_loadu_ps(p);
}

// Wraps _mm512_loadu_pd, reading a __m512d value through `p`. The directives
// expect a `load <8 x double>*` whose IR line ends in `align 1` (the trailing
// end-of-line anchor rejects any longer alignment value such as 16).
__m512d test_mm512_loadu_pd(void *p)
{
  // CHECK-LABEL: @test_mm512_loadu_pd
  // CHECK: load <8 x double>* {{.*}}, align 1{{$}}
  return _mm512_loadu_pd(p);
}

// Wraps _mm512_set1_pd on a scalar double. The directives expect eight
// `insertelement <8 x double>` instructions, for lane indices 0 through 7,
// appearing in that order.
__m512d test_mm512_set1_pd(double d)
{
  // CHECK-LABEL: @test_mm512_set1_pd
  // CHECK: insertelement <8 x double> {{.*}}, i32 0
  // CHECK: insertelement <8 x double> {{.*}}, i32 1
  // CHECK: insertelement <8 x double> {{.*}}, i32 2
  // CHECK: insertelement <8 x double> {{.*}}, i32 3
  // CHECK: insertelement <8 x double> {{.*}}, i32 4
  // CHECK: insertelement <8 x double> {{.*}}, i32 5
  // CHECK: insertelement <8 x double> {{.*}}, i32 6
  // CHECK: insertelement <8 x double> {{.*}}, i32 7
  return _mm512_set1_pd(d);
}

// Wraps _mm512_castpd256_pd512 on a __m256d argument. The directives expect
// a `shufflevector <4 x double>` whose mask keeps lanes 0-3 and leaves the
// remaining four result lanes undef.
__m512d test_mm512_castpd256_pd512(__m256d a)
{
  // CHECK-LABEL: @test_mm512_castpd256_pd512
  // CHECK: shufflevector <4 x double> {{.*}} <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  return _mm512_castpd256_pd512(a);
}

// Wraps _mm512_knot on a __mmask16 argument. The directives expect the IR
// for this function to reference @llvm.x86.avx512.knot.w.
__mmask16 test_mm512_knot(__mmask16 a)
{
  // CHECK-LABEL: @test_mm512_knot
  // CHECK: @llvm.x86.avx512.knot.w
  return _mm512_knot(a);
}

test_mm512_valign_epi64(__m512i a,__m512i b)134*0a6a1f1dSLionel Sambuc __m512i test_mm512_valign_epi64(__m512i a, __m512i b)
135*0a6a1f1dSLionel Sambuc {
136*0a6a1f1dSLionel Sambuc // CHECK-LABEL: @test_mm512_valign_epi64
137*0a6a1f1dSLionel Sambuc // CHECK: @llvm.x86.avx512.mask.valign.q.512
138*0a6a1f1dSLionel Sambuc return _mm512_valign_epi64(a, b, 2);
139*0a6a1f1dSLionel Sambuc }
140*0a6a1f1dSLionel Sambuc
// Wraps _mm512_broadcastsd_pd on a __m128d argument. The directives expect
// eight `insertelement <8 x double>` instructions, for lane indices 0
// through 7, appearing in that order.
__m512d test_mm512_broadcastsd_pd(__m128d a)
{
  // CHECK-LABEL: @test_mm512_broadcastsd_pd
  // CHECK: insertelement <8 x double> {{.*}}, i32 0
  // CHECK: insertelement <8 x double> {{.*}}, i32 1
  // CHECK: insertelement <8 x double> {{.*}}, i32 2
  // CHECK: insertelement <8 x double> {{.*}}, i32 3
  // CHECK: insertelement <8 x double> {{.*}}, i32 4
  // CHECK: insertelement <8 x double> {{.*}}, i32 5
  // CHECK: insertelement <8 x double> {{.*}}, i32 6
  // CHECK: insertelement <8 x double> {{.*}}, i32 7
  return _mm512_broadcastsd_pd(a);
}

test_mm512_fmadd_pd(__m512d a,__m512d b,__m512d c)155*0a6a1f1dSLionel Sambuc __m512i test_mm512_fmadd_pd(__m512d a, __m512d b, __m512d c)
156*0a6a1f1dSLionel Sambuc {
157*0a6a1f1dSLionel Sambuc // CHECK-LABEL: @test_mm512_fmadd_pd
158*0a6a1f1dSLionel Sambuc // CHECK: @llvm.x86.fma.mask.vfmadd.pd.512
159*0a6a1f1dSLionel Sambuc return _mm512_fmadd_pd(a, b, c);
160*0a6a1f1dSLionel Sambuc }
161*0a6a1f1dSLionel Sambuc
// Wraps _mm512_cmpeq_epi32_mask on two __m512i arguments and casts the
// result to __mmask16. The directives expect the IR for this function to
// reference @llvm.x86.avx512.mask.pcmpeq.d.512.
__mmask16 test_mm512_cmpeq_epi32_mask(__m512i __a, __m512i __b) {
  // CHECK-LABEL: @test_mm512_cmpeq_epi32_mask
  // CHECK: @llvm.x86.avx512.mask.pcmpeq.d.512
  return (__mmask16)_mm512_cmpeq_epi32_mask(__a, __b);
}

// Wraps _mm512_mask_cmpeq_epi32_mask, passing the __mmask16 `__u` ahead of
// the two __m512i operands, and casts the result to __mmask16. The
// directives expect the IR for this function to reference
// @llvm.x86.avx512.mask.pcmpeq.d.512.
__mmask16 test_mm512_mask_cmpeq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
  // CHECK-LABEL: @test_mm512_mask_cmpeq_epi32_mask
  // CHECK: @llvm.x86.avx512.mask.pcmpeq.d.512
  return (__mmask16)_mm512_mask_cmpeq_epi32_mask(__u, __a, __b);
}

// Wraps _mm512_mask_cmpeq_epi64_mask, passing the __mmask8 `__u` ahead of
// the two __m512i operands, and casts the result to __mmask8. The
// directives expect the IR for this function to reference
// @llvm.x86.avx512.mask.pcmpeq.q.512.
__mmask8 test_mm512_mask_cmpeq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
  // CHECK-LABEL: @test_mm512_mask_cmpeq_epi64_mask
  // CHECK: @llvm.x86.avx512.mask.pcmpeq.q.512
  return (__mmask8)_mm512_mask_cmpeq_epi64_mask(__u, __a, __b);
}

// Wraps _mm512_cmpeq_epi64_mask on two __m512i arguments and casts the
// result to __mmask8. The directives expect the IR for this function to
// reference @llvm.x86.avx512.mask.pcmpeq.q.512.
__mmask8 test_mm512_cmpeq_epi64_mask(__m512i __a, __m512i __b) {
  // CHECK-LABEL: @test_mm512_cmpeq_epi64_mask
  // CHECK: @llvm.x86.avx512.mask.pcmpeq.q.512
  return (__mmask8)_mm512_cmpeq_epi64_mask(__a, __b);
}

// Wraps _mm512_unpackhi_pd on two __m512d arguments. The directives expect
// a `shufflevector <8 x double>` with mask <1, 9, 3, 11, 5, 13, 7, 15>,
// i.e. the odd lanes of the two operands interleaved.
__m512d test_mm512_unpackhi_pd(__m512d a, __m512d b)
{
  // CHECK-LABEL: @test_mm512_unpackhi_pd
  // CHECK: shufflevector <8 x double> {{.*}} <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  return _mm512_unpackhi_pd(a, b);
}

// Wraps _mm512_unpacklo_pd on two __m512d arguments. The directives expect
// a `shufflevector <8 x double>` with mask <0, 8, 2, 10, 4, 12, 6, 14>,
// i.e. the even lanes of the two operands interleaved.
__m512d test_mm512_unpacklo_pd(__m512d a, __m512d b)
{
  // CHECK-LABEL: @test_mm512_unpacklo_pd
  // CHECK: shufflevector <8 x double> {{.*}} <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  return _mm512_unpacklo_pd(a, b);
}

test_mm512_unpackhi_ps(__m512d a,__m512d b)200*0a6a1f1dSLionel Sambuc __m512d test_mm512_unpackhi_ps(__m512d a, __m512d b)
201*0a6a1f1dSLionel Sambuc {
202*0a6a1f1dSLionel Sambuc // CHECK-LABEL: @test_mm512_unpackhi_ps
203*0a6a1f1dSLionel Sambuc // CHECK: shufflevector <16 x float> {{.*}} <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
204*0a6a1f1dSLionel Sambuc return _mm512_unpackhi_ps(a, b);
205*0a6a1f1dSLionel Sambuc }
206*0a6a1f1dSLionel Sambuc
test_mm512_unpacklo_ps(__m512d a,__m512d b)207*0a6a1f1dSLionel Sambuc __m512d test_mm512_unpacklo_ps(__m512d a, __m512d b)
208*0a6a1f1dSLionel Sambuc {
209*0a6a1f1dSLionel Sambuc // CHECK-LABEL: @test_mm512_unpacklo_ps
210*0a6a1f1dSLionel Sambuc // CHECK: shufflevector <16 x float> {{.*}} <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
211*0a6a1f1dSLionel Sambuc return _mm512_unpacklo_ps(a, b);
212*0a6a1f1dSLionel Sambuc }
213