// RUN: %clang_cc1 %s -O3 -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -o - | FileCheck %s

// Don't include mm_malloc.h, it's system specific.
#define __MM_MALLOC_H

#include <immintrin.h>

//
// Test LLVM IR codegen of AVX unaligned loads, SSE4.2 string-compare
// intrinsics, and 256-bit element extraction.
//
// Every test function starts with a label directive naming itself, so the
// pattern that follows can only match inside that function's own IR and a
// regression in one function cannot be hidden by a match in a later one.
//

// Unaligned 256-bit loads: each pattern expects a plain vector load whose
// alignment is 1.

__m256 test__mm256_loadu_ps(void *p) {
  // CHECK-LABEL: @test__mm256_loadu_ps
  // CHECK: load <8 x float>* %{{.*}}, align 1
  return _mm256_loadu_ps(p);
}

__m256d test__mm256_loadu_pd(void *p) {
  // CHECK-LABEL: @test__mm256_loadu_pd
  // CHECK: load <4 x double>* %{{.*}}, align 1
  return _mm256_loadu_pd(p);
}

__m256i test__mm256_loadu_si256(void *p) {
  // CHECK-LABEL: @test__mm256_loadu_si256
  // CHECK: load <4 x i64>* %{{.+}}, align 1
  return _mm256_loadu_si256(p);
}

// Explicit-length string compares: each wrapper is expected to lower to the
// matching llvm.x86.sse42.pcmpestr* intrinsic. All of them pass the same
// immediate (7) as the control operand.

__m128i test_mm_cmpestrm(__m128i A, int LA, __m128i B, int LB) {
  // CHECK-LABEL: @test_mm_cmpestrm
  // CHECK: @llvm.x86.sse42.pcmpestrm128
  return _mm_cmpestrm(A, LA, B, LB, 7);
}

int test_mm_cmpestri(__m128i A, int LA, __m128i B, int LB) {
  // CHECK-LABEL: @test_mm_cmpestri
  // CHECK: @llvm.x86.sse42.pcmpestri128
  return _mm_cmpestri(A, LA, B, LB, 7);
}

int test_mm_cmpestra(__m128i A, int LA, __m128i B, int LB) {
  // CHECK-LABEL: @test_mm_cmpestra
  // CHECK: @llvm.x86.sse42.pcmpestria128
  return _mm_cmpestra(A, LA, B, LB, 7);
}

int test_mm_cmpestrc(__m128i A, int LA, __m128i B, int LB) {
  // CHECK-LABEL: @test_mm_cmpestrc
  // CHECK: @llvm.x86.sse42.pcmpestric128
  return _mm_cmpestrc(A, LA, B, LB, 7);
}

int test_mm_cmpestro(__m128i A, int LA, __m128i B, int LB) {
  // CHECK-LABEL: @test_mm_cmpestro
  // CHECK: @llvm.x86.sse42.pcmpestrio128
  return _mm_cmpestro(A, LA, B, LB, 7);
}

int test_mm_cmpestrs(__m128i A, int LA, __m128i B, int LB) {
  // CHECK-LABEL: @test_mm_cmpestrs
  // CHECK: @llvm.x86.sse42.pcmpestris128
  return _mm_cmpestrs(A, LA, B, LB, 7);
}

int test_mm_cmpestrz(__m128i A, int LA, __m128i B, int LB) {
  // CHECK-LABEL: @test_mm_cmpestrz
  // CHECK: @llvm.x86.sse42.pcmpestriz128
  return _mm_cmpestrz(A, LA, B, LB, 7);
}

// Implicit-length string compares: same scheme as above, lowering to the
// llvm.x86.sse42.pcmpistr* intrinsics.

__m128i test_mm_cmpistrm(__m128i A, __m128i B) {
  // CHECK-LABEL: @test_mm_cmpistrm
  // CHECK: @llvm.x86.sse42.pcmpistrm128
  return _mm_cmpistrm(A, B, 7);
}

int test_mm_cmpistri(__m128i A, __m128i B) {
  // CHECK-LABEL: @test_mm_cmpistri
  // CHECK: @llvm.x86.sse42.pcmpistri128
  return _mm_cmpistri(A, B, 7);
}

int test_mm_cmpistra(__m128i A, __m128i B) {
  // CHECK-LABEL: @test_mm_cmpistra
  // CHECK: @llvm.x86.sse42.pcmpistria128
  return _mm_cmpistra(A, B, 7);
}

int test_mm_cmpistrc(__m128i A, __m128i B) {
  // CHECK-LABEL: @test_mm_cmpistrc
  // CHECK: @llvm.x86.sse42.pcmpistric128
  return _mm_cmpistrc(A, B, 7);
}

int test_mm_cmpistro(__m128i A, __m128i B) {
  // CHECK-LABEL: @test_mm_cmpistro
  // CHECK: @llvm.x86.sse42.pcmpistrio128
  return _mm_cmpistro(A, B, 7);
}

int test_mm_cmpistrs(__m128i A, __m128i B) {
  // CHECK-LABEL: @test_mm_cmpistrs
  // CHECK: @llvm.x86.sse42.pcmpistris128
  return _mm_cmpistrs(A, B, 7);
}

int test_mm_cmpistrz(__m128i A, __m128i B) {
  // CHECK-LABEL: @test_mm_cmpistrz
  // CHECK: @llvm.x86.sse42.pcmpistriz128
  return _mm_cmpistrz(A, B, 7);
}

// Element extraction. The index passed in each test below equals the lane
// count of the vector type named in its pattern (8, 16, 32), i.e. it is one
// past the last valid lane, yet the pattern expects element 0. The
// out-of-range index is therefore deliberate: do not "fix" it to an in-range
// value.
// NOTE(review): presumably the header reduces the index modulo the lane
// count; confirm against avxintrin.h.

int test_extract_epi32(__m256i __a) {
  // CHECK-LABEL: @test_extract_epi32
  // CHECK: extractelement <8 x i32> %{{.*}}, i32 0
  return _mm256_extract_epi32(__a, 8);
}

int test_extract_epi16(__m256i __a) {
  // CHECK-LABEL: @test_extract_epi16
  // CHECK: extractelement <16 x i16> %{{.*}}, i32 0
  return _mm256_extract_epi16(__a, 16);
}

int test_extract_epi8(__m256i __a) {
  // CHECK-LABEL: @test_extract_epi8
  // CHECK: extractelement <32 x i8> %{{.*}}, i32 0
  return _mm256_extract_epi8(__a, 32);
}