// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 // REQUIRES: x86-registered-target // RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-- -target-feature +avx -disable-O0-optnone -emit-llvm -o - | opt -S -passes=mem2reg | FileCheck %s // RUN: %clang_cc1 -ffreestanding %s -triple=i386-- -target-feature +avx -disable-O0-optnone -emit-llvm -o - | opt -S -passes=mem2reg | FileCheck %s #include // // Test LLVM IR codegen of shuffle instructions, checking if the masks are correct // // CHECK-LABEL: define dso_local <8 x float> @x( // CHECK-SAME: <8 x float> noundef [[A:%.*]], <8 x float> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[SHUFP:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> // CHECK-NEXT: ret <8 x float> [[SHUFP]] // __m256 x(__m256 a, __m256 b) { return _mm256_shuffle_ps(a, b, 203); } // CHECK-LABEL: define dso_local <2 x double> @test_mm_permute_pd( // CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[PERMIL:%.*]] = shufflevector <2 x double> [[A]], <2 x double> poison, <2 x i32> // CHECK-NEXT: ret <2 x double> [[PERMIL]] // __m128d test_mm_permute_pd(__m128d a) { return _mm_permute_pd(a, 1); } // CHECK-LABEL: define dso_local <4 x double> @test_mm256_permute_pd( // CHECK-SAME: <4 x double> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[PERMIL:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> // CHECK-NEXT: ret <4 x double> [[PERMIL]] // __m256d test_mm256_permute_pd(__m256d a) { return _mm256_permute_pd(a, 5); } // CHECK-LABEL: define dso_local <4 x float> @test_mm_permute_ps( // CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR1]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[PERMIL:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> // CHECK-NEXT: ret <4 x float> [[PERMIL]] // __m128 test_mm_permute_ps(__m128 a) { return _mm_permute_ps(a, 0x1b); } // CHECK-LABEL: define dso_local <4 x float> @test_mm_permute_ps2( // CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR1]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[PERMIL:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> // CHECK-NEXT: ret <4 x float> [[PERMIL]] // __m128 test_mm_permute_ps2(__m128 a) { return _mm_permute_ps(a, 0xe6); } // CHECK-LABEL: define dso_local <8 x float> @test_mm256_permute_ps( // CHECK-SAME: <8 x float> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[PERMIL:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <8 x i32> // CHECK-NEXT: ret <8 x float> [[PERMIL]] // __m256 test_mm256_permute_ps(__m256 a) { return _mm256_permute_ps(a, 0x1b); } // CHECK-LABEL: define dso_local <4 x double> @test_mm256_permute2f128_pd( // CHECK-SAME: <4 x double> noundef [[A:%.*]], <4 x double> noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[VPERM:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> // CHECK-NEXT: ret <4 x double> [[VPERM]] // __m256d test_mm256_permute2f128_pd(__m256d a, __m256d b) { return _mm256_permute2f128_pd(a, b, 0x31); } // CHECK-LABEL: define dso_local <8 x float> @test_mm256_permute2f128_ps( // CHECK-SAME: <8 x float> noundef [[A:%.*]], <8 x float> noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[VPERM:%.*]] = shufflevector <8 x float> [[B]], <8 x float> [[A]], <8 x i32> // CHECK-NEXT: ret <8 x float> [[VPERM]] // __m256 test_mm256_permute2f128_ps(__m256 a, __m256 b) { return _mm256_permute2f128_ps(a, b, 0x13); } // CHECK-LABEL: define dso_local <4 x i64> @test_mm256_permute2f128_si256( // CHECK-SAME: <4 x i64> noundef [[A:%.*]], <4 x i64> noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[B]] to <8 x i32> // CHECK-NEXT: [[VPERM:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> [[TMP1]], <8 x i32> // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i32> [[VPERM]] to <4 x i64> // CHECK-NEXT: ret <4 x i64> [[TMP2]] // __m256i test_mm256_permute2f128_si256(__m256i a, __m256i b) { return _mm256_permute2f128_si256(a, b, 0x20); } // CHECK-LABEL: define dso_local <4 x float> @test_mm_broadcast_ss( // CHECK-SAME: ptr noundef [[__A:%.*]]) #[[ATTR1]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[__A]], align 1 // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i32 0 // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float [[TMP0]], i32 1 // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float [[TMP0]], i32 2 // CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <4 x float> [[VECINIT3_I]], float [[TMP0]], i32 3 // CHECK-NEXT: ret <4 x float> [[VECINIT4_I]] // __m128 test_mm_broadcast_ss(float const *__a) { return _mm_broadcast_ss(__a); } // CHECK-LABEL: define dso_local <4 x double> @test_mm256_broadcast_sd( // CHECK-SAME: ptr noundef [[__A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[__A]], align 1 // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x double> poison, double [[TMP0]], i32 0 // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x double> [[VECINIT_I]], double [[TMP0]], i32 1 // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x double> [[VECINIT2_I]], double [[TMP0]], i32 2 // CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <4 x double> [[VECINIT3_I]], double [[TMP0]], i32 3 // CHECK-NEXT: ret <4 x double> [[VECINIT4_I]] // __m256d test_mm256_broadcast_sd(double const *__a) { return _mm256_broadcast_sd(__a); } // CHECK-LABEL: define dso_local <8 x float> @test_mm256_broadcast_ss( // CHECK-SAME: ptr noundef [[__A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[__A]], align 1 // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x float> poison, float [[TMP0]], i32 0 // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x float> [[VECINIT_I]], float [[TMP0]], i32 1 // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x float> [[VECINIT2_I]], float [[TMP0]], i32 2 // CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x float> [[VECINIT3_I]], float [[TMP0]], i32 3 // CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x float> [[VECINIT4_I]], float [[TMP0]], i32 4 // CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x float> [[VECINIT5_I]], float [[TMP0]], i32 5 // CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x float> [[VECINIT6_I]], float [[TMP0]], i32 6 // CHECK-NEXT: [[VECINIT8_I:%.*]] = insertelement <8 x float> [[VECINIT7_I]], float [[TMP0]], i32 7 // CHECK-NEXT: ret <8 x float> [[VECINIT8_I]] // __m256 test_mm256_broadcast_ss(float const *__a) { return _mm256_broadcast_ss(__a); } // Make sure we have the correct mask for each insertf128 case. // CHECK-LABEL: define dso_local <8 x float> @test_mm256_insertf128_ps_0( // CHECK-SAME: <8 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[WIDEN:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <8 x i32> // CHECK-NEXT: [[INSERT:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[WIDEN]], <8 x i32> // CHECK-NEXT: ret <8 x float> [[INSERT]] // __m256 test_mm256_insertf128_ps_0(__m256 a, __m128 b) { return _mm256_insertf128_ps(a, b, 0); } // CHECK-LABEL: define dso_local <4 x double> @test_mm256_insertf128_pd_0( // CHECK-SAME: <4 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[WIDEN:%.*]] = shufflevector <2 x double> [[B]], <2 x double> poison, <4 x i32> // CHECK-NEXT: [[INSERT:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[WIDEN]], <4 x i32> // CHECK-NEXT: ret <4 x double> [[INSERT]] // __m256d test_mm256_insertf128_pd_0(__m256d a, __m128d b) { return _mm256_insertf128_pd(a, b, 0); } // CHECK-LABEL: define dso_local <4 x i64> @test_mm256_insertf128_si256_0( // CHECK-SAME: <4 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <4 x i32> // CHECK-NEXT: [[WIDEN:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> // CHECK-NEXT: [[INSERT:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> [[WIDEN]], <8 x i32> // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i32> [[INSERT]] to <4 x i64> // CHECK-NEXT: ret <4 x i64> [[TMP2]] // __m256i test_mm256_insertf128_si256_0(__m256i a, __m128i b) { return _mm256_insertf128_si256(a, b, 0); } // CHECK-LABEL: define dso_local <8 x float> @test_mm256_insertf128_ps_1( // CHECK-SAME: <8 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[WIDEN:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <8 x i32> // CHECK-NEXT: [[INSERT:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[WIDEN]], <8 x i32> // CHECK-NEXT: ret <8 x float> [[INSERT]] // __m256 test_mm256_insertf128_ps_1(__m256 a, __m128 b) { return _mm256_insertf128_ps(a, b, 1); } // CHECK-LABEL: define dso_local <4 x double> @test_mm256_insertf128_pd_1( // CHECK-SAME: <4 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[WIDEN:%.*]] = shufflevector <2 x double> [[B]], <2 x double> poison, <4 x i32> // CHECK-NEXT: [[INSERT:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[WIDEN]], <4 x i32> // CHECK-NEXT: ret <4 x double> [[INSERT]] // __m256d test_mm256_insertf128_pd_1(__m256d a, __m128d b) { return _mm256_insertf128_pd(a, b, 1); } // CHECK-LABEL: define dso_local <4 x i64> @test_mm256_insertf128_si256_1( // CHECK-SAME: <4 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <4 x i32> // CHECK-NEXT: [[WIDEN:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> // CHECK-NEXT: [[INSERT:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> [[WIDEN]], <8 x i32> // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i32> [[INSERT]] to <4 x i64> // CHECK-NEXT: ret <4 x i64> [[TMP2]] // __m256i test_mm256_insertf128_si256_1(__m256i a, __m128i b) { return _mm256_insertf128_si256(a, b, 1); } // Make sure we have the correct mask for each extractf128 case. // CHECK-LABEL: define dso_local <4 x float> @test_mm256_extractf128_ps_0( // CHECK-SAME: <8 x float> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> // CHECK-NEXT: ret <4 x float> [[EXTRACT]] // __m128 test_mm256_extractf128_ps_0(__m256 a) { return _mm256_extractf128_ps(a, 0); } // CHECK-LABEL: define dso_local <2 x double> @test_mm256_extractf128_pd_0( // CHECK-SAME: <4 x double> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <2 x i32> // CHECK-NEXT: ret <2 x double> [[EXTRACT]] // __m128d test_mm256_extractf128_pd_0(__m256d a) { return _mm256_extractf128_pd(a, 0); } // CHECK-LABEL: define dso_local <2 x i64> @test_mm256_extractf128_si256_0( // CHECK-SAME: <4 x i64> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32> // CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <4 x i32> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[EXTRACT]] to <2 x i64> // CHECK-NEXT: ret <2 x i64> [[TMP1]] // __m128i test_mm256_extractf128_si256_0(__m256i a) { return _mm256_extractf128_si256(a, 0); } // CHECK-LABEL: define dso_local <4 x float> @test_mm256_extractf128_ps_1( // CHECK-SAME: <8 x float> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> // CHECK-NEXT: ret <4 x float> [[EXTRACT]] // __m128 test_mm256_extractf128_ps_1(__m256 a) { return _mm256_extractf128_ps(a, 1); } // CHECK-LABEL: define dso_local <2 x double> @test_mm256_extractf128_pd_1( // CHECK-SAME: <4 x double> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <2 x i32> // CHECK-NEXT: ret <2 x double> [[EXTRACT]] // __m128d test_mm256_extractf128_pd_1(__m256d a) { return _mm256_extractf128_pd(a, 1); } // CHECK-LABEL: define dso_local <2 x i64> @test_mm256_extractf128_si256_1( // CHECK-SAME: <4 x i64> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32> // CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <4 x i32> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[EXTRACT]] to <2 x i64> // CHECK-NEXT: ret <2 x i64> [[TMP1]] // __m128i test_mm256_extractf128_si256_1(__m256i a) { return _mm256_extractf128_si256(a, 1); } // CHECK-LABEL: define dso_local <8 x float> @test_mm256_set_m128( // CHECK-SAME: <4 x float> noundef [[HI:%.*]], <4 x float> noundef [[LO:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> [[LO]], <4 x float> [[HI]], <8 x i32> // CHECK-NEXT: ret <8 x float> [[SHUFFLE_I]] // __m256 test_mm256_set_m128(__m128 hi, __m128 lo) { return _mm256_set_m128(hi, lo); } // CHECK-LABEL: define dso_local <4 x double> @test_mm256_set_m128d( // CHECK-SAME: <2 x double> noundef [[HI:%.*]], <2 x double> noundef [[LO:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x double> [[LO]], <2 x double> [[HI]], <4 x i32> // CHECK-NEXT: ret <4 x double> [[SHUFFLE_I]] // __m256d test_mm256_set_m128d(__m128d hi, __m128d lo) { return _mm256_set_m128d(hi, lo); } // CHECK-LABEL: define dso_local <4 x i64> @test_mm256_set_m128i( // CHECK-SAME: <2 x i64> noundef [[HI:%.*]], <2 x i64> noundef [[LO:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> [[LO]], <2 x i64> [[HI]], <4 x i32> // CHECK-NEXT: ret <4 x i64> [[SHUFFLE_I]] // __m256i test_mm256_set_m128i(__m128i hi, __m128i lo) { return _mm256_set_m128i(hi, lo); } // CHECK-LABEL: define dso_local <8 x float> @test_mm256_setr_m128( // CHECK-SAME: <4 x float> noundef [[HI:%.*]], <4 x float> noundef [[LO:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x float> [[LO]], <4 x float> [[HI]], <8 x i32> // CHECK-NEXT: ret <8 x float> [[SHUFFLE_I_I]] // __m256 test_mm256_setr_m128(__m128 hi, __m128 lo) { return _mm256_setr_m128(lo, hi); } // CHECK-LABEL: define dso_local <4 x double> @test_mm256_setr_m128d( // CHECK-SAME: <2 x double> noundef [[HI:%.*]], <2 x double> noundef [[LO:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x double> [[LO]], <2 x double> [[HI]], <4 x i32> // CHECK-NEXT: ret <4 x double> [[SHUFFLE_I_I]] // __m256d test_mm256_setr_m128d(__m128d hi, __m128d lo) { return _mm256_setr_m128d(lo, hi); } // CHECK-LABEL: define dso_local <4 x i64> @test_mm256_setr_m128i( // CHECK-SAME: <2 x i64> noundef [[HI:%.*]], <2 x i64> noundef [[LO:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i64> [[LO]], <2 x i64> [[HI]], <4 x i32> // CHECK-NEXT: ret <4 x i64> [[SHUFFLE_I_I]] // __m256i test_mm256_setr_m128i(__m128i hi, __m128i lo) { return _mm256_setr_m128i(lo, hi); }