// REQUIRES: arm-registered-target
// RUN: %clang_cc1 -triple thumbv7-apple-darwin \
// RUN: -target-abi apcs-gnu \
// RUN: -target-cpu cortex-a8 \
// RUN: -mfloat-abi soft \
// RUN: -target-feature +soft-float-abi \
// RUN: -ffreestanding \
// RUN: -emit-llvm -w -O1 -o - %s | FileCheck %s
9*f4a2713aSLionel Sambuc
10*f4a2713aSLionel Sambuc #include <arm_neon.h>
11*f4a2713aSLionel Sambuc
// Check that the vget_low/vget_high intrinsics generate a single shuffle
// without any bitcasting.
low_s8(int8x16_t a)14*f4a2713aSLionel Sambuc int8x8_t low_s8(int8x16_t a) {
15*f4a2713aSLionel Sambuc // CHECK: shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
16*f4a2713aSLionel Sambuc return vget_low_s8(a);
17*f4a2713aSLionel Sambuc }
18*f4a2713aSLionel Sambuc
low_u8(uint8x16_t a)19*f4a2713aSLionel Sambuc uint8x8_t low_u8 (uint8x16_t a) {
20*f4a2713aSLionel Sambuc // CHECK: shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
21*f4a2713aSLionel Sambuc return vget_low_u8(a);
22*f4a2713aSLionel Sambuc }
23*f4a2713aSLionel Sambuc
low_s16(int16x8_t a)24*f4a2713aSLionel Sambuc int16x4_t low_s16( int16x8_t a) {
25*f4a2713aSLionel Sambuc // CHECK: shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
26*f4a2713aSLionel Sambuc return vget_low_s16(a);
27*f4a2713aSLionel Sambuc }
28*f4a2713aSLionel Sambuc
low_u16(uint16x8_t a)29*f4a2713aSLionel Sambuc uint16x4_t low_u16(uint16x8_t a) {
30*f4a2713aSLionel Sambuc // CHECK: shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
31*f4a2713aSLionel Sambuc return vget_low_u16(a);
32*f4a2713aSLionel Sambuc }
33*f4a2713aSLionel Sambuc
low_s32(int32x4_t a)34*f4a2713aSLionel Sambuc int32x2_t low_s32( int32x4_t a) {
35*f4a2713aSLionel Sambuc // CHECK: shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
36*f4a2713aSLionel Sambuc return vget_low_s32(a);
37*f4a2713aSLionel Sambuc }
38*f4a2713aSLionel Sambuc
low_u32(uint32x4_t a)39*f4a2713aSLionel Sambuc uint32x2_t low_u32(uint32x4_t a) {
40*f4a2713aSLionel Sambuc // CHECK: shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
41*f4a2713aSLionel Sambuc return vget_low_u32(a);
42*f4a2713aSLionel Sambuc }
43*f4a2713aSLionel Sambuc
low_s64(int64x2_t a)44*f4a2713aSLionel Sambuc int64x1_t low_s64( int64x2_t a) {
45*f4a2713aSLionel Sambuc // CHECK: shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> zeroinitializer
46*f4a2713aSLionel Sambuc return vget_low_s64(a);
47*f4a2713aSLionel Sambuc }
48*f4a2713aSLionel Sambuc
low_u64(uint64x2_t a)49*f4a2713aSLionel Sambuc uint64x1_t low_u64(uint64x2_t a) {
50*f4a2713aSLionel Sambuc // CHECK: shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> zeroinitializer
51*f4a2713aSLionel Sambuc return vget_low_u64(a);
52*f4a2713aSLionel Sambuc }
53*f4a2713aSLionel Sambuc
low_p8(poly8x16_t a)54*f4a2713aSLionel Sambuc poly8x8_t low_p8 (poly8x16_t a) {
55*f4a2713aSLionel Sambuc // CHECK: shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
56*f4a2713aSLionel Sambuc return vget_low_p8(a);
57*f4a2713aSLionel Sambuc }
58*f4a2713aSLionel Sambuc
low_p16(poly16x8_t a)59*f4a2713aSLionel Sambuc poly16x4_t low_p16(poly16x8_t a) {
60*f4a2713aSLionel Sambuc // CHECK: shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
61*f4a2713aSLionel Sambuc return vget_low_p16(a);
62*f4a2713aSLionel Sambuc }
63*f4a2713aSLionel Sambuc
low_f32(float32x4_t a)64*f4a2713aSLionel Sambuc float32x2_t low_f32(float32x4_t a) {
65*f4a2713aSLionel Sambuc // CHECK: shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
66*f4a2713aSLionel Sambuc return vget_low_f32(a);
67*f4a2713aSLionel Sambuc }
68*f4a2713aSLionel Sambuc
69*f4a2713aSLionel Sambuc
high_s8(int8x16_t a)70*f4a2713aSLionel Sambuc int8x8_t high_s8(int8x16_t a) {
71*f4a2713aSLionel Sambuc // CHECK: shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
72*f4a2713aSLionel Sambuc return vget_high_s8(a);
73*f4a2713aSLionel Sambuc }
74*f4a2713aSLionel Sambuc
high_u8(uint8x16_t a)75*f4a2713aSLionel Sambuc uint8x8_t high_u8 (uint8x16_t a) {
76*f4a2713aSLionel Sambuc // CHECK: shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
77*f4a2713aSLionel Sambuc return vget_high_u8(a);
78*f4a2713aSLionel Sambuc }
79*f4a2713aSLionel Sambuc
high_s16(int16x8_t a)80*f4a2713aSLionel Sambuc int16x4_t high_s16( int16x8_t a) {
81*f4a2713aSLionel Sambuc // CHECK: shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
82*f4a2713aSLionel Sambuc return vget_high_s16(a);
83*f4a2713aSLionel Sambuc }
84*f4a2713aSLionel Sambuc
high_u16(uint16x8_t a)85*f4a2713aSLionel Sambuc uint16x4_t high_u16(uint16x8_t a) {
86*f4a2713aSLionel Sambuc // CHECK: shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
87*f4a2713aSLionel Sambuc return vget_high_u16(a);
88*f4a2713aSLionel Sambuc }
89*f4a2713aSLionel Sambuc
high_s32(int32x4_t a)90*f4a2713aSLionel Sambuc int32x2_t high_s32( int32x4_t a) {
91*f4a2713aSLionel Sambuc // CHECK: shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
92*f4a2713aSLionel Sambuc return vget_high_s32(a);
93*f4a2713aSLionel Sambuc }
94*f4a2713aSLionel Sambuc
high_u32(uint32x4_t a)95*f4a2713aSLionel Sambuc uint32x2_t high_u32(uint32x4_t a) {
96*f4a2713aSLionel Sambuc // CHECK: shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
97*f4a2713aSLionel Sambuc return vget_high_u32(a);
98*f4a2713aSLionel Sambuc }
99*f4a2713aSLionel Sambuc
high_s64(int64x2_t a)100*f4a2713aSLionel Sambuc int64x1_t high_s64( int64x2_t a) {
101*f4a2713aSLionel Sambuc // CHECK: shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> <i32 1>
102*f4a2713aSLionel Sambuc return vget_high_s64(a);
103*f4a2713aSLionel Sambuc }
104*f4a2713aSLionel Sambuc
high_u64(uint64x2_t a)105*f4a2713aSLionel Sambuc uint64x1_t high_u64(uint64x2_t a) {
106*f4a2713aSLionel Sambuc // CHECK: shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> <i32 1>
107*f4a2713aSLionel Sambuc return vget_high_u64(a);
108*f4a2713aSLionel Sambuc }
109*f4a2713aSLionel Sambuc
high_p8(poly8x16_t a)110*f4a2713aSLionel Sambuc poly8x8_t high_p8 (poly8x16_t a) {
111*f4a2713aSLionel Sambuc // CHECK: shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
112*f4a2713aSLionel Sambuc return vget_high_p8(a);
113*f4a2713aSLionel Sambuc }
114*f4a2713aSLionel Sambuc
high_p16(poly16x8_t a)115*f4a2713aSLionel Sambuc poly16x4_t high_p16(poly16x8_t a) {
116*f4a2713aSLionel Sambuc // CHECK: shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
117*f4a2713aSLionel Sambuc return vget_high_p16(a);
118*f4a2713aSLionel Sambuc }
119*f4a2713aSLionel Sambuc
high_f32(float32x4_t a)120*f4a2713aSLionel Sambuc float32x2_t high_f32(float32x4_t a) {
121*f4a2713aSLionel Sambuc // CHECK: shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 2, i32 3>
122*f4a2713aSLionel Sambuc return vget_high_f32(a);
123*f4a2713aSLionel Sambuc }
124*f4a2713aSLionel Sambuc
125