// RUN: %clang_cc1 -O1 -triple arm64-apple-ios7 -target-feature +neon -ffreestanding -S -o - -emit-llvm %s | FileCheck %s
// RUN: %clang_cc1 -O1 -triple arm64-apple-ios7 -target-feature +neon -ffreestanding -S -o - %s | FileCheck -check-prefix=CHECK-CODEGEN %s
// REQUIRES: aarch64-registered-target
// Test ARM64 SIMD max/min intrinsics

#include <arm_neon.h>

// Test a representative sample of 8 and 16, signed and unsigned, 64 and 128 bit reduction
test_vmaxv_s8(int8x8_t a1)9*0a6a1f1dSLionel Sambuc int8_t test_vmaxv_s8(int8x8_t a1) {
10*0a6a1f1dSLionel Sambuc // CHECK: test_vmaxv_s8
11*0a6a1f1dSLionel Sambuc return vmaxv_s8(a1);
12*0a6a1f1dSLionel Sambuc // CHECK @llvm.aarch64.neon.smaxv.i32.v8i8
13*0a6a1f1dSLionel Sambuc }
14*0a6a1f1dSLionel Sambuc
test_vminvq_u16(uint16x8_t a1)15*0a6a1f1dSLionel Sambuc uint16_t test_vminvq_u16(uint16x8_t a1) {
16*0a6a1f1dSLionel Sambuc // CHECK: test_vminvq_u16
17*0a6a1f1dSLionel Sambuc return vminvq_u16(a1);
18*0a6a1f1dSLionel Sambuc // CHECK llvm.aarch64.neon.uminv.i16.v8i16
19*0a6a1f1dSLionel Sambuc }
20*0a6a1f1dSLionel Sambuc
// Test a representative sample of 8 and 16, signed and unsigned, 64 and 128 bit pairwise
// Two-operand vmin_u8 on uint8x8_t vectors.
// The IR run must find the function name and then a reference to the
// umin intrinsic for <8 x i8>.
// CHECK: test_vmin_u8
// CHECK: llvm.aarch64.neon.umin.v8i8
uint8x8_t test_vmin_u8(uint8x8_t a1, uint8x8_t a2) {
  return vmin_u8(a1, a2);
}

// Two-operand vminq_u8 on uint8x16_t vectors.
// The IR run must find the function name and then a reference to the
// umin intrinsic for <16 x i8>.
// CHECK: test_vminq_u8
// CHECK: llvm.aarch64.neon.umin.v16i8
uint8x16_t test_vminq_u8(uint8x16_t a1, uint8x16_t a2) {
  return vminq_u8(a1, a2);
}

// Two-operand vmaxq_s16 on int16x8_t vectors.
// The IR run must find the function name and then a reference to the
// smax intrinsic for <8 x i16>.
// CHECK: test_vmaxq_s16
// CHECK: llvm.aarch64.neon.smax.v8i16
int16x8_t test_vmaxq_s16(int16x8_t a1, int16x8_t a2) {
  return vmaxq_s16(a1, a2);
}

// Test the more complicated cases of [suf]32 and f64
// Two-operand vmaxq_f64 on float64x2_t vectors.
// The IR run must find the function name and then a reference to the
// fmax intrinsic for <2 x double>.
// CHECK: test_vmaxq_f64
// CHECK: llvm.aarch64.neon.fmax.v2f64
float64x2_t test_vmaxq_f64(float64x2_t a1, float64x2_t a2) {
  return vmaxq_f64(a1, a2);
}

// Two-operand vmaxq_f32 on float32x4_t vectors.
// The IR run must find the function name and then a reference to the
// fmax intrinsic for <4 x float>.
// CHECK: test_vmaxq_f32
// CHECK: llvm.aarch64.neon.fmax.v4f32
float32x4_t test_vmaxq_f32(float32x4_t a1, float32x4_t a2) {
  return vmaxq_f32(a1, a2);
}

// Two-operand vminq_f64 on float64x2_t vectors.
// The IR run must find the function name and then a reference to the
// fmin intrinsic for <2 x double>.
// CHECK: test_vminq_f64
// CHECK: llvm.aarch64.neon.fmin.v2f64
float64x2_t test_vminq_f64(float64x2_t a1, float64x2_t a2) {
  return vminq_f64(a1, a2);
}

// Two-operand vmax_f32 on float32x2_t vectors.
// The IR run must find the function name and then a reference to the
// fmax intrinsic for <2 x float>.
// CHECK: test_vmax_f32
// CHECK: llvm.aarch64.neon.fmax.v2f32
float32x2_t test_vmax_f32(float32x2_t a1, float32x2_t a2) {
  return vmax_f32(a1, a2);
}

// Two-operand vmax_s32 on int32x2_t vectors.
// The IR run must find the function name and then a reference to the
// smax intrinsic for <2 x i32>.
// CHECK: test_vmax_s32
// CHECK: llvm.aarch64.neon.smax.v2i32
int32x2_t test_vmax_s32(int32x2_t a1, int32x2_t a2) {
  return vmax_s32(a1, a2);
}

// Two-operand vmin_u32 on uint32x2_t vectors.
// The IR run must find the function name and then a reference to the
// umin intrinsic for <2 x i32>.
// CHECK: test_vmin_u32
// CHECK: llvm.aarch64.neon.umin.v2i32
uint32x2_t test_vmin_u32(uint32x2_t a1, uint32x2_t a2) {
  return vmin_u32(a1, a2);
}

test_vmaxnmv_f32(float32x2_t a1)77*0a6a1f1dSLionel Sambuc float32_t test_vmaxnmv_f32(float32x2_t a1) {
78*0a6a1f1dSLionel Sambuc // CHECK: test_vmaxnmv_f32
79*0a6a1f1dSLionel Sambuc return vmaxnmv_f32(a1);
80*0a6a1f1dSLionel Sambuc // CHECK: llvm.aarch64.neon.fmaxnmv.f32.v2f32
81*0a6a1f1dSLionel Sambuc // CHECK-NEXT: ret
82*0a6a1f1dSLionel Sambuc }
83*0a6a1f1dSLionel Sambuc
// this doesn't translate into a valid instruction, regardless of what the
// ARM doc says.
// FileCheck scans the raw file and does not honor #if 0; the directives in
// this region are presumably spelled with '@' instead of a colon to keep
// them inert while the test is disabled.
#if 0
float64_t test_vmaxnmvq_f64(float64x2_t a1) {
  // CHECK@ test_vmaxnmvq_f64
  return vmaxnmvq_f64(a1);
  // CHECK@ llvm.aarch64.neon.fmaxnmv.f64.v2f64
  // CHECK-NEXT@ ret
}
#endif

test_vmaxnmvq_f32(float32x4_t a1)95*0a6a1f1dSLionel Sambuc float32_t test_vmaxnmvq_f32(float32x4_t a1) {
96*0a6a1f1dSLionel Sambuc // CHECK: test_vmaxnmvq_f32
97*0a6a1f1dSLionel Sambuc return vmaxnmvq_f32(a1);
98*0a6a1f1dSLionel Sambuc // CHECK: llvm.aarch64.neon.fmaxnmv.f32.v4f32
99*0a6a1f1dSLionel Sambuc // CHECK-NEXT: ret
100*0a6a1f1dSLionel Sambuc }
101*0a6a1f1dSLionel Sambuc
test_vmaxv_f32(float32x2_t a1)102*0a6a1f1dSLionel Sambuc float32_t test_vmaxv_f32(float32x2_t a1) {
103*0a6a1f1dSLionel Sambuc // CHECK: test_vmaxv_f32
104*0a6a1f1dSLionel Sambuc return vmaxv_f32(a1);
105*0a6a1f1dSLionel Sambuc // CHECK: llvm.aarch64.neon.fmaxv.f32.v2f32
106*0a6a1f1dSLionel Sambuc // FIXME check that the 2nd and 3rd arguments are the same V register below
107*0a6a1f1dSLionel Sambuc // CHECK-CODEGEN: fmaxp.2s
108*0a6a1f1dSLionel Sambuc // CHECK-NEXT: ret
109*0a6a1f1dSLionel Sambuc }
110*0a6a1f1dSLionel Sambuc
test_vmaxv_s32(int32x2_t a1)111*0a6a1f1dSLionel Sambuc int32_t test_vmaxv_s32(int32x2_t a1) {
112*0a6a1f1dSLionel Sambuc // CHECK: test_vmaxv_s32
113*0a6a1f1dSLionel Sambuc return vmaxv_s32(a1);
114*0a6a1f1dSLionel Sambuc // CHECK: llvm.aarch64.neon.smaxv.i32.v2i32
115*0a6a1f1dSLionel Sambuc // FIXME check that the 2nd and 3rd arguments are the same V register below
116*0a6a1f1dSLionel Sambuc // CHECK-CODEGEN: smaxp.2s
117*0a6a1f1dSLionel Sambuc // CHECK-NEXT: ret
118*0a6a1f1dSLionel Sambuc }
119*0a6a1f1dSLionel Sambuc
test_vmaxv_u32(uint32x2_t a1)120*0a6a1f1dSLionel Sambuc uint32_t test_vmaxv_u32(uint32x2_t a1) {
121*0a6a1f1dSLionel Sambuc // CHECK: test_vmaxv_u32
122*0a6a1f1dSLionel Sambuc return vmaxv_u32(a1);
123*0a6a1f1dSLionel Sambuc // CHECK: llvm.aarch64.neon.umaxv.i32.v2i32
124*0a6a1f1dSLionel Sambuc // FIXME check that the 2nd and 3rd arguments are the same V register below
125*0a6a1f1dSLionel Sambuc // CHECK-CODEGEN: umaxp.2s
126*0a6a1f1dSLionel Sambuc // CHECK-NEXT: ret
127*0a6a1f1dSLionel Sambuc }
128*0a6a1f1dSLionel Sambuc
// FIXME punt on this for now; don't forget to fix CHECKs
// FileCheck scans the raw file and does not honor #if 0; the directives in
// this region are presumably spelled with '@' instead of a colon to keep
// them inert while the test is disabled.
#if 0
float64_t test_vmaxvq_f64(float64x2_t a1) {
  // CHECK@ test_vmaxvq_f64
  return vmaxvq_f64(a1);
  // CHECK@ llvm.aarch64.neon.fmaxv.f64.v2f64
  // CHECK-NEXT@ ret
}
#endif

test_vmaxvq_f32(float32x4_t a1)139*0a6a1f1dSLionel Sambuc float32_t test_vmaxvq_f32(float32x4_t a1) {
140*0a6a1f1dSLionel Sambuc // CHECK: test_vmaxvq_f32
141*0a6a1f1dSLionel Sambuc return vmaxvq_f32(a1);
142*0a6a1f1dSLionel Sambuc // CHECK: llvm.aarch64.neon.fmaxv.f32.v4f32
143*0a6a1f1dSLionel Sambuc // CHECK-NEXT: ret
144*0a6a1f1dSLionel Sambuc }
145*0a6a1f1dSLionel Sambuc
test_vminnmv_f32(float32x2_t a1)146*0a6a1f1dSLionel Sambuc float32_t test_vminnmv_f32(float32x2_t a1) {
147*0a6a1f1dSLionel Sambuc // CHECK: test_vminnmv_f32
148*0a6a1f1dSLionel Sambuc return vminnmv_f32(a1);
149*0a6a1f1dSLionel Sambuc // CHECK: llvm.aarch64.neon.fminnmv.f32.v2f32
150*0a6a1f1dSLionel Sambuc // CHECK-NEXT: ret
151*0a6a1f1dSLionel Sambuc }
152*0a6a1f1dSLionel Sambuc
test_vminvq_f32(float32x4_t a1)153*0a6a1f1dSLionel Sambuc float32_t test_vminvq_f32(float32x4_t a1) {
154*0a6a1f1dSLionel Sambuc // CHECK: test_vminvq_f32
155*0a6a1f1dSLionel Sambuc return vminvq_f32(a1);
156*0a6a1f1dSLionel Sambuc // CHECK: llvm.aarch64.neon.fminv.f32.v4f32
157*0a6a1f1dSLionel Sambuc // CHECK-NEXT: ret
158*0a6a1f1dSLionel Sambuc }
159*0a6a1f1dSLionel Sambuc
// this doesn't translate into a valid instruction, regardless of what the ARM
// doc says.
// FileCheck scans the raw file and does not honor #if 0; the directives in
// this region are presumably spelled with '@' instead of a colon to keep
// them inert while the test is disabled.
#if 0
float64_t test_vminnmvq_f64(float64x2_t a1) {
  // CHECK@ test_vminnmvq_f64
  return vminnmvq_f64(a1);
  // CHECK@ llvm.aarch64.neon.fminnmv.f64.v2f64
  // CHECK-NEXT@ ret
}
#endif

test_vminnmvq_f32(float32x4_t a1)171*0a6a1f1dSLionel Sambuc float32_t test_vminnmvq_f32(float32x4_t a1) {
172*0a6a1f1dSLionel Sambuc // CHECK: test_vminnmvq_f32
173*0a6a1f1dSLionel Sambuc return vminnmvq_f32(a1);
174*0a6a1f1dSLionel Sambuc // CHECK: llvm.aarch64.neon.fminnmv.f32.v4f32
175*0a6a1f1dSLionel Sambuc // CHECK-NEXT: ret
176*0a6a1f1dSLionel Sambuc }
177*0a6a1f1dSLionel Sambuc
test_vminv_f32(float32x2_t a1)178*0a6a1f1dSLionel Sambuc float32_t test_vminv_f32(float32x2_t a1) {
179*0a6a1f1dSLionel Sambuc // CHECK: test_vminv_f32
180*0a6a1f1dSLionel Sambuc return vminv_f32(a1);
181*0a6a1f1dSLionel Sambuc // CHECK: llvm.aarch64.neon.fminv.f32.v2f32
182*0a6a1f1dSLionel Sambuc // CHECK-NEXT: ret
183*0a6a1f1dSLionel Sambuc }
184*0a6a1f1dSLionel Sambuc
test_vminv_s32(int32x2_t a1)185*0a6a1f1dSLionel Sambuc int32_t test_vminv_s32(int32x2_t a1) {
186*0a6a1f1dSLionel Sambuc // CHECK: test_vminv_s32
187*0a6a1f1dSLionel Sambuc return vminv_s32(a1);
188*0a6a1f1dSLionel Sambuc // CHECK: llvm.aarch64.neon.sminv.i32.v2i32
189*0a6a1f1dSLionel Sambuc // CHECK-CODEGEN: sminp.2s
190*0a6a1f1dSLionel Sambuc // CHECK-NEXT: ret
191*0a6a1f1dSLionel Sambuc }
192*0a6a1f1dSLionel Sambuc
test_vminv_u32(uint32x2_t a1)193*0a6a1f1dSLionel Sambuc uint32_t test_vminv_u32(uint32x2_t a1) {
194*0a6a1f1dSLionel Sambuc // CHECK: test_vminv_u32
195*0a6a1f1dSLionel Sambuc return vminv_u32(a1);
196*0a6a1f1dSLionel Sambuc // CHECK: llvm.aarch64.neon.fminv.f32.v2f32
197*0a6a1f1dSLionel Sambuc }
198*0a6a1f1dSLionel Sambuc
// FIXME punt on this for now; don't forget to fix CHECKs
// FileCheck scans the raw file and does not honor #if 0; the directives in
// this region are presumably spelled with '@' instead of a colon to keep
// them inert while the test is disabled.
#if 0
float64_t test_vminvq_f64(float64x2_t a1) {
  // CHECK@ test_vminvq_f64
  return vminvq_f64(a1);
  // CHECK@ llvm.aarch64.neon.fminv.f64.v2f64
  // CHECK-NEXT@ ret
}
#endif