// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -verify -emit-llvm -o - %s
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +outline-atomics -verify -emit-llvm -o - %s
// REQUIRES: aarch64-registered-target

// Test that functions with the correct target attributes can use the correct NEON intrinsics.

#include <arm_neon.h>

__attribute__((target("dotprod")))
void dotprod(uint32x2_t v2i32, uint8x16_t v16i8, uint8x8_t v8i8) {
  vdot_u32(v2i32, v8i8, v8i8);
  vdot_laneq_u32(v2i32, v8i8, v16i8, 1);
}

__attribute__((target("fullfp16")))
void fp16(uint32x2_t v2i32, uint32x4_t v4i32, uint16x8_t v8i16, uint8x16_t v16i8, uint8x8_t v8i8, float32x2_t v2f32, float32x4_t v4f32, float16x4_t v4f16, bfloat16x4_t v4bf16) {
  vceqz_f16(v4f16);
  vrnd_f16(v4f16);
  vmaxnm_f16(v4f16, v4f16);
  vrndi_f16(v4f16);
}

__attribute__((target("fp16fml")))
void fp16fml(float32x2_t v2f32, float16x4_t v4f16) {
  vfmlal_low_f16(v2f32, v4f16, v4f16);
}

__attribute__((target("i8mm")))
void i8mm(uint32x2_t v2i32, uint32x4_t v4i32, uint16x8_t v8i16, uint8x16_t v16i8, uint8x8_t v8i8, float32x2_t v2f32, float32x4_t v4f32, float16x4_t v4f16, bfloat16x4_t v4bf16) {
  vmmlaq_s32(v4i32, v8i16, v8i16);
  vusdot_laneq_s32(v2i32, v8i8, v8i16, 0);
}

__attribute__((target("bf16")))
void bf16(uint32x2_t v2i32, uint32x4_t v4i32, uint16x8_t v8i16, uint8x16_t v16i8, uint8x8_t v8i8, float32x2_t v2f32, float32x4_t v4f32, float16x4_t v4f16, bfloat16x4_t v4bf16, __bf16 bf16) {
  vbfdot_f32(v2f32, v4bf16, v4bf16);
  vcreate_bf16(10);
  vdup_lane_bf16(v4bf16, 2);
  vdup_n_bf16(bf16);
  vld1_bf16(0);
  vcvt_f32_bf16(v4bf16);
  vcvt_bf16_f32(v4f32);
}

__attribute__((target("arch=armv8-a")))
uint64x2_t test_v8(uint64x2_t a, uint64x2_t b) {
  return veorq_u64(a, b);
}

__attribute__((target("arch=armv8.1-a")))
void test_v81(int32x2_t d, int32x4_t v, int s) {
  vqrdmlahq_s32(v, v, v);
  vqrdmlah_laneq_s32(d, d, v, 1);
  vqrdmlahh_s16(1, 1, 1);
}

__attribute__((target("arch=armv8.3-a+fp16")))
void test_v83(float32x4_t v4f32, float16x4_t v4f16, float64x2_t v2f64) {
  vcaddq_rot90_f32(v4f32, v4f32);
  vcmla_rot90_f16(v4f16, v4f16, v4f16);
  vcmlaq_rot270_f64(v2f64, v2f64, v2f64);
}

__attribute__((target("arch=armv8.5-a")))
void test_v85(float32x4_t v4f32) {
  vrnd32xq_f32(v4f32);
}

void undefined(uint32x2_t v2i32, uint32x4_t v4i32, uint16x8_t v8i16, uint8x16_t v16i8, uint8x8_t v8i8, float32x2_t v2f32, float32x4_t v4f32, float16x4_t v4f16, float64x2_t v2f64, bfloat16x4_t v4bf16, __bf16 bf16, poly64_t poly64, poly64x2_t poly64x2) {
  // dotprod
  vdot_u32(v2i32, v8i8, v8i8); // expected-error {{always_inline function 'vdot_u32' requires target feature 'dotprod'}}
  vdot_laneq_u32(v2i32, v8i8, v16i8, 1); // expected-error {{always_inline function 'vdot_u32' requires target feature 'dotprod'}}
  // fp16
  vceqz_f16(v4f16); // expected-error {{always_inline function 'vceqz_f16' requires target feature 'fullfp16'}}
  vrnd_f16(v4f16); // expected-error {{always_inline function 'vrnd_f16' requires target feature 'fullfp16'}}
  vmaxnm_f16(v4f16, v4f16); // expected-error {{always_inline function 'vmaxnm_f16' requires target feature 'fullfp16'}}
  vrndi_f16(v4f16); // expected-error {{always_inline function 'vrndi_f16' requires target feature 'fullfp16'}}
  // fp16fml depends on fp-armv8
  vfmlal_low_f16(v2f32, v4f16, v4f16); // expected-error {{always_inline function 'vfmlal_low_f16' requires target feature 'fp-armv8'}}
  // i8mm
  vmmlaq_s32(v4i32, v8i16, v8i16); // expected-error {{always_inline function 'vmmlaq_s32' requires target feature 'i8mm'}}
  vusdot_laneq_s32(v2i32, v8i8, v8i16, 0); // expected-error {{always_inline function 'vusdot_s32' requires target feature 'i8mm'}}
  // bf16
  vbfdot_f32(v2f32, v4bf16, v4bf16); // expected-error {{always_inline function 'vbfdot_f32' requires target feature 'bf16'}}
  vcreate_bf16(10);
  vdup_lane_bf16(v4bf16, 2); // expected-error {{'__builtin_neon_splat_lane_bf16' needs target feature bf16}}
  vdup_n_bf16(bf16); // expected-error {{always_inline function 'vdup_n_bf16' requires target feature 'bf16'}}
  vld1_bf16(0); // expected-error {{'__builtin_neon_vld1_bf16' needs target feature bf16}}
  vcvt_f32_bf16(v4bf16); // expected-error {{always_inline function 'vcvt_f32_bf16' requires target feature 'bf16'}}
  vcvt_bf16_f32(v4f32); // expected-error {{always_inline function 'vcvt_bf16_f32' requires target feature 'bf16'}}
  // v8.1 - qrdmla
  vqrdmlahq_s32(v4i32, v4i32, v4i32); // expected-error {{always_inline function 'vqrdmlahq_s32' requires target feature 'v8.1a'}}
  vqrdmlah_laneq_s32(v2i32, v2i32, v4i32, 1); // expected-error {{always_inline function 'vqrdmlah_s32' requires target feature 'v8.1a'}}
  vqrdmlahh_s16(1, 1, 1); // expected-error {{always_inline function 'vqrdmlahh_s16' requires target feature 'v8.1a'}}
  // 8.3 - complex
  vcaddq_rot90_f32(v4f32, v4f32); // expected-error {{always_inline function 'vcaddq_rot90_f32' requires target feature 'v8.3a'}}
  vcmla_rot90_f16(v4f16, v4f16, v4f16); // expected-error {{always_inline function 'vcmla_rot90_f16' requires target feature 'v8.3a'}}
  vcmlaq_rot270_f64(v2f64, v2f64, v2f64); // expected-error {{always_inline function 'vcmlaq_rot270_f64' requires target feature 'v8.3a'}}
  // 8.5 - frint
  vrnd32xq_f32(v4f32); // expected-error {{always_inline function 'vrnd32xq_f32' requires target feature 'v8.5a'}}

  vmull_p64(poly64, poly64); // expected-error {{always_inline function 'vmull_p64' requires target feature 'aes'}}
  vmull_high_p64(poly64x2, poly64x2); // expected-error {{always_inline function 'vmull_high_p64' requires target feature 'aes'}}

}
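
// Illustrative sketch (an addition for readers, not part of the upstream checks):
// the diagnostics above fire because arm_neon.h declares these intrinsics as
// always_inline functions gated on a target feature. A caller carrying the matching
// attribute compiles cleanly, mirroring the dotprod() case at the top of this file.
// dot_into_acc is a hypothetical helper name, not an upstream function; it produces
// no diagnostics, so it does not disturb the -verify run.
__attribute__((target("dotprod")))
static uint32x2_t dot_into_acc(uint32x2_t acc, uint8x8_t a, uint8x8_t b) {
  // The 'dotprod' feature is supplied by the function-level target attribute rather
  // than by a -target-feature flag on the RUN line, so vdot_u32 can be inlined here.
  return vdot_u32(acc, a, b);
}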