xref: /llvm-project/clang/test/CodeGen/AArch64/bf16-getset-intrinsics.c (revision 207e5ccceec8d3cc3f32723e78f2a142bc61b07d)
1*207e5cccSFangrui Song // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2*207e5cccSFangrui Song // RUN: %clang_cc1 -triple aarch64 -target-feature +neon -target-feature +bf16 \
3*207e5cccSFangrui Song // RUN:  -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg | FileCheck %s
4*207e5cccSFangrui Song 
5*207e5cccSFangrui Song // REQUIRES: aarch64-registered-target || arm-registered-target
6*207e5cccSFangrui Song 
7*207e5cccSFangrui Song #include <arm_neon.h>
8*207e5cccSFangrui Song 
9*207e5cccSFangrui Song // CHECK-LABEL: @test_vcreate_bf16(
10*207e5cccSFangrui Song // CHECK-NEXT:  entry:
11*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64 [[A:%.*]] to <4 x bfloat>
12*207e5cccSFangrui Song // CHECK-NEXT:    ret <4 x bfloat> [[TMP0]]
13*207e5cccSFangrui Song //
// Wraps vcreate_bf16 on a 64-bit integer argument. The expected IR asserted
// above is a single bitcast of the i64 to <4 x bfloat>, returned directly.
15*207e5cccSFangrui Song bfloat16x4_t test_vcreate_bf16(uint64_t a) {
16*207e5cccSFangrui Song   return vcreate_bf16(a);
17*207e5cccSFangrui Song }
17*207e5cccSFangrui Song 
18*207e5cccSFangrui Song // CHECK-LABEL: @test_vdup_n_bf16(
19*207e5cccSFangrui Song // CHECK-NEXT:  entry:
20*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <4 x bfloat> poison, bfloat [[V:%.*]], i32 0
21*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT1_I:%.*]] = insertelement <4 x bfloat> [[VECINIT_I]], bfloat [[V]], i32 1
22*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT2_I:%.*]] = insertelement <4 x bfloat> [[VECINIT1_I]], bfloat [[V]], i32 2
23*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT3_I:%.*]] = insertelement <4 x bfloat> [[VECINIT2_I]], bfloat [[V]], i32 3
24*207e5cccSFangrui Song // CHECK-NEXT:    ret <4 x bfloat> [[VECINIT3_I]]
25*207e5cccSFangrui Song //
// Wraps vdup_n_bf16 on a scalar bfloat argument. The expected IR asserted
// above builds a <4 x bfloat> from poison with four insertelement
// instructions, placing the same scalar in lanes 0 through 3.
26*207e5cccSFangrui Song bfloat16x4_t test_vdup_n_bf16(bfloat16_t v) {
27*207e5cccSFangrui Song   return vdup_n_bf16(v);
28*207e5cccSFangrui Song }
29*207e5cccSFangrui Song 
30*207e5cccSFangrui Song // CHECK-LABEL: @test_vdupq_n_bf16(
31*207e5cccSFangrui Song // CHECK-NEXT:  entry:
32*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <8 x bfloat> poison, bfloat [[V:%.*]], i32 0
33*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT1_I:%.*]] = insertelement <8 x bfloat> [[VECINIT_I]], bfloat [[V]], i32 1
34*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT2_I:%.*]] = insertelement <8 x bfloat> [[VECINIT1_I]], bfloat [[V]], i32 2
35*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT3_I:%.*]] = insertelement <8 x bfloat> [[VECINIT2_I]], bfloat [[V]], i32 3
36*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT4_I:%.*]] = insertelement <8 x bfloat> [[VECINIT3_I]], bfloat [[V]], i32 4
37*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT5_I:%.*]] = insertelement <8 x bfloat> [[VECINIT4_I]], bfloat [[V]], i32 5
38*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT6_I:%.*]] = insertelement <8 x bfloat> [[VECINIT5_I]], bfloat [[V]], i32 6
39*207e5cccSFangrui Song // CHECK-NEXT:    [[VECINIT7_I:%.*]] = insertelement <8 x bfloat> [[VECINIT6_I]], bfloat [[V]], i32 7
40*207e5cccSFangrui Song // CHECK-NEXT:    ret <8 x bfloat> [[VECINIT7_I]]
41*207e5cccSFangrui Song //
// Wraps vdupq_n_bf16 on a scalar bfloat argument. The expected IR asserted
// above builds an <8 x bfloat> from poison with eight insertelement
// instructions, placing the same scalar in lanes 0 through 7.
42*207e5cccSFangrui Song bfloat16x8_t test_vdupq_n_bf16(bfloat16_t v) {
43*207e5cccSFangrui Song   return vdupq_n_bf16(v);
44*207e5cccSFangrui Song }
45*207e5cccSFangrui Song 
46*207e5cccSFangrui Song // CHECK-LABEL: @test_vdup_lane_bf16(
47*207e5cccSFangrui Song // CHECK-NEXT:  entry:
48*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x bfloat> [[V:%.*]] to <8 x i8>
49*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x bfloat>
50*207e5cccSFangrui Song // CHECK-NEXT:    [[LANE:%.*]] = shufflevector <4 x bfloat> [[TMP1]], <4 x bfloat> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
51*207e5cccSFangrui Song // CHECK-NEXT:    ret <4 x bfloat> [[LANE]]
52*207e5cccSFangrui Song //
// Wraps vdup_lane_bf16 with constant lane index 1 on a 4-lane input. The
// expected IR asserted above round-trips the vector through <8 x i8> and
// then uses a shufflevector whose four mask entries are all 1.
53*207e5cccSFangrui Song bfloat16x4_t test_vdup_lane_bf16(bfloat16x4_t v) {
54*207e5cccSFangrui Song   return vdup_lane_bf16(v, 1);
55*207e5cccSFangrui Song }
56*207e5cccSFangrui Song 
57*207e5cccSFangrui Song // CHECK-LABEL: @test_vdupq_lane_bf16(
58*207e5cccSFangrui Song // CHECK-NEXT:  entry:
59*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x bfloat> [[V:%.*]] to <8 x i8>
60*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x bfloat>
61*207e5cccSFangrui Song // CHECK-NEXT:    [[LANE:%.*]] = shufflevector <4 x bfloat> [[TMP1]], <4 x bfloat> [[TMP1]], <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
62*207e5cccSFangrui Song // CHECK-NEXT:    ret <8 x bfloat> [[LANE]]
63*207e5cccSFangrui Song //
// Wraps vdupq_lane_bf16 with constant lane index 1 on a 4-lane input,
// producing an 8-lane result. The expected IR asserted above round-trips the
// input through <8 x i8> and then uses a shufflevector whose eight mask
// entries are all 1.
64*207e5cccSFangrui Song bfloat16x8_t test_vdupq_lane_bf16(bfloat16x4_t v) {
65*207e5cccSFangrui Song   return vdupq_lane_bf16(v, 1);
66*207e5cccSFangrui Song }
67*207e5cccSFangrui Song 
68*207e5cccSFangrui Song // CHECK-LABEL: @test_vdup_laneq_bf16(
69*207e5cccSFangrui Song // CHECK-NEXT:  entry:
70*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x bfloat> [[V:%.*]] to <16 x i8>
71*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x bfloat>
72*207e5cccSFangrui Song // CHECK-NEXT:    [[LANE:%.*]] = shufflevector <8 x bfloat> [[TMP1]], <8 x bfloat> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
73*207e5cccSFangrui Song // CHECK-NEXT:    ret <4 x bfloat> [[LANE]]
74*207e5cccSFangrui Song //
// Wraps vdup_laneq_bf16 with constant lane index 7 (the highest lane of the
// 8-lane input), producing a 4-lane result. The expected IR asserted above
// round-trips the input through <16 x i8> and then uses a shufflevector whose
// four mask entries are all 7.
75*207e5cccSFangrui Song bfloat16x4_t test_vdup_laneq_bf16(bfloat16x8_t v) {
76*207e5cccSFangrui Song   return vdup_laneq_bf16(v, 7);
77*207e5cccSFangrui Song }
78*207e5cccSFangrui Song 
79*207e5cccSFangrui Song // CHECK-LABEL: @test_vdupq_laneq_bf16(
80*207e5cccSFangrui Song // CHECK-NEXT:  entry:
81*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x bfloat> [[V:%.*]] to <16 x i8>
82*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x bfloat>
83*207e5cccSFangrui Song // CHECK-NEXT:    [[LANE:%.*]] = shufflevector <8 x bfloat> [[TMP1]], <8 x bfloat> [[TMP1]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
84*207e5cccSFangrui Song // CHECK-NEXT:    ret <8 x bfloat> [[LANE]]
85*207e5cccSFangrui Song //
// Wraps vdupq_laneq_bf16 with constant lane index 7 (the highest lane of the
// 8-lane input). The expected IR asserted above round-trips the input through
// <16 x i8> and then uses a shufflevector whose eight mask entries are all 7.
86*207e5cccSFangrui Song bfloat16x8_t test_vdupq_laneq_bf16(bfloat16x8_t v) {
87*207e5cccSFangrui Song   return vdupq_laneq_bf16(v, 7);
88*207e5cccSFangrui Song }
89*207e5cccSFangrui Song 
90*207e5cccSFangrui Song // CHECK-LABEL: @test_vcombine_bf16(
91*207e5cccSFangrui Song // CHECK-NEXT:  entry:
92*207e5cccSFangrui Song // CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <4 x bfloat> [[LOW:%.*]], <4 x bfloat> [[HIGH:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
93*207e5cccSFangrui Song // CHECK-NEXT:    ret <8 x bfloat> [[SHUFFLE_I]]
94*207e5cccSFangrui Song //
// Wraps vcombine_bf16 on two 4-lane inputs. The expected IR asserted above is
// one shufflevector over (low, high) with mask 0..7, i.e. the lanes of `low`
// followed by the lanes of `high` in an <8 x bfloat> result.
95*207e5cccSFangrui Song bfloat16x8_t test_vcombine_bf16(bfloat16x4_t low, bfloat16x4_t high) {
96*207e5cccSFangrui Song   return vcombine_bf16(low, high);
97*207e5cccSFangrui Song }
98*207e5cccSFangrui Song 
99*207e5cccSFangrui Song // CHECK-LABEL: @test_vget_high_bf16(
100*207e5cccSFangrui Song // CHECK-NEXT:  entry:
101*207e5cccSFangrui Song // CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
102*207e5cccSFangrui Song // CHECK-NEXT:    ret <4 x bfloat> [[SHUFFLE_I]]
103*207e5cccSFangrui Song //
// Wraps vget_high_bf16 on an 8-lane input. The expected IR asserted above is
// one shufflevector of `a` with itself using mask 4..7, yielding <4 x bfloat>.
104*207e5cccSFangrui Song bfloat16x4_t test_vget_high_bf16(bfloat16x8_t a) {
105*207e5cccSFangrui Song   return vget_high_bf16(a);
106*207e5cccSFangrui Song }
107*207e5cccSFangrui Song 
108*207e5cccSFangrui Song // CHECK-LABEL: @test_vget_low_bf16(
109*207e5cccSFangrui Song // CHECK-NEXT:  entry:
110*207e5cccSFangrui Song // CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> [[A]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
111*207e5cccSFangrui Song // CHECK-NEXT:    ret <4 x bfloat> [[SHUFFLE_I]]
112*207e5cccSFangrui Song //
// Wraps vget_low_bf16 on an 8-lane input. The expected IR asserted above is
// one shufflevector of `a` with itself using mask 0..3, yielding <4 x bfloat>.
113*207e5cccSFangrui Song bfloat16x4_t test_vget_low_bf16(bfloat16x8_t a) {
114*207e5cccSFangrui Song   return vget_low_bf16(a);
115*207e5cccSFangrui Song }
116*207e5cccSFangrui Song 
117*207e5cccSFangrui Song // CHECK-LABEL: @test_vget_lane_bf16(
118*207e5cccSFangrui Song // CHECK-NEXT:  entry:
119*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE:%.*]] = extractelement <4 x bfloat> [[V:%.*]], i32 1
120*207e5cccSFangrui Song // CHECK-NEXT:    ret bfloat [[VGET_LANE]]
121*207e5cccSFangrui Song //
// Wraps vget_lane_bf16 with constant lane index 1 on a 4-lane input. The
// expected IR asserted above is a single extractelement at index 1 whose
// bfloat result is returned.
122*207e5cccSFangrui Song bfloat16_t test_vget_lane_bf16(bfloat16x4_t v) {
123*207e5cccSFangrui Song   return vget_lane_bf16(v, 1);
124*207e5cccSFangrui Song }
125*207e5cccSFangrui Song 
126*207e5cccSFangrui Song // CHECK-LABEL: @test_vgetq_lane_bf16(
127*207e5cccSFangrui Song // CHECK-NEXT:  entry:
128*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE:%.*]] = extractelement <8 x bfloat> [[V:%.*]], i32 7
129*207e5cccSFangrui Song // CHECK-NEXT:    ret bfloat [[VGETQ_LANE]]
130*207e5cccSFangrui Song //
// Wraps vgetq_lane_bf16 with constant lane index 7 (the highest lane of the
// 8-lane input). The expected IR asserted above is a single extractelement at
// index 7 whose bfloat result is returned.
131*207e5cccSFangrui Song bfloat16_t test_vgetq_lane_bf16(bfloat16x8_t v) {
132*207e5cccSFangrui Song   return vgetq_lane_bf16(v, 7);
133*207e5cccSFangrui Song }
134*207e5cccSFangrui Song 
135*207e5cccSFangrui Song // CHECK-LABEL: @test_vset_lane_bf16(
136*207e5cccSFangrui Song // CHECK-NEXT:  entry:
137*207e5cccSFangrui Song // CHECK-NEXT:    [[VSET_LANE:%.*]] = insertelement <4 x bfloat> [[V:%.*]], bfloat [[A:%.*]], i32 1
138*207e5cccSFangrui Song // CHECK-NEXT:    ret <4 x bfloat> [[VSET_LANE]]
139*207e5cccSFangrui Song //
// Wraps vset_lane_bf16, passing scalar `a`, vector `v`, and constant lane
// index 1. The expected IR asserted above is a single insertelement of the
// scalar into the <4 x bfloat> vector at index 1.
140*207e5cccSFangrui Song bfloat16x4_t test_vset_lane_bf16(bfloat16_t a, bfloat16x4_t v) {
141*207e5cccSFangrui Song   return vset_lane_bf16(a, v, 1);
142*207e5cccSFangrui Song }
143*207e5cccSFangrui Song 
144*207e5cccSFangrui Song // CHECK-LABEL: @test_vsetq_lane_bf16(
145*207e5cccSFangrui Song // CHECK-NEXT:  entry:
146*207e5cccSFangrui Song // CHECK-NEXT:    [[VSET_LANE:%.*]] = insertelement <8 x bfloat> [[V:%.*]], bfloat [[A:%.*]], i32 7
147*207e5cccSFangrui Song // CHECK-NEXT:    ret <8 x bfloat> [[VSET_LANE]]
148*207e5cccSFangrui Song //
// Wraps vsetq_lane_bf16, passing scalar `a`, vector `v`, and constant lane
// index 7. The expected IR asserted above is a single insertelement of the
// scalar into the <8 x bfloat> vector at index 7.
149*207e5cccSFangrui Song bfloat16x8_t test_vsetq_lane_bf16(bfloat16_t a, bfloat16x8_t v) {
150*207e5cccSFangrui Song   return vsetq_lane_bf16(a, v, 7);
151*207e5cccSFangrui Song }
152*207e5cccSFangrui Song 
153*207e5cccSFangrui Song // CHECK-LABEL: @test_vduph_lane_bf16(
154*207e5cccSFangrui Song // CHECK-NEXT:  entry:
155*207e5cccSFangrui Song // CHECK-NEXT:    [[VGET_LANE:%.*]] = extractelement <4 x bfloat> [[V:%.*]], i32 1
156*207e5cccSFangrui Song // CHECK-NEXT:    ret bfloat [[VGET_LANE]]
157*207e5cccSFangrui Song //
// Wraps vduph_lane_bf16 with constant lane index 1 on a 4-lane input. The
// expected IR asserted above is a single extractelement at index 1, the same
// instruction shape this file expects for vget_lane_bf16.
158*207e5cccSFangrui Song bfloat16_t test_vduph_lane_bf16(bfloat16x4_t v) {
159*207e5cccSFangrui Song   return vduph_lane_bf16(v, 1);
160*207e5cccSFangrui Song }
161*207e5cccSFangrui Song 
162*207e5cccSFangrui Song // CHECK-LABEL: @test_vduph_laneq_bf16(
163*207e5cccSFangrui Song // CHECK-NEXT:  entry:
164*207e5cccSFangrui Song // CHECK-NEXT:    [[VGETQ_LANE:%.*]] = extractelement <8 x bfloat> [[V:%.*]], i32 7
165*207e5cccSFangrui Song // CHECK-NEXT:    ret bfloat [[VGETQ_LANE]]
166*207e5cccSFangrui Song //
// Wraps vduph_laneq_bf16 with constant lane index 7 on an 8-lane input. The
// expected IR asserted above is a single extractelement at index 7, the same
// instruction shape this file expects for vgetq_lane_bf16.
167*207e5cccSFangrui Song bfloat16_t test_vduph_laneq_bf16(bfloat16x8_t v) {
168*207e5cccSFangrui Song   return vduph_laneq_bf16(v, 7);
169*207e5cccSFangrui Song }
170