xref: /llvm-project/clang/test/CodeGen/AArch64/bf16-lane-intrinsics.c (revision 207e5ccceec8d3cc3f32723e78f2a142bc61b07d)
1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2 // RUN: %clang_cc1 -triple aarch64 -target-feature +neon -target-feature +bf16 \
3 // RUN:  -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg | FileCheck --check-prefix=CHECK-LE %s
4 // RUN: %clang_cc1 -triple aarch64_be -target-feature +neon -target-feature +bf16 \
5 // RUN:  -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg | FileCheck --check-prefix=CHECK-BE %s
6 
7 // REQUIRES: aarch64-registered-target || arm-registered-target
8 
9 #include <arm_neon.h>
10 
11 // CHECK-LE-LABEL: @test_vcopy_lane_bf16_v1(
12 // CHECK-LE-NEXT:  entry:
13 // CHECK-LE-NEXT:    [[VGET_LANE:%.*]] = extractelement <4 x bfloat> [[B:%.*]], i32 3
14 // CHECK-LE-NEXT:    [[VSET_LANE:%.*]] = insertelement <4 x bfloat> [[A:%.*]], bfloat [[VGET_LANE]], i32 1
15 // CHECK-LE-NEXT:    ret <4 x bfloat> [[VSET_LANE]]
16 //
17 // CHECK-BE-LABEL: @test_vcopy_lane_bf16_v1(
18 // CHECK-BE-NEXT:  entry:
19 // CHECK-BE-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x bfloat> [[A:%.*]], <4 x bfloat> [[A]], <4 x i32> <i32 3, i32 2, i32 1, i32 0>
20 // CHECK-BE-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <4 x bfloat> [[B:%.*]], <4 x bfloat> [[B]], <4 x i32> <i32 3, i32 2, i32 1, i32 0>
21 // CHECK-BE-NEXT:    [[VGET_LANE:%.*]] = extractelement <4 x bfloat> [[SHUFFLE1]], i32 3
22 // CHECK-BE-NEXT:    [[VSET_LANE:%.*]] = insertelement <4 x bfloat> [[SHUFFLE]], bfloat [[VGET_LANE]], i32 1
23 // CHECK-BE-NEXT:    [[SHUFFLE5:%.*]] = shufflevector <4 x bfloat> [[VSET_LANE]], <4 x bfloat> [[VSET_LANE]], <4 x i32> <i32 3, i32 2, i32 1, i32 0>
24 // CHECK-BE-NEXT:    ret <4 x bfloat> [[SHUFFLE5]]
25 //
26 bfloat16x4_t test_vcopy_lane_bf16_v1(bfloat16x4_t a, bfloat16x4_t b) { // 4-lane destination, 4-lane source
27   return vcopy_lane_bf16(a, 1, b, 3); // expected IR: extract lane 3 (last lane) of b, insert it into lane 1 of a; big-endian adds lane-reversing shuffles
28 }
29 
30 // CHECK-LE-LABEL: @test_vcopy_lane_bf16_v2(
31 // CHECK-LE-NEXT:  entry:
32 // CHECK-LE-NEXT:    [[VGET_LANE:%.*]] = extractelement <4 x bfloat> [[B:%.*]], i32 0
33 // CHECK-LE-NEXT:    [[VSET_LANE:%.*]] = insertelement <4 x bfloat> [[A:%.*]], bfloat [[VGET_LANE]], i32 2
34 // CHECK-LE-NEXT:    ret <4 x bfloat> [[VSET_LANE]]
35 //
36 // CHECK-BE-LABEL: @test_vcopy_lane_bf16_v2(
37 // CHECK-BE-NEXT:  entry:
38 // CHECK-BE-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x bfloat> [[A:%.*]], <4 x bfloat> [[A]], <4 x i32> <i32 3, i32 2, i32 1, i32 0>
39 // CHECK-BE-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <4 x bfloat> [[B:%.*]], <4 x bfloat> [[B]], <4 x i32> <i32 3, i32 2, i32 1, i32 0>
40 // CHECK-BE-NEXT:    [[VGET_LANE:%.*]] = extractelement <4 x bfloat> [[SHUFFLE1]], i32 0
41 // CHECK-BE-NEXT:    [[VSET_LANE:%.*]] = insertelement <4 x bfloat> [[SHUFFLE]], bfloat [[VGET_LANE]], i32 2
42 // CHECK-BE-NEXT:    [[SHUFFLE5:%.*]] = shufflevector <4 x bfloat> [[VSET_LANE]], <4 x bfloat> [[VSET_LANE]], <4 x i32> <i32 3, i32 2, i32 1, i32 0>
43 // CHECK-BE-NEXT:    ret <4 x bfloat> [[SHUFFLE5]]
44 //
45 bfloat16x4_t test_vcopy_lane_bf16_v2(bfloat16x4_t a, bfloat16x4_t b) { // 4-lane destination, 4-lane source, second index pair
46   return vcopy_lane_bf16(a, 2, b, 0); // expected IR: extract lane 0 (first lane) of b, insert it into lane 2 of a; big-endian adds lane-reversing shuffles
47 }
48 
49 // CHECK-LE-LABEL: @test_vcopyq_lane_bf16_v1(
50 // CHECK-LE-NEXT:  entry:
51 // CHECK-LE-NEXT:    [[VGET_LANE:%.*]] = extractelement <4 x bfloat> [[B:%.*]], i32 2
52 // CHECK-LE-NEXT:    [[VSET_LANE:%.*]] = insertelement <8 x bfloat> [[A:%.*]], bfloat [[VGET_LANE]], i32 0
53 // CHECK-LE-NEXT:    ret <8 x bfloat> [[VSET_LANE]]
54 //
55 // CHECK-BE-LABEL: @test_vcopyq_lane_bf16_v1(
56 // CHECK-BE-NEXT:  entry:
57 // CHECK-BE-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> [[A]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
58 // CHECK-BE-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <4 x bfloat> [[B:%.*]], <4 x bfloat> [[B]], <4 x i32> <i32 3, i32 2, i32 1, i32 0>
59 // CHECK-BE-NEXT:    [[VGET_LANE:%.*]] = extractelement <4 x bfloat> [[SHUFFLE1]], i32 2
60 // CHECK-BE-NEXT:    [[VSET_LANE:%.*]] = insertelement <8 x bfloat> [[SHUFFLE]], bfloat [[VGET_LANE]], i32 0
61 // CHECK-BE-NEXT:    [[SHUFFLE5:%.*]] = shufflevector <8 x bfloat> [[VSET_LANE]], <8 x bfloat> [[VSET_LANE]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
62 // CHECK-BE-NEXT:    ret <8 x bfloat> [[SHUFFLE5]]
63 //
64 bfloat16x8_t test_vcopyq_lane_bf16_v1(bfloat16x8_t a, bfloat16x4_t b) { // 8-lane destination, 4-lane source
65   return vcopyq_lane_bf16(a, 0, b, 2); // expected IR: extract lane 2 of b, insert it into lane 0 (first lane) of a; big-endian adds lane-reversing shuffles
66 }
67 
68 // CHECK-LE-LABEL: @test_vcopyq_lane_bf16_v2(
69 // CHECK-LE-NEXT:  entry:
70 // CHECK-LE-NEXT:    [[VGET_LANE:%.*]] = extractelement <4 x bfloat> [[B:%.*]], i32 0
71 // CHECK-LE-NEXT:    [[VSET_LANE:%.*]] = insertelement <8 x bfloat> [[A:%.*]], bfloat [[VGET_LANE]], i32 6
72 // CHECK-LE-NEXT:    ret <8 x bfloat> [[VSET_LANE]]
73 //
74 // CHECK-BE-LABEL: @test_vcopyq_lane_bf16_v2(
75 // CHECK-BE-NEXT:  entry:
76 // CHECK-BE-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> [[A]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
77 // CHECK-BE-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <4 x bfloat> [[B:%.*]], <4 x bfloat> [[B]], <4 x i32> <i32 3, i32 2, i32 1, i32 0>
78 // CHECK-BE-NEXT:    [[VGET_LANE:%.*]] = extractelement <4 x bfloat> [[SHUFFLE1]], i32 0
79 // CHECK-BE-NEXT:    [[VSET_LANE:%.*]] = insertelement <8 x bfloat> [[SHUFFLE]], bfloat [[VGET_LANE]], i32 6
80 // CHECK-BE-NEXT:    [[SHUFFLE5:%.*]] = shufflevector <8 x bfloat> [[VSET_LANE]], <8 x bfloat> [[VSET_LANE]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
81 // CHECK-BE-NEXT:    ret <8 x bfloat> [[SHUFFLE5]]
82 //
83 bfloat16x8_t test_vcopyq_lane_bf16_v2(bfloat16x8_t a, bfloat16x4_t b) { // 8-lane destination, 4-lane source, second index pair
84   return vcopyq_lane_bf16(a, 6, b, 0); // expected IR: extract lane 0 (first lane) of b, insert it into lane 6 of a; big-endian adds lane-reversing shuffles
85 }
86 
87 // CHECK-LE-LABEL: @test_vcopy_laneq_bf16_v1(
88 // CHECK-LE-NEXT:  entry:
89 // CHECK-LE-NEXT:    [[VGETQ_LANE:%.*]] = extractelement <8 x bfloat> [[B:%.*]], i32 7
90 // CHECK-LE-NEXT:    [[VSET_LANE:%.*]] = insertelement <4 x bfloat> [[A:%.*]], bfloat [[VGETQ_LANE]], i32 0
91 // CHECK-LE-NEXT:    ret <4 x bfloat> [[VSET_LANE]]
92 //
93 // CHECK-BE-LABEL: @test_vcopy_laneq_bf16_v1(
94 // CHECK-BE-NEXT:  entry:
95 // CHECK-BE-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x bfloat> [[A:%.*]], <4 x bfloat> [[A]], <4 x i32> <i32 3, i32 2, i32 1, i32 0>
96 // CHECK-BE-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <8 x bfloat> [[B:%.*]], <8 x bfloat> [[B]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
97 // CHECK-BE-NEXT:    [[VGETQ_LANE:%.*]] = extractelement <8 x bfloat> [[SHUFFLE1]], i32 7
98 // CHECK-BE-NEXT:    [[VSET_LANE:%.*]] = insertelement <4 x bfloat> [[SHUFFLE]], bfloat [[VGETQ_LANE]], i32 0
99 // CHECK-BE-NEXT:    [[SHUFFLE5:%.*]] = shufflevector <4 x bfloat> [[VSET_LANE]], <4 x bfloat> [[VSET_LANE]], <4 x i32> <i32 3, i32 2, i32 1, i32 0>
100 // CHECK-BE-NEXT:    ret <4 x bfloat> [[SHUFFLE5]]
101 //
102 bfloat16x4_t test_vcopy_laneq_bf16_v1(bfloat16x4_t a, bfloat16x8_t b) { // 4-lane destination, 8-lane source
103   return vcopy_laneq_bf16(a, 0, b, 7); // expected IR: extract lane 7 (last lane) of b, insert it into lane 0 of a; big-endian adds lane-reversing shuffles
104 }
105 
106 // CHECK-LE-LABEL: @test_vcopy_laneq_bf16_v2(
107 // CHECK-LE-NEXT:  entry:
108 // CHECK-LE-NEXT:    [[VGETQ_LANE:%.*]] = extractelement <8 x bfloat> [[B:%.*]], i32 4
109 // CHECK-LE-NEXT:    [[VSET_LANE:%.*]] = insertelement <4 x bfloat> [[A:%.*]], bfloat [[VGETQ_LANE]], i32 3
110 // CHECK-LE-NEXT:    ret <4 x bfloat> [[VSET_LANE]]
111 //
112 // CHECK-BE-LABEL: @test_vcopy_laneq_bf16_v2(
113 // CHECK-BE-NEXT:  entry:
114 // CHECK-BE-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x bfloat> [[A:%.*]], <4 x bfloat> [[A]], <4 x i32> <i32 3, i32 2, i32 1, i32 0>
115 // CHECK-BE-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <8 x bfloat> [[B:%.*]], <8 x bfloat> [[B]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
116 // CHECK-BE-NEXT:    [[VGETQ_LANE:%.*]] = extractelement <8 x bfloat> [[SHUFFLE1]], i32 4
117 // CHECK-BE-NEXT:    [[VSET_LANE:%.*]] = insertelement <4 x bfloat> [[SHUFFLE]], bfloat [[VGETQ_LANE]], i32 3
118 // CHECK-BE-NEXT:    [[SHUFFLE5:%.*]] = shufflevector <4 x bfloat> [[VSET_LANE]], <4 x bfloat> [[VSET_LANE]], <4 x i32> <i32 3, i32 2, i32 1, i32 0>
119 // CHECK-BE-NEXT:    ret <4 x bfloat> [[SHUFFLE5]]
120 //
121 bfloat16x4_t test_vcopy_laneq_bf16_v2(bfloat16x4_t a, bfloat16x8_t b) { // 4-lane destination, 8-lane source, second index pair
122   return vcopy_laneq_bf16(a, 3, b, 4); // expected IR: extract lane 4 of b, insert it into lane 3 (last lane) of a; big-endian adds lane-reversing shuffles
123 }
124 
125 // CHECK-LE-LABEL: @test_vcopyq_laneq_bf16_v1(
126 // CHECK-LE-NEXT:  entry:
127 // CHECK-LE-NEXT:    [[VGETQ_LANE:%.*]] = extractelement <8 x bfloat> [[B:%.*]], i32 7
128 // CHECK-LE-NEXT:    [[VSET_LANE:%.*]] = insertelement <8 x bfloat> [[A:%.*]], bfloat [[VGETQ_LANE]], i32 3
129 // CHECK-LE-NEXT:    ret <8 x bfloat> [[VSET_LANE]]
130 //
131 // CHECK-BE-LABEL: @test_vcopyq_laneq_bf16_v1(
132 // CHECK-BE-NEXT:  entry:
133 // CHECK-BE-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> [[A]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
134 // CHECK-BE-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <8 x bfloat> [[B:%.*]], <8 x bfloat> [[B]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
135 // CHECK-BE-NEXT:    [[VGETQ_LANE:%.*]] = extractelement <8 x bfloat> [[SHUFFLE1]], i32 7
136 // CHECK-BE-NEXT:    [[VSET_LANE:%.*]] = insertelement <8 x bfloat> [[SHUFFLE]], bfloat [[VGETQ_LANE]], i32 3
137 // CHECK-BE-NEXT:    [[SHUFFLE5:%.*]] = shufflevector <8 x bfloat> [[VSET_LANE]], <8 x bfloat> [[VSET_LANE]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
138 // CHECK-BE-NEXT:    ret <8 x bfloat> [[SHUFFLE5]]
139 //
140 bfloat16x8_t test_vcopyq_laneq_bf16_v1(bfloat16x8_t a, bfloat16x8_t b) { // 8-lane destination, 8-lane source
141   return vcopyq_laneq_bf16(a, 3, b, 7); // expected IR: extract lane 7 (last lane) of b, insert it into lane 3 of a; big-endian adds lane-reversing shuffles
142 
143 }
144 
145 // CHECK-LE-LABEL: @test_vcopyq_laneq_bf16_v2(
146 // CHECK-LE-NEXT:  entry:
147 // CHECK-LE-NEXT:    [[VGETQ_LANE:%.*]] = extractelement <8 x bfloat> [[B:%.*]], i32 2
148 // CHECK-LE-NEXT:    [[VSET_LANE:%.*]] = insertelement <8 x bfloat> [[A:%.*]], bfloat [[VGETQ_LANE]], i32 6
149 // CHECK-LE-NEXT:    ret <8 x bfloat> [[VSET_LANE]]
150 //
151 // CHECK-BE-LABEL: @test_vcopyq_laneq_bf16_v2(
152 // CHECK-BE-NEXT:  entry:
153 // CHECK-BE-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> [[A]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
154 // CHECK-BE-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <8 x bfloat> [[B:%.*]], <8 x bfloat> [[B]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
155 // CHECK-BE-NEXT:    [[VGETQ_LANE:%.*]] = extractelement <8 x bfloat> [[SHUFFLE1]], i32 2
156 // CHECK-BE-NEXT:    [[VSET_LANE:%.*]] = insertelement <8 x bfloat> [[SHUFFLE]], bfloat [[VGETQ_LANE]], i32 6
157 // CHECK-BE-NEXT:    [[SHUFFLE5:%.*]] = shufflevector <8 x bfloat> [[VSET_LANE]], <8 x bfloat> [[VSET_LANE]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
158 // CHECK-BE-NEXT:    ret <8 x bfloat> [[SHUFFLE5]]
159 //
160 bfloat16x8_t test_vcopyq_laneq_bf16_v2(bfloat16x8_t a, bfloat16x8_t b) { // 8-lane destination, 8-lane source, second index pair
161   return vcopyq_laneq_bf16(a, 6, b, 2); // expected IR: extract lane 2 of b, insert it into lane 6 of a; big-endian adds lane-reversing shuffles
162 }
163 
164