xref: /llvm-project/llvm/test/CodeGen/AArch64/bf16-v4-instructions.ll (revision 547bfda56b2e3f3a4c6d2357d3566dcd3fa996ad)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=aarch64 -mattr=-bf16 | FileCheck %s --check-prefixes=CHECK,CHECK-CVT
3; RUN: llc < %s -mtriple=aarch64 -mattr=+bf16 | FileCheck %s --check-prefixes=CHECK,CHECK-BF16
4
; add_h: fadd on <4 x bfloat>. Both runs expect the operands to be widened to
; f32 lanes with shll #16 and added as a 4s vector. The -bf16 run then expects
; an integer rounding sequence (ushr/and/add/addhn) to narrow the result back
; to bf16; the +bf16 run expects a single bfcvtn.
5define <4 x bfloat> @add_h(<4 x bfloat> %a, <4 x bfloat> %b) {
6; CHECK-CVT-LABEL: add_h:
7; CHECK-CVT:       // %bb.0: // %entry
8; CHECK-CVT-NEXT:    shll v1.4s, v1.4h, #16
9; CHECK-CVT-NEXT:    shll v0.4s, v0.4h, #16
10; CHECK-CVT-NEXT:    fadd v0.4s, v0.4s, v1.4s
11; CHECK-CVT-NEXT:    movi v1.4s, #1
12; CHECK-CVT-NEXT:    ushr v2.4s, v0.4s, #16
13; CHECK-CVT-NEXT:    and v1.16b, v2.16b, v1.16b
14; CHECK-CVT-NEXT:    movi v2.4s, #127, msl #8
15; CHECK-CVT-NEXT:    add v0.4s, v1.4s, v0.4s
16; CHECK-CVT-NEXT:    addhn v0.4h, v0.4s, v2.4s
17; CHECK-CVT-NEXT:    ret
18;
19; CHECK-BF16-LABEL: add_h:
20; CHECK-BF16:       // %bb.0: // %entry
21; CHECK-BF16-NEXT:    shll v1.4s, v1.4h, #16
22; CHECK-BF16-NEXT:    shll v0.4s, v0.4h, #16
23; CHECK-BF16-NEXT:    fadd v0.4s, v0.4s, v1.4s
24; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
25; CHECK-BF16-NEXT:    ret
26entry:
27
28  %0 = fadd <4 x bfloat> %a, %b
29  ret <4 x bfloat> %0
30}
31
32
; build_h4: returns a constant splat of bfloat 0xR3CCD and ignores %a. Both runs
; share one expectation: materialise 0x3ccd in w8 and dup it across the four
; 16-bit lanes.
33define <4 x bfloat> @build_h4(<4 x bfloat> %a) {
34; CHECK-LABEL: build_h4:
35; CHECK:       // %bb.0: // %entry
36; CHECK-NEXT:    mov w8, #15565 // =0x3ccd
37; CHECK-NEXT:    dup v0.4h, w8
38; CHECK-NEXT:    ret
39entry:
40  ret <4 x bfloat> <bfloat 0xR3CCD, bfloat 0xR3CCD, bfloat 0xR3CCD, bfloat 0xR3CCD>
41}
42
43
; sub_h: fsub on <4 x bfloat>. Both runs expect the operands to be widened to
; f32 lanes with shll #16 and subtracted as a 4s vector. The -bf16 run then
; expects the integer rounding sequence (ushr/and/add/addhn) to narrow the
; result; the +bf16 run expects a single bfcvtn.
44define <4 x bfloat> @sub_h(<4 x bfloat> %a, <4 x bfloat> %b) {
45; CHECK-CVT-LABEL: sub_h:
46; CHECK-CVT:       // %bb.0: // %entry
47; CHECK-CVT-NEXT:    shll v1.4s, v1.4h, #16
48; CHECK-CVT-NEXT:    shll v0.4s, v0.4h, #16
49; CHECK-CVT-NEXT:    fsub v0.4s, v0.4s, v1.4s
50; CHECK-CVT-NEXT:    movi v1.4s, #1
51; CHECK-CVT-NEXT:    ushr v2.4s, v0.4s, #16
52; CHECK-CVT-NEXT:    and v1.16b, v2.16b, v1.16b
53; CHECK-CVT-NEXT:    movi v2.4s, #127, msl #8
54; CHECK-CVT-NEXT:    add v0.4s, v1.4s, v0.4s
55; CHECK-CVT-NEXT:    addhn v0.4h, v0.4s, v2.4s
56; CHECK-CVT-NEXT:    ret
57;
58; CHECK-BF16-LABEL: sub_h:
59; CHECK-BF16:       // %bb.0: // %entry
60; CHECK-BF16-NEXT:    shll v1.4s, v1.4h, #16
61; CHECK-BF16-NEXT:    shll v0.4s, v0.4h, #16
62; CHECK-BF16-NEXT:    fsub v0.4s, v0.4s, v1.4s
63; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
64; CHECK-BF16-NEXT:    ret
65entry:
66
67  %0 = fsub <4 x bfloat> %a, %b
68  ret <4 x bfloat> %0
69}
70
71
; mul_h: fmul on <4 x bfloat>. Both runs expect the operands to be widened to
; f32 lanes with shll #16 and multiplied as a 4s vector. The -bf16 run then
; expects the integer rounding sequence (ushr/and/add/addhn) to narrow the
; result; the +bf16 run expects a single bfcvtn.
72define <4 x bfloat> @mul_h(<4 x bfloat> %a, <4 x bfloat> %b) {
73; CHECK-CVT-LABEL: mul_h:
74; CHECK-CVT:       // %bb.0: // %entry
75; CHECK-CVT-NEXT:    shll v1.4s, v1.4h, #16
76; CHECK-CVT-NEXT:    shll v0.4s, v0.4h, #16
77; CHECK-CVT-NEXT:    fmul v0.4s, v0.4s, v1.4s
78; CHECK-CVT-NEXT:    movi v1.4s, #1
79; CHECK-CVT-NEXT:    ushr v2.4s, v0.4s, #16
80; CHECK-CVT-NEXT:    and v1.16b, v2.16b, v1.16b
81; CHECK-CVT-NEXT:    movi v2.4s, #127, msl #8
82; CHECK-CVT-NEXT:    add v0.4s, v1.4s, v0.4s
83; CHECK-CVT-NEXT:    addhn v0.4h, v0.4s, v2.4s
84; CHECK-CVT-NEXT:    ret
85;
86; CHECK-BF16-LABEL: mul_h:
87; CHECK-BF16:       // %bb.0: // %entry
88; CHECK-BF16-NEXT:    shll v1.4s, v1.4h, #16
89; CHECK-BF16-NEXT:    shll v0.4s, v0.4h, #16
90; CHECK-BF16-NEXT:    fmul v0.4s, v0.4s, v1.4s
91; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
92; CHECK-BF16-NEXT:    ret
93entry:
94
95  %0 = fmul <4 x bfloat> %a, %b
96  ret <4 x bfloat> %0
97}
98
99
; div_h: fdiv on <4 x bfloat>. Both runs expect the operands to be widened to
; f32 lanes with shll #16 and divided as a 4s vector. The -bf16 run then
; expects the integer rounding sequence (ushr/and/add/addhn) to narrow the
; result; the +bf16 run expects a single bfcvtn.
100define <4 x bfloat> @div_h(<4 x bfloat> %a, <4 x bfloat> %b) {
101; CHECK-CVT-LABEL: div_h:
102; CHECK-CVT:       // %bb.0: // %entry
103; CHECK-CVT-NEXT:    shll v1.4s, v1.4h, #16
104; CHECK-CVT-NEXT:    shll v0.4s, v0.4h, #16
105; CHECK-CVT-NEXT:    fdiv v0.4s, v0.4s, v1.4s
106; CHECK-CVT-NEXT:    movi v1.4s, #1
107; CHECK-CVT-NEXT:    ushr v2.4s, v0.4s, #16
108; CHECK-CVT-NEXT:    and v1.16b, v2.16b, v1.16b
109; CHECK-CVT-NEXT:    movi v2.4s, #127, msl #8
110; CHECK-CVT-NEXT:    add v0.4s, v1.4s, v0.4s
111; CHECK-CVT-NEXT:    addhn v0.4h, v0.4s, v2.4s
112; CHECK-CVT-NEXT:    ret
113;
114; CHECK-BF16-LABEL: div_h:
115; CHECK-BF16:       // %bb.0: // %entry
116; CHECK-BF16-NEXT:    shll v1.4s, v1.4h, #16
117; CHECK-BF16-NEXT:    shll v0.4s, v0.4h, #16
118; CHECK-BF16-NEXT:    fdiv v0.4s, v0.4s, v1.4s
119; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
120; CHECK-BF16-NEXT:    ret
121entry:
122
123  %0 = fdiv <4 x bfloat> %a, %b
124  ret <4 x bfloat> %0
125}
126
127
; load_h: loads a <4 x bfloat> from %a (IR alignment 4). Both runs expect a
; single 64-bit ldr into d0.
128define <4 x bfloat> @load_h(ptr %a) {
129; CHECK-LABEL: load_h:
130; CHECK:       // %bb.0: // %entry
131; CHECK-NEXT:    ldr d0, [x0]
132; CHECK-NEXT:    ret
133entry:
134  %0 = load <4 x bfloat>, ptr %a, align 4
135  ret <4 x bfloat> %0
136}
137
138
; store_h: stores the <4 x bfloat> %b to %a (IR alignment 4). Both runs expect
; a single 64-bit str of d0.
139define void @store_h(ptr %a, <4 x bfloat> %b) {
140; CHECK-LABEL: store_h:
141; CHECK:       // %bb.0: // %entry
142; CHECK-NEXT:    str d0, [x0]
143; CHECK-NEXT:    ret
144entry:
145  store <4 x bfloat> %b, ptr %a, align 4
146  ret void
147}
148
; s_to_h: fptrunc <4 x float> to <4 x bfloat>. The -bf16 run expects an integer
; rounding sequence plus a NaN path: fcmeq of the input with itself builds the
; not-NaN mask, orr #64, lsl #16 sets a mantissa bit in the raw input, and bit
; selects the rounded value for non-NaN lanes before shrn keeps the high
; halves. The +bf16 run expects a single bfcvtn.
149define <4 x bfloat> @s_to_h(<4 x float> %a) {
150; CHECK-CVT-LABEL: s_to_h:
151; CHECK-CVT:       // %bb.0:
152; CHECK-CVT-NEXT:    movi v1.4s, #1
153; CHECK-CVT-NEXT:    movi v2.4s, #127, msl #8
154; CHECK-CVT-NEXT:    ushr v3.4s, v0.4s, #16
155; CHECK-CVT-NEXT:    and v1.16b, v3.16b, v1.16b
156; CHECK-CVT-NEXT:    add v2.4s, v0.4s, v2.4s
157; CHECK-CVT-NEXT:    fcmeq v3.4s, v0.4s, v0.4s
158; CHECK-CVT-NEXT:    orr v0.4s, #64, lsl #16
159; CHECK-CVT-NEXT:    add v1.4s, v1.4s, v2.4s
160; CHECK-CVT-NEXT:    bit v0.16b, v1.16b, v3.16b
161; CHECK-CVT-NEXT:    shrn v0.4h, v0.4s, #16
162; CHECK-CVT-NEXT:    ret
163;
164; CHECK-BF16-LABEL: s_to_h:
165; CHECK-BF16:       // %bb.0:
166; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
167; CHECK-BF16-NEXT:    ret
168  %1 = fptrunc <4 x float> %a to <4 x bfloat>
169  ret <4 x bfloat> %1
170}
171
; d_to_h: fptrunc <4 x double> to <4 x bfloat>. Both runs expect the two f64
; input registers to be narrowed to one 4s vector with fcvtxn/fcvtxn2 first.
; The -bf16 run then expects the same integer rounding and NaN-select sequence
; as the f32 truncation test (fcmeq/orr/bit/shrn); the +bf16 run expects
; bfcvtn.
172define <4 x bfloat> @d_to_h(<4 x double> %a) {
173; CHECK-CVT-LABEL: d_to_h:
174; CHECK-CVT:       // %bb.0:
175; CHECK-CVT-NEXT:    fcvtxn v0.2s, v0.2d
176; CHECK-CVT-NEXT:    movi v2.4s, #127, msl #8
177; CHECK-CVT-NEXT:    fcvtxn2 v0.4s, v1.2d
178; CHECK-CVT-NEXT:    movi v1.4s, #1
179; CHECK-CVT-NEXT:    ushr v3.4s, v0.4s, #16
180; CHECK-CVT-NEXT:    add v2.4s, v0.4s, v2.4s
181; CHECK-CVT-NEXT:    and v1.16b, v3.16b, v1.16b
182; CHECK-CVT-NEXT:    fcmeq v3.4s, v0.4s, v0.4s
183; CHECK-CVT-NEXT:    orr v0.4s, #64, lsl #16
184; CHECK-CVT-NEXT:    add v1.4s, v1.4s, v2.4s
185; CHECK-CVT-NEXT:    bit v0.16b, v1.16b, v3.16b
186; CHECK-CVT-NEXT:    shrn v0.4h, v0.4s, #16
187; CHECK-CVT-NEXT:    ret
188;
189; CHECK-BF16-LABEL: d_to_h:
190; CHECK-BF16:       // %bb.0:
191; CHECK-BF16-NEXT:    fcvtxn v0.2s, v0.2d
192; CHECK-BF16-NEXT:    fcvtxn2 v0.4s, v1.2d
193; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
194; CHECK-BF16-NEXT:    ret
195  %1 = fptrunc <4 x double> %a to <4 x bfloat>
196  ret <4 x bfloat> %1
197}
198
; h_to_s: fpext <4 x bfloat> to <4 x float>. Both runs expect a single
; shll #16, which moves each bf16 value into the top half of a 32-bit lane.
199define <4 x float> @h_to_s(<4 x bfloat> %a) {
200; CHECK-LABEL: h_to_s:
201; CHECK:       // %bb.0:
202; CHECK-NEXT:    shll v0.4s, v0.4h, #16
203; CHECK-NEXT:    ret
204  %1 = fpext <4 x bfloat> %a to <4 x float>
205  ret <4 x float> %1
206}
207
; h_to_d: fpext <4 x bfloat> to <4 x double>. Both runs expect the value to be
; widened to f32 lanes with shll #16 and then split into two f64 vectors: the
; high half via fcvtl2 into v1 and the low half via fcvtl into v0.
208define <4 x double> @h_to_d(<4 x bfloat> %a) {
209; CHECK-LABEL: h_to_d:
210; CHECK:       // %bb.0:
211; CHECK-NEXT:    shll v0.4s, v0.4h, #16
212; CHECK-NEXT:    fcvtl2 v1.2d, v0.4s
213; CHECK-NEXT:    fcvtl v0.2d, v0.2s
214; CHECK-NEXT:    ret
215  %1 = fpext <4 x bfloat> %a to <4 x double>
216  ret <4 x double> %1
217}
218
; bitcast_i_to_h: bitcast <4 x i16> to <4 x bfloat>. The unnamed leading float
; parameter takes the first FP/SIMD argument register, so %a arrives in d1 and
; the only expected instruction is the register move into the return register.
219define <4 x bfloat> @bitcast_i_to_h(float, <4 x i16> %a) {
220; CHECK-LABEL: bitcast_i_to_h:
221; CHECK:       // %bb.0:
222; CHECK-NEXT:    fmov d0, d1
223; CHECK-NEXT:    ret
224  %2 = bitcast <4 x i16> %a to <4 x bfloat>
225  ret <4 x bfloat> %2
226}
227
; bitcast_h_to_i: bitcast <4 x bfloat> to <4 x i16>. The unnamed leading float
; parameter takes the first FP/SIMD argument register, so %a arrives in d1 and
; the only expected instruction is the register move into the return register.
228define <4 x i16> @bitcast_h_to_i(float, <4 x bfloat> %a) {
229; CHECK-LABEL: bitcast_h_to_i:
230; CHECK:       // %bb.0:
231; CHECK-NEXT:    fmov d0, d1
232; CHECK-NEXT:    ret
233  %2 = bitcast <4 x bfloat> %a to <4 x i16>
234  ret <4 x i16> %2
235}
236
; sitofp_i8: sitofp <4 x i8> to <4 x bfloat>. Both runs expect the i8 values
; (held in 16-bit lanes) to be sign-extended with shl/sshr #8, widened to
; 32-bit lanes with sshll and converted with scvtf. The -bf16 run then expects
; the integer rounding sequence (ushr/and/add/addhn); the +bf16 run expects
; bfcvtn.
237define <4 x bfloat> @sitofp_i8(<4 x i8> %a) #0 {
238; CHECK-CVT-LABEL: sitofp_i8:
239; CHECK-CVT:       // %bb.0:
240; CHECK-CVT-NEXT:    shl v0.4h, v0.4h, #8
241; CHECK-CVT-NEXT:    movi v1.4s, #1
242; CHECK-CVT-NEXT:    sshr v0.4h, v0.4h, #8
243; CHECK-CVT-NEXT:    sshll v0.4s, v0.4h, #0
244; CHECK-CVT-NEXT:    scvtf v0.4s, v0.4s
245; CHECK-CVT-NEXT:    ushr v2.4s, v0.4s, #16
246; CHECK-CVT-NEXT:    and v1.16b, v2.16b, v1.16b
247; CHECK-CVT-NEXT:    movi v2.4s, #127, msl #8
248; CHECK-CVT-NEXT:    add v0.4s, v1.4s, v0.4s
249; CHECK-CVT-NEXT:    addhn v0.4h, v0.4s, v2.4s
250; CHECK-CVT-NEXT:    ret
251;
252; CHECK-BF16-LABEL: sitofp_i8:
253; CHECK-BF16:       // %bb.0:
254; CHECK-BF16-NEXT:    shl v0.4h, v0.4h, #8
255; CHECK-BF16-NEXT:    sshr v0.4h, v0.4h, #8
256; CHECK-BF16-NEXT:    sshll v0.4s, v0.4h, #0
257; CHECK-BF16-NEXT:    scvtf v0.4s, v0.4s
258; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
259; CHECK-BF16-NEXT:    ret
260  %1 = sitofp <4 x i8> %a to <4 x bfloat>
261  ret <4 x bfloat> %1
262}
263
; sitofp_i16: sitofp <4 x i16> to <4 x bfloat>. Both runs expect sshll to widen
; to 32-bit lanes followed by scvtf. The -bf16 run then expects the integer
; rounding sequence (ushr/and/add/addhn); the +bf16 run expects bfcvtn.
264define <4 x bfloat> @sitofp_i16(<4 x i16> %a) #0 {
265; CHECK-CVT-LABEL: sitofp_i16:
266; CHECK-CVT:       // %bb.0:
267; CHECK-CVT-NEXT:    sshll v0.4s, v0.4h, #0
268; CHECK-CVT-NEXT:    movi v1.4s, #1
269; CHECK-CVT-NEXT:    scvtf v0.4s, v0.4s
270; CHECK-CVT-NEXT:    ushr v2.4s, v0.4s, #16
271; CHECK-CVT-NEXT:    and v1.16b, v2.16b, v1.16b
272; CHECK-CVT-NEXT:    movi v2.4s, #127, msl #8
273; CHECK-CVT-NEXT:    add v0.4s, v1.4s, v0.4s
274; CHECK-CVT-NEXT:    addhn v0.4h, v0.4s, v2.4s
275; CHECK-CVT-NEXT:    ret
276;
277; CHECK-BF16-LABEL: sitofp_i16:
278; CHECK-BF16:       // %bb.0:
279; CHECK-BF16-NEXT:    sshll v0.4s, v0.4h, #0
280; CHECK-BF16-NEXT:    scvtf v0.4s, v0.4s
281; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
282; CHECK-BF16-NEXT:    ret
283  %1 = sitofp <4 x i16> %a to <4 x bfloat>
284  ret <4 x bfloat> %1
285}
286
287
; sitofp_i32: sitofp <4 x i32> to <4 x bfloat>. Both runs expect scvtf on the
; 4s vector. The -bf16 run then expects the integer rounding sequence
; (ushr/and/add/addhn); the +bf16 run expects bfcvtn.
288define <4 x bfloat> @sitofp_i32(<4 x i32> %a) #0 {
289; CHECK-CVT-LABEL: sitofp_i32:
290; CHECK-CVT:       // %bb.0:
291; CHECK-CVT-NEXT:    scvtf v0.4s, v0.4s
292; CHECK-CVT-NEXT:    movi v1.4s, #1
293; CHECK-CVT-NEXT:    ushr v2.4s, v0.4s, #16
294; CHECK-CVT-NEXT:    and v1.16b, v2.16b, v1.16b
295; CHECK-CVT-NEXT:    movi v2.4s, #127, msl #8
296; CHECK-CVT-NEXT:    add v0.4s, v1.4s, v0.4s
297; CHECK-CVT-NEXT:    addhn v0.4h, v0.4s, v2.4s
298; CHECK-CVT-NEXT:    ret
299;
300; CHECK-BF16-LABEL: sitofp_i32:
301; CHECK-BF16:       // %bb.0:
302; CHECK-BF16-NEXT:    scvtf v0.4s, v0.4s
303; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
304; CHECK-BF16-NEXT:    ret
305  %1 = sitofp <4 x i32> %a to <4 x bfloat>
306  ret <4 x bfloat> %1
307}
308
309
; sitofp_i64: sitofp <4 x i64> to <4 x bfloat>. Both runs expect each 2d input
; register to be converted with scvtf and the pair narrowed to one 4s vector
; with fcvtn/fcvtn2. The -bf16 run then expects the integer rounding sequence
; with the NaN-select steps (fcmeq/orr/bit/shrn); the +bf16 run expects bfcvtn.
310define <4 x bfloat> @sitofp_i64(<4 x i64> %a) #0 {
311; CHECK-CVT-LABEL: sitofp_i64:
312; CHECK-CVT:       // %bb.0:
313; CHECK-CVT-NEXT:    scvtf v0.2d, v0.2d
314; CHECK-CVT-NEXT:    scvtf v1.2d, v1.2d
315; CHECK-CVT-NEXT:    movi v2.4s, #127, msl #8
316; CHECK-CVT-NEXT:    fcvtn v0.2s, v0.2d
317; CHECK-CVT-NEXT:    fcvtn2 v0.4s, v1.2d
318; CHECK-CVT-NEXT:    movi v1.4s, #1
319; CHECK-CVT-NEXT:    ushr v3.4s, v0.4s, #16
320; CHECK-CVT-NEXT:    add v2.4s, v0.4s, v2.4s
321; CHECK-CVT-NEXT:    and v1.16b, v3.16b, v1.16b
322; CHECK-CVT-NEXT:    fcmeq v3.4s, v0.4s, v0.4s
323; CHECK-CVT-NEXT:    orr v0.4s, #64, lsl #16
324; CHECK-CVT-NEXT:    add v1.4s, v1.4s, v2.4s
325; CHECK-CVT-NEXT:    bit v0.16b, v1.16b, v3.16b
326; CHECK-CVT-NEXT:    shrn v0.4h, v0.4s, #16
327; CHECK-CVT-NEXT:    ret
328;
329; CHECK-BF16-LABEL: sitofp_i64:
330; CHECK-BF16:       // %bb.0:
331; CHECK-BF16-NEXT:    scvtf v0.2d, v0.2d
332; CHECK-BF16-NEXT:    scvtf v1.2d, v1.2d
333; CHECK-BF16-NEXT:    fcvtn v0.2s, v0.2d
334; CHECK-BF16-NEXT:    fcvtn2 v0.4s, v1.2d
335; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
336; CHECK-BF16-NEXT:    ret
337  %1 = sitofp <4 x i64> %a to <4 x bfloat>
338  ret <4 x bfloat> %1
339}
340
; uitofp_i8: uitofp <4 x i8> to <4 x bfloat>. Both runs expect the high byte of
; each 16-bit lane to be cleared with bic (zero-extending the i8), then ushll
; to 32-bit lanes and ucvtf. The -bf16 run then expects the integer rounding
; sequence (ushr/and/add/addhn); the +bf16 run expects bfcvtn.
341define <4 x bfloat> @uitofp_i8(<4 x i8> %a) #0 {
342; CHECK-CVT-LABEL: uitofp_i8:
343; CHECK-CVT:       // %bb.0:
344; CHECK-CVT-NEXT:    bic v0.4h, #255, lsl #8
345; CHECK-CVT-NEXT:    movi v1.4s, #1
346; CHECK-CVT-NEXT:    ushll v0.4s, v0.4h, #0
347; CHECK-CVT-NEXT:    ucvtf v0.4s, v0.4s
348; CHECK-CVT-NEXT:    ushr v2.4s, v0.4s, #16
349; CHECK-CVT-NEXT:    and v1.16b, v2.16b, v1.16b
350; CHECK-CVT-NEXT:    movi v2.4s, #127, msl #8
351; CHECK-CVT-NEXT:    add v0.4s, v1.4s, v0.4s
352; CHECK-CVT-NEXT:    addhn v0.4h, v0.4s, v2.4s
353; CHECK-CVT-NEXT:    ret
354;
355; CHECK-BF16-LABEL: uitofp_i8:
356; CHECK-BF16:       // %bb.0:
357; CHECK-BF16-NEXT:    bic v0.4h, #255, lsl #8
358; CHECK-BF16-NEXT:    ushll v0.4s, v0.4h, #0
359; CHECK-BF16-NEXT:    ucvtf v0.4s, v0.4s
360; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
361; CHECK-BF16-NEXT:    ret
362  %1 = uitofp <4 x i8> %a to <4 x bfloat>
363  ret <4 x bfloat> %1
364}
365
366
; uitofp_i16: uitofp <4 x i16> to <4 x bfloat>. Both runs expect ushll to widen
; to 32-bit lanes followed by ucvtf. The -bf16 run then expects the integer
; rounding sequence (ushr/and/add/addhn); the +bf16 run expects bfcvtn.
367define <4 x bfloat> @uitofp_i16(<4 x i16> %a) #0 {
368; CHECK-CVT-LABEL: uitofp_i16:
369; CHECK-CVT:       // %bb.0:
370; CHECK-CVT-NEXT:    ushll v0.4s, v0.4h, #0
371; CHECK-CVT-NEXT:    movi v1.4s, #1
372; CHECK-CVT-NEXT:    ucvtf v0.4s, v0.4s
373; CHECK-CVT-NEXT:    ushr v2.4s, v0.4s, #16
374; CHECK-CVT-NEXT:    and v1.16b, v2.16b, v1.16b
375; CHECK-CVT-NEXT:    movi v2.4s, #127, msl #8
376; CHECK-CVT-NEXT:    add v0.4s, v1.4s, v0.4s
377; CHECK-CVT-NEXT:    addhn v0.4h, v0.4s, v2.4s
378; CHECK-CVT-NEXT:    ret
379;
380; CHECK-BF16-LABEL: uitofp_i16:
381; CHECK-BF16:       // %bb.0:
382; CHECK-BF16-NEXT:    ushll v0.4s, v0.4h, #0
383; CHECK-BF16-NEXT:    ucvtf v0.4s, v0.4s
384; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
385; CHECK-BF16-NEXT:    ret
386  %1 = uitofp <4 x i16> %a to <4 x bfloat>
387  ret <4 x bfloat> %1
388}
389
390
; uitofp_i32: uitofp <4 x i32> to <4 x bfloat>. Both runs expect ucvtf on the
; 4s vector. The -bf16 run then expects the integer rounding sequence
; (ushr/and/add/addhn); the +bf16 run expects bfcvtn.
391define <4 x bfloat> @uitofp_i32(<4 x i32> %a) #0 {
392; CHECK-CVT-LABEL: uitofp_i32:
393; CHECK-CVT:       // %bb.0:
394; CHECK-CVT-NEXT:    ucvtf v0.4s, v0.4s
395; CHECK-CVT-NEXT:    movi v1.4s, #1
396; CHECK-CVT-NEXT:    ushr v2.4s, v0.4s, #16
397; CHECK-CVT-NEXT:    and v1.16b, v2.16b, v1.16b
398; CHECK-CVT-NEXT:    movi v2.4s, #127, msl #8
399; CHECK-CVT-NEXT:    add v0.4s, v1.4s, v0.4s
400; CHECK-CVT-NEXT:    addhn v0.4h, v0.4s, v2.4s
401; CHECK-CVT-NEXT:    ret
402;
403; CHECK-BF16-LABEL: uitofp_i32:
404; CHECK-BF16:       // %bb.0:
405; CHECK-BF16-NEXT:    ucvtf v0.4s, v0.4s
406; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
407; CHECK-BF16-NEXT:    ret
408  %1 = uitofp <4 x i32> %a to <4 x bfloat>
409  ret <4 x bfloat> %1
410}
411
412
; uitofp_i64: uitofp <4 x i64> to <4 x bfloat>. Both runs expect each 2d input
; register to be converted with ucvtf and the pair narrowed to one 4s vector
; with fcvtn/fcvtn2. The -bf16 run then expects the integer rounding sequence
; with the NaN-select steps (fcmeq/orr/bit/shrn); the +bf16 run expects bfcvtn.
413define <4 x bfloat> @uitofp_i64(<4 x i64> %a) #0 {
414; CHECK-CVT-LABEL: uitofp_i64:
415; CHECK-CVT:       // %bb.0:
416; CHECK-CVT-NEXT:    ucvtf v0.2d, v0.2d
417; CHECK-CVT-NEXT:    ucvtf v1.2d, v1.2d
418; CHECK-CVT-NEXT:    movi v2.4s, #127, msl #8
419; CHECK-CVT-NEXT:    fcvtn v0.2s, v0.2d
420; CHECK-CVT-NEXT:    fcvtn2 v0.4s, v1.2d
421; CHECK-CVT-NEXT:    movi v1.4s, #1
422; CHECK-CVT-NEXT:    ushr v3.4s, v0.4s, #16
423; CHECK-CVT-NEXT:    add v2.4s, v0.4s, v2.4s
424; CHECK-CVT-NEXT:    and v1.16b, v3.16b, v1.16b
425; CHECK-CVT-NEXT:    fcmeq v3.4s, v0.4s, v0.4s
426; CHECK-CVT-NEXT:    orr v0.4s, #64, lsl #16
427; CHECK-CVT-NEXT:    add v1.4s, v1.4s, v2.4s
428; CHECK-CVT-NEXT:    bit v0.16b, v1.16b, v3.16b
429; CHECK-CVT-NEXT:    shrn v0.4h, v0.4s, #16
430; CHECK-CVT-NEXT:    ret
431;
432; CHECK-BF16-LABEL: uitofp_i64:
433; CHECK-BF16:       // %bb.0:
434; CHECK-BF16-NEXT:    ucvtf v0.2d, v0.2d
435; CHECK-BF16-NEXT:    ucvtf v1.2d, v1.2d
436; CHECK-BF16-NEXT:    fcvtn v0.2s, v0.2d
437; CHECK-BF16-NEXT:    fcvtn2 v0.4s, v1.2d
438; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
439; CHECK-BF16-NEXT:    ret
440  %1 = uitofp <4 x i64> %a to <4 x bfloat>
441  ret <4 x bfloat> %1
442}
443
; test_insert_at_zero: inserts the scalar bfloat %a into lane 0 of an undef
; <4 x bfloat> and stores the vector to %b. Both runs expect no insert
; instruction at all: h0 is treated as the low part of d0 (see the kill
; annotation) and d0 is stored directly.
444define void @test_insert_at_zero(bfloat %a, ptr %b) #0 {
445; CHECK-LABEL: test_insert_at_zero:
446; CHECK:       // %bb.0:
447; CHECK-NEXT:    // kill: def $h0 killed $h0 def $d0
448; CHECK-NEXT:    str d0, [x0]
449; CHECK-NEXT:    ret
450  %1 = insertelement <4 x bfloat> undef, bfloat %a, i64 0
451  store <4 x bfloat> %1, ptr %b, align 4
452  ret void
453}
454
; fptosi_i8: fptosi <4 x bfloat> to <4 x i8>. Both runs expect the input to be
; widened to f32 lanes with shll #16, converted with fcvtzs, and narrowed to
; 16-bit lanes with xtn.
455define <4 x i8> @fptosi_i8(<4 x bfloat> %a) #0 {
456; CHECK-LABEL: fptosi_i8:
457; CHECK:       // %bb.0:
458; CHECK-NEXT:    shll v0.4s, v0.4h, #16
459; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
460; CHECK-NEXT:    xtn v0.4h, v0.4s
461; CHECK-NEXT:    ret
462  %1 = fptosi<4 x bfloat> %a to <4 x i8>
463  ret <4 x i8> %1
464}
465
; fptosi_i16: fptosi <4 x bfloat> to <4 x i16>. Both runs expect the input to
; be widened to f32 lanes with shll #16, converted with fcvtzs, and narrowed to
; 16-bit lanes with xtn.
466define <4 x i16> @fptosi_i16(<4 x bfloat> %a) #0 {
467; CHECK-LABEL: fptosi_i16:
468; CHECK:       // %bb.0:
469; CHECK-NEXT:    shll v0.4s, v0.4h, #16
470; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
471; CHECK-NEXT:    xtn v0.4h, v0.4s
472; CHECK-NEXT:    ret
473  %1 = fptosi<4 x bfloat> %a to <4 x i16>
474  ret <4 x i16> %1
475}
476
; fptoui_i8: fptoui <4 x bfloat> to <4 x i8>. Both runs expect the input to be
; widened to f32 lanes with shll #16 and narrowed with xtn after conversion.
; The expected conversion is the signed fcvtzs even though the IR operation is
; unsigned; the in-body note below records the reason.
477define <4 x i8> @fptoui_i8(<4 x bfloat> %a) #0 {
478; CHECK-LABEL: fptoui_i8:
479; CHECK:       // %bb.0:
480; CHECK-NEXT:    shll v0.4s, v0.4h, #16
481; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
482; CHECK-NEXT:    xtn v0.4h, v0.4s
483; CHECK-NEXT:    ret
484; NOTE: fcvtzs selected here because the xtn shaves the sign bit
485  %1 = fptoui<4 x bfloat> %a to <4 x i8>
486  ret <4 x i8> %1
487}
488
; fptoui_i16: fptoui <4 x bfloat> to <4 x i16>. Both runs expect the input to
; be widened to f32 lanes with shll #16, converted with the unsigned fcvtzu,
; and narrowed to 16-bit lanes with xtn.
489define <4 x i16> @fptoui_i16(<4 x bfloat> %a) #0 {
490; CHECK-LABEL: fptoui_i16:
491; CHECK:       // %bb.0:
492; CHECK-NEXT:    shll v0.4s, v0.4h, #16
493; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
494; CHECK-NEXT:    xtn v0.4h, v0.4s
495; CHECK-NEXT:    ret
496  %1 = fptoui<4 x bfloat> %a to <4 x i16>
497  ret <4 x i16> %1
498}
499
; test_fcmp_une: fcmp une on <4 x bfloat>. Both runs expect the operands to be
; widened to f32 lanes with shll #16, compared with fcmeq, and the mask
; inverted with mvn on the full 16b register before xtn narrows it.
500define <4 x i1> @test_fcmp_une(<4 x bfloat> %a, <4 x bfloat> %b) #0 {
501; CHECK-LABEL: test_fcmp_une:
502; CHECK:       // %bb.0:
503; CHECK-NEXT:    shll v1.4s, v1.4h, #16
504; CHECK-NEXT:    shll v0.4s, v0.4h, #16
505; CHECK-NEXT:    fcmeq v0.4s, v0.4s, v1.4s
506; CHECK-NEXT:    mvn v0.16b, v0.16b
507; CHECK-NEXT:    xtn v0.4h, v0.4s
508; CHECK-NEXT:    ret
509
510  %1 = fcmp une <4 x bfloat> %a, %b
511  ret <4 x i1> %1
512}
513
; test_fcmp_ueq: fcmp ueq on <4 x bfloat>. Both runs expect the operands to be
; widened with shll #16, then (a > b) OR (b > a) built from two fcmgt and an
; orr, narrowed with xtn and inverted with mvn.
514define <4 x i1> @test_fcmp_ueq(<4 x bfloat> %a, <4 x bfloat> %b) #0 {
515; CHECK-LABEL: test_fcmp_ueq:
516; CHECK:       // %bb.0:
517; CHECK-NEXT:    shll v1.4s, v1.4h, #16
518; CHECK-NEXT:    shll v0.4s, v0.4h, #16
519; CHECK-NEXT:    fcmgt v2.4s, v0.4s, v1.4s
520; CHECK-NEXT:    fcmgt v0.4s, v1.4s, v0.4s
521; CHECK-NEXT:    orr v0.16b, v0.16b, v2.16b
522; CHECK-NEXT:    xtn v0.4h, v0.4s
523; CHECK-NEXT:    mvn v0.8b, v0.8b
524; CHECK-NEXT:    ret
525
526  %1 = fcmp ueq <4 x bfloat> %a, %b
527  ret <4 x i1> %1
528}
529
; test_fcmp_ugt: fcmp ugt on <4 x bfloat>. Both runs expect the operands to be
; widened with shll #16, the complementary ordered compare b >= a (fcmge with
; swapped operands), then xtn and an mvn to invert the narrowed mask.
530define <4 x i1> @test_fcmp_ugt(<4 x bfloat> %a, <4 x bfloat> %b) #0 {
531; CHECK-LABEL: test_fcmp_ugt:
532; CHECK:       // %bb.0:
533; CHECK-NEXT:    shll v0.4s, v0.4h, #16
534; CHECK-NEXT:    shll v1.4s, v1.4h, #16
535; CHECK-NEXT:    fcmge v0.4s, v1.4s, v0.4s
536; CHECK-NEXT:    xtn v0.4h, v0.4s
537; CHECK-NEXT:    mvn v0.8b, v0.8b
538; CHECK-NEXT:    ret
539
540  %1 = fcmp ugt <4 x bfloat> %a, %b
541  ret <4 x i1> %1
542}
543
; test_fcmp_uge: fcmp uge on <4 x bfloat>. Both runs expect the operands to be
; widened with shll #16, the complementary ordered compare b > a (fcmgt with
; swapped operands), then xtn and an mvn to invert the narrowed mask.
544define <4 x i1> @test_fcmp_uge(<4 x bfloat> %a, <4 x bfloat> %b) #0 {
545; CHECK-LABEL: test_fcmp_uge:
546; CHECK:       // %bb.0:
547; CHECK-NEXT:    shll v0.4s, v0.4h, #16
548; CHECK-NEXT:    shll v1.4s, v1.4h, #16
549; CHECK-NEXT:    fcmgt v0.4s, v1.4s, v0.4s
550; CHECK-NEXT:    xtn v0.4h, v0.4s
551; CHECK-NEXT:    mvn v0.8b, v0.8b
552; CHECK-NEXT:    ret
553
554  %1 = fcmp uge <4 x bfloat> %a, %b
555  ret <4 x i1> %1
556}
557
; test_fcmp_ult: fcmp ult on <4 x bfloat>. Both runs expect the operands to be
; widened with shll #16, the complementary ordered compare a >= b (fcmge), then
; xtn and an mvn to invert the narrowed mask.
558define <4 x i1> @test_fcmp_ult(<4 x bfloat> %a, <4 x bfloat> %b) #0 {
559; CHECK-LABEL: test_fcmp_ult:
560; CHECK:       // %bb.0:
561; CHECK-NEXT:    shll v1.4s, v1.4h, #16
562; CHECK-NEXT:    shll v0.4s, v0.4h, #16
563; CHECK-NEXT:    fcmge v0.4s, v0.4s, v1.4s
564; CHECK-NEXT:    xtn v0.4h, v0.4s
565; CHECK-NEXT:    mvn v0.8b, v0.8b
566; CHECK-NEXT:    ret
567
568  %1 = fcmp ult <4 x bfloat> %a, %b
569  ret <4 x i1> %1
570}
571
; test_fcmp_ule: fcmp ule on <4 x bfloat>. Both runs expect the operands to be
; widened with shll #16, the complementary ordered compare a > b (fcmgt), then
; xtn and an mvn to invert the narrowed mask.
572define <4 x i1> @test_fcmp_ule(<4 x bfloat> %a, <4 x bfloat> %b) #0 {
573; CHECK-LABEL: test_fcmp_ule:
574; CHECK:       // %bb.0:
575; CHECK-NEXT:    shll v1.4s, v1.4h, #16
576; CHECK-NEXT:    shll v0.4s, v0.4h, #16
577; CHECK-NEXT:    fcmgt v0.4s, v0.4s, v1.4s
578; CHECK-NEXT:    xtn v0.4h, v0.4s
579; CHECK-NEXT:    mvn v0.8b, v0.8b
580; CHECK-NEXT:    ret
581
582  %1 = fcmp ule <4 x bfloat> %a, %b
583  ret <4 x i1> %1
584}
585
; test_fcmp_uno: fcmp uno on <4 x bfloat>. Both runs expect the operands to be
; widened with shll #16, the ordered mask (a >= b) OR (b > a) built from
; fcmge/fcmgt/orr, then xtn and an mvn to invert the narrowed mask.
586define <4 x i1> @test_fcmp_uno(<4 x bfloat> %a, <4 x bfloat> %b) #0 {
587; CHECK-LABEL: test_fcmp_uno:
588; CHECK:       // %bb.0:
589; CHECK-NEXT:    shll v1.4s, v1.4h, #16
590; CHECK-NEXT:    shll v0.4s, v0.4h, #16
591; CHECK-NEXT:    fcmge v2.4s, v0.4s, v1.4s
592; CHECK-NEXT:    fcmgt v0.4s, v1.4s, v0.4s
593; CHECK-NEXT:    orr v0.16b, v0.16b, v2.16b
594; CHECK-NEXT:    xtn v0.4h, v0.4s
595; CHECK-NEXT:    mvn v0.8b, v0.8b
596; CHECK-NEXT:    ret
597
598  %1 = fcmp uno <4 x bfloat> %a, %b
599  ret <4 x i1> %1
600}
601
; test_fcmp_one: fcmp one on <4 x bfloat>. Both runs expect the operands to be
; widened with shll #16, then (a > b) OR (b > a) built from two fcmgt and an
; orr, narrowed with xtn; no inversion is expected.
602define <4 x i1> @test_fcmp_one(<4 x bfloat> %a, <4 x bfloat> %b) #0 {
603; CHECK-LABEL: test_fcmp_one:
604; CHECK:       // %bb.0:
605; CHECK-NEXT:    shll v1.4s, v1.4h, #16
606; CHECK-NEXT:    shll v0.4s, v0.4h, #16
607; CHECK-NEXT:    fcmgt v2.4s, v0.4s, v1.4s
608; CHECK-NEXT:    fcmgt v0.4s, v1.4s, v0.4s
609; CHECK-NEXT:    orr v0.16b, v0.16b, v2.16b
610; CHECK-NEXT:    xtn v0.4h, v0.4s
611; CHECK-NEXT:    ret
612
613  %1 = fcmp one <4 x bfloat> %a, %b
614  ret <4 x i1> %1
615}
616
; test_fcmp_oeq: fcmp oeq on <4 x bfloat>. Both runs expect the operands to be
; widened with shll #16, compared with a single fcmeq, and narrowed with xtn.
617define <4 x i1> @test_fcmp_oeq(<4 x bfloat> %a, <4 x bfloat> %b) #0 {
618; CHECK-LABEL: test_fcmp_oeq:
619; CHECK:       // %bb.0:
620; CHECK-NEXT:    shll v1.4s, v1.4h, #16
621; CHECK-NEXT:    shll v0.4s, v0.4h, #16
622; CHECK-NEXT:    fcmeq v0.4s, v0.4s, v1.4s
623; CHECK-NEXT:    xtn v0.4h, v0.4s
624; CHECK-NEXT:    ret
625
626  %1 = fcmp oeq <4 x bfloat> %a, %b
627  ret <4 x i1> %1
628}
629
; test_fcmp_ogt: fcmp ogt on <4 x bfloat>. Both runs expect the operands to be
; widened with shll #16, compared with fcmgt (a > b), and narrowed with xtn.
630define <4 x i1> @test_fcmp_ogt(<4 x bfloat> %a, <4 x bfloat> %b) #0 {
631; CHECK-LABEL: test_fcmp_ogt:
632; CHECK:       // %bb.0:
633; CHECK-NEXT:    shll v1.4s, v1.4h, #16
634; CHECK-NEXT:    shll v0.4s, v0.4h, #16
635; CHECK-NEXT:    fcmgt v0.4s, v0.4s, v1.4s
636; CHECK-NEXT:    xtn v0.4h, v0.4s
637; CHECK-NEXT:    ret
638
639  %1 = fcmp ogt <4 x bfloat> %a, %b
640  ret <4 x i1> %1
641}
642
; test_fcmp_oge: fcmp oge on <4 x bfloat>. Both runs expect the operands to be
; widened with shll #16, compared with fcmge (a >= b), and narrowed with xtn.
643define <4 x i1> @test_fcmp_oge(<4 x bfloat> %a, <4 x bfloat> %b) #0 {
644; CHECK-LABEL: test_fcmp_oge:
645; CHECK:       // %bb.0:
646; CHECK-NEXT:    shll v1.4s, v1.4h, #16
647; CHECK-NEXT:    shll v0.4s, v0.4h, #16
648; CHECK-NEXT:    fcmge v0.4s, v0.4s, v1.4s
649; CHECK-NEXT:    xtn v0.4h, v0.4s
650; CHECK-NEXT:    ret
651
652  %1 = fcmp oge <4 x bfloat> %a, %b
653  ret <4 x i1> %1
654}
655
; test_fcmp_olt: fcmp olt on <4 x bfloat>. Both runs expect the operands to be
; widened with shll #16, compared with fcmgt using swapped operands (b > a),
; and narrowed with xtn.
656define <4 x i1> @test_fcmp_olt(<4 x bfloat> %a, <4 x bfloat> %b) #0 {
657; CHECK-LABEL: test_fcmp_olt:
658; CHECK:       // %bb.0:
659; CHECK-NEXT:    shll v0.4s, v0.4h, #16
660; CHECK-NEXT:    shll v1.4s, v1.4h, #16
661; CHECK-NEXT:    fcmgt v0.4s, v1.4s, v0.4s
662; CHECK-NEXT:    xtn v0.4h, v0.4s
663; CHECK-NEXT:    ret
664
665  %1 = fcmp olt <4 x bfloat> %a, %b
666  ret <4 x i1> %1
667}
668
; test_fcmp_ole: fcmp ole on <4 x bfloat>. Both runs expect the operands to be
; widened with shll #16, compared with fcmge using swapped operands (b >= a),
; and narrowed with xtn.
669define <4 x i1> @test_fcmp_ole(<4 x bfloat> %a, <4 x bfloat> %b) #0 {
670; CHECK-LABEL: test_fcmp_ole:
671; CHECK:       // %bb.0:
672; CHECK-NEXT:    shll v0.4s, v0.4h, #16
673; CHECK-NEXT:    shll v1.4s, v1.4h, #16
674; CHECK-NEXT:    fcmge v0.4s, v1.4s, v0.4s
675; CHECK-NEXT:    xtn v0.4h, v0.4s
676; CHECK-NEXT:    ret
677
678  %1 = fcmp ole <4 x bfloat> %a, %b
679  ret <4 x i1> %1
680}
681
; test_fcmp_ord: fcmp ord on <4 x bfloat>. Both runs expect the operands to be
; widened with shll #16, then (a >= b) OR (b > a) built from fcmge/fcmgt/orr
; and narrowed with xtn; no inversion is expected.
682define <4 x i1> @test_fcmp_ord(<4 x bfloat> %a, <4 x bfloat> %b) #0 {
683; CHECK-LABEL: test_fcmp_ord:
684; CHECK:       // %bb.0:
685; CHECK-NEXT:    shll v1.4s, v1.4h, #16
686; CHECK-NEXT:    shll v0.4s, v0.4h, #16
687; CHECK-NEXT:    fcmge v2.4s, v0.4s, v1.4s
688; CHECK-NEXT:    fcmgt v0.4s, v1.4s, v0.4s
689; CHECK-NEXT:    orr v0.16b, v0.16b, v2.16b
690; CHECK-NEXT:    xtn v0.4h, v0.4s
691; CHECK-NEXT:    ret
692
693  %1 = fcmp ord <4 x bfloat> %a, %b
694  ret <4 x i1> %1
695}
696
; Attribute group #0 (nounwind), referenced by the functions in this file that
; are declared with a trailing #0.
697attributes #0 = { nounwind }
698