xref: /llvm-project/llvm/test/CodeGen/AArch64/bf16-v8-instructions.ll (revision 547bfda56b2e3f3a4c6d2357d3566dcd3fa996ad)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=aarch64 -mattr=-bf16 | FileCheck %s --check-prefixes=CHECK,CHECK-CVT
3; RUN: llc < %s -mtriple=aarch64 -mattr=+bf16 | FileCheck %s --check-prefixes=CHECK,CHECK-BF16
4
; add_h: element-wise fadd of two <8 x bfloat> vectors.
; With +bf16 the expected code widens each half to f32 (shll/shll2 #16), adds,
; and narrows with bfcvtn/bfcvtn2. With -bf16 the narrowing back to bfloat is
; open-coded with integer ops (ushr/and/add with #1 and the 0x7fff constant,
; an fcmeq self-compare selecting an orr'ed value - presumably the NaN path -
; and a final uzp2 that keeps the high halfword of each lane).
; NOTE(review): in the -bf16 sequence, `ushr v5.4s, v0.4s, #16` reads v0 before
; any instruction has written it (presumably still the raw %a argument), not an
; fadd result - confirm this is the intended input for the rounding bit.
5define <8 x bfloat> @add_h(<8 x bfloat> %a, <8 x bfloat> %b) {
6; CHECK-CVT-LABEL: add_h:
7; CHECK-CVT:       // %bb.0: // %entry
8; CHECK-CVT-NEXT:    shll2 v3.4s, v1.8h, #16
9; CHECK-CVT-NEXT:    shll2 v4.4s, v0.8h, #16
10; CHECK-CVT-NEXT:    movi v2.4s, #1
11; CHECK-CVT-NEXT:    ushr v5.4s, v0.4s, #16
12; CHECK-CVT-NEXT:    shll v1.4s, v1.4h, #16
13; CHECK-CVT-NEXT:    shll v0.4s, v0.4h, #16
14; CHECK-CVT-NEXT:    fadd v3.4s, v4.4s, v3.4s
15; CHECK-CVT-NEXT:    fadd v0.4s, v0.4s, v1.4s
16; CHECK-CVT-NEXT:    and v2.16b, v5.16b, v2.16b
17; CHECK-CVT-NEXT:    movi v1.4s, #127, msl #8
18; CHECK-CVT-NEXT:    fcmeq v5.4s, v3.4s, v3.4s
19; CHECK-CVT-NEXT:    add v4.4s, v3.4s, v2.4s
20; CHECK-CVT-NEXT:    orr v3.4s, #64, lsl #16
21; CHECK-CVT-NEXT:    add v2.4s, v0.4s, v2.4s
22; CHECK-CVT-NEXT:    fcmeq v6.4s, v0.4s, v0.4s
23; CHECK-CVT-NEXT:    orr v0.4s, #64, lsl #16
24; CHECK-CVT-NEXT:    add v4.4s, v4.4s, v1.4s
25; CHECK-CVT-NEXT:    add v1.4s, v2.4s, v1.4s
26; CHECK-CVT-NEXT:    mov v2.16b, v5.16b
27; CHECK-CVT-NEXT:    bsl v2.16b, v4.16b, v3.16b
28; CHECK-CVT-NEXT:    bit v0.16b, v1.16b, v6.16b
29; CHECK-CVT-NEXT:    uzp2 v0.8h, v0.8h, v2.8h
30; CHECK-CVT-NEXT:    ret
31;
32; CHECK-BF16-LABEL: add_h:
33; CHECK-BF16:       // %bb.0: // %entry
34; CHECK-BF16-NEXT:    shll v2.4s, v1.4h, #16
35; CHECK-BF16-NEXT:    shll v3.4s, v0.4h, #16
36; CHECK-BF16-NEXT:    shll2 v1.4s, v1.8h, #16
37; CHECK-BF16-NEXT:    shll2 v0.4s, v0.8h, #16
38; CHECK-BF16-NEXT:    fadd v2.4s, v3.4s, v2.4s
39; CHECK-BF16-NEXT:    fadd v1.4s, v0.4s, v1.4s
40; CHECK-BF16-NEXT:    bfcvtn v0.4h, v2.4s
41; CHECK-BF16-NEXT:    bfcvtn2 v0.8h, v1.4s
42; CHECK-BF16-NEXT:    ret
43entry:
44  %0 = fadd <8 x bfloat> %a, %b
45  ret <8 x bfloat> %0
46}
47
48
; sub_h: element-wise fsub of two <8 x bfloat> vectors.
; With +bf16 the expected code widens each half to f32 (shll/shll2 #16),
; subtracts, and narrows with bfcvtn/bfcvtn2. With -bf16 the narrowing is
; open-coded with integer ops followed by uzp2, as in the other arithmetic
; tests in this file.
; NOTE(review): in the -bf16 sequence, `ushr v5.4s, v0.4s, #16` reads v0 before
; any instruction has written it (presumably still the raw %a argument), not an
; fsub result - confirm this is the intended input for the rounding bit.
49define <8 x bfloat> @sub_h(<8 x bfloat> %a, <8 x bfloat> %b) {
50; CHECK-CVT-LABEL: sub_h:
51; CHECK-CVT:       // %bb.0: // %entry
52; CHECK-CVT-NEXT:    shll2 v3.4s, v1.8h, #16
53; CHECK-CVT-NEXT:    shll2 v4.4s, v0.8h, #16
54; CHECK-CVT-NEXT:    movi v2.4s, #1
55; CHECK-CVT-NEXT:    ushr v5.4s, v0.4s, #16
56; CHECK-CVT-NEXT:    shll v1.4s, v1.4h, #16
57; CHECK-CVT-NEXT:    shll v0.4s, v0.4h, #16
58; CHECK-CVT-NEXT:    fsub v3.4s, v4.4s, v3.4s
59; CHECK-CVT-NEXT:    fsub v0.4s, v0.4s, v1.4s
60; CHECK-CVT-NEXT:    and v2.16b, v5.16b, v2.16b
61; CHECK-CVT-NEXT:    movi v1.4s, #127, msl #8
62; CHECK-CVT-NEXT:    fcmeq v5.4s, v3.4s, v3.4s
63; CHECK-CVT-NEXT:    add v4.4s, v3.4s, v2.4s
64; CHECK-CVT-NEXT:    orr v3.4s, #64, lsl #16
65; CHECK-CVT-NEXT:    add v2.4s, v0.4s, v2.4s
66; CHECK-CVT-NEXT:    fcmeq v6.4s, v0.4s, v0.4s
67; CHECK-CVT-NEXT:    orr v0.4s, #64, lsl #16
68; CHECK-CVT-NEXT:    add v4.4s, v4.4s, v1.4s
69; CHECK-CVT-NEXT:    add v1.4s, v2.4s, v1.4s
70; CHECK-CVT-NEXT:    mov v2.16b, v5.16b
71; CHECK-CVT-NEXT:    bsl v2.16b, v4.16b, v3.16b
72; CHECK-CVT-NEXT:    bit v0.16b, v1.16b, v6.16b
73; CHECK-CVT-NEXT:    uzp2 v0.8h, v0.8h, v2.8h
74; CHECK-CVT-NEXT:    ret
75;
76; CHECK-BF16-LABEL: sub_h:
77; CHECK-BF16:       // %bb.0: // %entry
78; CHECK-BF16-NEXT:    shll v2.4s, v1.4h, #16
79; CHECK-BF16-NEXT:    shll v3.4s, v0.4h, #16
80; CHECK-BF16-NEXT:    shll2 v1.4s, v1.8h, #16
81; CHECK-BF16-NEXT:    shll2 v0.4s, v0.8h, #16
82; CHECK-BF16-NEXT:    fsub v2.4s, v3.4s, v2.4s
83; CHECK-BF16-NEXT:    fsub v1.4s, v0.4s, v1.4s
84; CHECK-BF16-NEXT:    bfcvtn v0.4h, v2.4s
85; CHECK-BF16-NEXT:    bfcvtn2 v0.8h, v1.4s
86; CHECK-BF16-NEXT:    ret
87entry:
88  %0 = fsub <8 x bfloat> %a, %b
89  ret <8 x bfloat> %0
90}
91
92
; mul_h: element-wise fmul of two <8 x bfloat> vectors.
; With +bf16 the expected code widens each half to f32 (shll/shll2 #16),
; multiplies, and narrows with bfcvtn/bfcvtn2. With -bf16 the narrowing is
; open-coded with integer ops followed by uzp2.
; NOTE(review): in the -bf16 sequence, `ushr v5.4s, v0.4s, #16` reads v0 before
; any instruction has written it (presumably still the raw %a argument), not an
; fmul result - confirm this is the intended input for the rounding bit.
93define <8 x bfloat> @mul_h(<8 x bfloat> %a, <8 x bfloat> %b) {
94; CHECK-CVT-LABEL: mul_h:
95; CHECK-CVT:       // %bb.0: // %entry
96; CHECK-CVT-NEXT:    shll2 v3.4s, v1.8h, #16
97; CHECK-CVT-NEXT:    shll2 v4.4s, v0.8h, #16
98; CHECK-CVT-NEXT:    movi v2.4s, #1
99; CHECK-CVT-NEXT:    ushr v5.4s, v0.4s, #16
100; CHECK-CVT-NEXT:    shll v1.4s, v1.4h, #16
101; CHECK-CVT-NEXT:    shll v0.4s, v0.4h, #16
102; CHECK-CVT-NEXT:    fmul v3.4s, v4.4s, v3.4s
103; CHECK-CVT-NEXT:    fmul v0.4s, v0.4s, v1.4s
104; CHECK-CVT-NEXT:    and v2.16b, v5.16b, v2.16b
105; CHECK-CVT-NEXT:    movi v1.4s, #127, msl #8
106; CHECK-CVT-NEXT:    fcmeq v5.4s, v3.4s, v3.4s
107; CHECK-CVT-NEXT:    add v4.4s, v3.4s, v2.4s
108; CHECK-CVT-NEXT:    orr v3.4s, #64, lsl #16
109; CHECK-CVT-NEXT:    add v2.4s, v0.4s, v2.4s
110; CHECK-CVT-NEXT:    fcmeq v6.4s, v0.4s, v0.4s
111; CHECK-CVT-NEXT:    orr v0.4s, #64, lsl #16
112; CHECK-CVT-NEXT:    add v4.4s, v4.4s, v1.4s
113; CHECK-CVT-NEXT:    add v1.4s, v2.4s, v1.4s
114; CHECK-CVT-NEXT:    mov v2.16b, v5.16b
115; CHECK-CVT-NEXT:    bsl v2.16b, v4.16b, v3.16b
116; CHECK-CVT-NEXT:    bit v0.16b, v1.16b, v6.16b
117; CHECK-CVT-NEXT:    uzp2 v0.8h, v0.8h, v2.8h
118; CHECK-CVT-NEXT:    ret
119;
120; CHECK-BF16-LABEL: mul_h:
121; CHECK-BF16:       // %bb.0: // %entry
122; CHECK-BF16-NEXT:    shll v2.4s, v1.4h, #16
123; CHECK-BF16-NEXT:    shll v3.4s, v0.4h, #16
124; CHECK-BF16-NEXT:    shll2 v1.4s, v1.8h, #16
125; CHECK-BF16-NEXT:    shll2 v0.4s, v0.8h, #16
126; CHECK-BF16-NEXT:    fmul v2.4s, v3.4s, v2.4s
127; CHECK-BF16-NEXT:    fmul v1.4s, v0.4s, v1.4s
128; CHECK-BF16-NEXT:    bfcvtn v0.4h, v2.4s
129; CHECK-BF16-NEXT:    bfcvtn2 v0.8h, v1.4s
130; CHECK-BF16-NEXT:    ret
131entry:
132  %0 = fmul <8 x bfloat> %a, %b
133  ret <8 x bfloat> %0
134}
135
136
; div_h: element-wise fdiv of two <8 x bfloat> vectors.
; With +bf16 the expected code widens each half to f32 (shll/shll2 #16),
; divides, and narrows with bfcvtn/bfcvtn2. With -bf16 the two fdivs are
; interleaved with the widening shifts and the narrowing is open-coded with
; integer ops, an fcmeq-driven bit/bif select, and a final uzp2.
; NOTE(review): in the -bf16 sequence, `ushr v0.4s, v0.4s, #16` reads v0 before
; any instruction has written it (presumably still the raw %a argument), not an
; fdiv result - confirm this is the intended input for the rounding bit.
137define <8 x bfloat> @div_h(<8 x bfloat> %a, <8 x bfloat> %b) {
138; CHECK-CVT-LABEL: div_h:
139; CHECK-CVT:       // %bb.0: // %entry
140; CHECK-CVT-NEXT:    shll2 v2.4s, v1.8h, #16
141; CHECK-CVT-NEXT:    shll2 v3.4s, v0.8h, #16
142; CHECK-CVT-NEXT:    shll v1.4s, v1.4h, #16
143; CHECK-CVT-NEXT:    movi v4.4s, #127, msl #8
144; CHECK-CVT-NEXT:    fdiv v2.4s, v3.4s, v2.4s
145; CHECK-CVT-NEXT:    shll v3.4s, v0.4h, #16
146; CHECK-CVT-NEXT:    ushr v0.4s, v0.4s, #16
147; CHECK-CVT-NEXT:    fdiv v1.4s, v3.4s, v1.4s
148; CHECK-CVT-NEXT:    movi v3.4s, #1
149; CHECK-CVT-NEXT:    and v0.16b, v0.16b, v3.16b
150; CHECK-CVT-NEXT:    add v0.4s, v0.4s, v4.4s
151; CHECK-CVT-NEXT:    fcmeq v4.4s, v2.4s, v2.4s
152; CHECK-CVT-NEXT:    add v3.4s, v2.4s, v0.4s
153; CHECK-CVT-NEXT:    orr v2.4s, #64, lsl #16
154; CHECK-CVT-NEXT:    fcmeq v5.4s, v1.4s, v1.4s
155; CHECK-CVT-NEXT:    add v0.4s, v1.4s, v0.4s
156; CHECK-CVT-NEXT:    orr v1.4s, #64, lsl #16
157; CHECK-CVT-NEXT:    bit v2.16b, v3.16b, v4.16b
158; CHECK-CVT-NEXT:    bif v0.16b, v1.16b, v5.16b
159; CHECK-CVT-NEXT:    uzp2 v0.8h, v0.8h, v2.8h
160; CHECK-CVT-NEXT:    ret
161;
162; CHECK-BF16-LABEL: div_h:
163; CHECK-BF16:       // %bb.0: // %entry
164; CHECK-BF16-NEXT:    shll v2.4s, v1.4h, #16
165; CHECK-BF16-NEXT:    shll v3.4s, v0.4h, #16
166; CHECK-BF16-NEXT:    shll2 v1.4s, v1.8h, #16
167; CHECK-BF16-NEXT:    shll2 v0.4s, v0.8h, #16
168; CHECK-BF16-NEXT:    fdiv v2.4s, v3.4s, v2.4s
169; CHECK-BF16-NEXT:    fdiv v1.4s, v0.4s, v1.4s
170; CHECK-BF16-NEXT:    bfcvtn v0.4h, v2.4s
171; CHECK-BF16-NEXT:    bfcvtn2 v0.8h, v1.4s
172; CHECK-BF16-NEXT:    ret
173entry:
174  %0 = fdiv <8 x bfloat> %a, %b
175  ret <8 x bfloat> %0
176}
177
178
; load_h: loads an <8 x bfloat> from %a (align 4) and returns it.
; Both run lines share the same expectation: a single `ldr q0, [x0]`.
179define <8 x bfloat> @load_h(ptr %a) {
180; CHECK-LABEL: load_h:
181; CHECK:       // %bb.0: // %entry
182; CHECK-NEXT:    ldr q0, [x0]
183; CHECK-NEXT:    ret
184entry:
185  %0 = load <8 x bfloat>, ptr %a, align 4
186  ret <8 x bfloat> %0
187}
188
189
; store_h: stores the <8 x bfloat> argument %b to %a (align 4).
; Both run lines share the same expectation: a single `str q0, [x0]`.
190define void @store_h(ptr %a, <8 x bfloat> %b) {
191; CHECK-LABEL: store_h:
192; CHECK:       // %bb.0: // %entry
193; CHECK-NEXT:    str q0, [x0]
194; CHECK-NEXT:    ret
195entry:
196  store <8 x bfloat> %b, ptr %a, align 4
197  ret void
198}
199
; s_to_h: fptrunc <8 x float> -> <8 x bfloat>.
; With +bf16 the expected code is just bfcvtn/bfcvtn2 on the two input
; registers. With -bf16 each f32 half is narrowed with integer ops
; (ushr #16 / and #1, adds with the 0x7fff constant, an fcmeq self-compare
; selecting an orr'ed value - presumably the NaN path) and the halves are
; combined with uzp2.
200define <8 x bfloat> @s_to_h(<8 x float> %a) {
201; CHECK-CVT-LABEL: s_to_h:
202; CHECK-CVT:       // %bb.0:
203; CHECK-CVT-NEXT:    movi v2.4s, #1
204; CHECK-CVT-NEXT:    movi v3.4s, #127, msl #8
205; CHECK-CVT-NEXT:    ushr v4.4s, v1.4s, #16
206; CHECK-CVT-NEXT:    ushr v5.4s, v0.4s, #16
207; CHECK-CVT-NEXT:    and v4.16b, v4.16b, v2.16b
208; CHECK-CVT-NEXT:    add v6.4s, v1.4s, v3.4s
209; CHECK-CVT-NEXT:    and v2.16b, v5.16b, v2.16b
210; CHECK-CVT-NEXT:    add v3.4s, v0.4s, v3.4s
211; CHECK-CVT-NEXT:    fcmeq v5.4s, v1.4s, v1.4s
212; CHECK-CVT-NEXT:    orr v1.4s, #64, lsl #16
213; CHECK-CVT-NEXT:    add v4.4s, v4.4s, v6.4s
214; CHECK-CVT-NEXT:    fcmeq v6.4s, v0.4s, v0.4s
215; CHECK-CVT-NEXT:    orr v0.4s, #64, lsl #16
216; CHECK-CVT-NEXT:    add v2.4s, v2.4s, v3.4s
217; CHECK-CVT-NEXT:    bit v1.16b, v4.16b, v5.16b
218; CHECK-CVT-NEXT:    bit v0.16b, v2.16b, v6.16b
219; CHECK-CVT-NEXT:    uzp2 v0.8h, v0.8h, v1.8h
220; CHECK-CVT-NEXT:    ret
221;
222; CHECK-BF16-LABEL: s_to_h:
223; CHECK-BF16:       // %bb.0:
224; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
225; CHECK-BF16-NEXT:    bfcvtn2 v0.8h, v1.4s
226; CHECK-BF16-NEXT:    ret
227  %1 = fptrunc <8 x float> %a to <8 x bfloat>
228  ret <8 x bfloat> %1
229}
230
; d_to_h: fptrunc <8 x double> -> <8 x bfloat>.
; Both run lines first narrow the four f64 input registers to two f32
; registers with fcvtxn/fcvtxn2. With +bf16 the result is then produced by
; bfcvtn/bfcvtn2; with -bf16 the f32 -> bfloat step is open-coded with integer
; ops, an fcmeq-driven select, and uzp2.
231define <8 x bfloat> @d_to_h(<8 x double> %a) {
232; CHECK-CVT-LABEL: d_to_h:
233; CHECK-CVT:       // %bb.0:
234; CHECK-CVT-NEXT:    fcvtxn v2.2s, v2.2d
235; CHECK-CVT-NEXT:    fcvtxn v0.2s, v0.2d
236; CHECK-CVT-NEXT:    fcvtxn2 v2.4s, v3.2d
237; CHECK-CVT-NEXT:    fcvtxn2 v0.4s, v1.2d
238; CHECK-CVT-NEXT:    movi v1.4s, #1
239; CHECK-CVT-NEXT:    movi v3.4s, #127, msl #8
240; CHECK-CVT-NEXT:    ushr v4.4s, v2.4s, #16
241; CHECK-CVT-NEXT:    ushr v5.4s, v0.4s, #16
242; CHECK-CVT-NEXT:    add v6.4s, v2.4s, v3.4s
243; CHECK-CVT-NEXT:    add v3.4s, v0.4s, v3.4s
244; CHECK-CVT-NEXT:    and v4.16b, v4.16b, v1.16b
245; CHECK-CVT-NEXT:    and v1.16b, v5.16b, v1.16b
246; CHECK-CVT-NEXT:    fcmeq v5.4s, v2.4s, v2.4s
247; CHECK-CVT-NEXT:    orr v2.4s, #64, lsl #16
248; CHECK-CVT-NEXT:    add v4.4s, v4.4s, v6.4s
249; CHECK-CVT-NEXT:    fcmeq v6.4s, v0.4s, v0.4s
250; CHECK-CVT-NEXT:    add v1.4s, v1.4s, v3.4s
251; CHECK-CVT-NEXT:    orr v0.4s, #64, lsl #16
252; CHECK-CVT-NEXT:    bit v2.16b, v4.16b, v5.16b
253; CHECK-CVT-NEXT:    bit v0.16b, v1.16b, v6.16b
254; CHECK-CVT-NEXT:    uzp2 v0.8h, v0.8h, v2.8h
255; CHECK-CVT-NEXT:    ret
256;
257; CHECK-BF16-LABEL: d_to_h:
258; CHECK-BF16:       // %bb.0:
259; CHECK-BF16-NEXT:    fcvtxn v0.2s, v0.2d
260; CHECK-BF16-NEXT:    fcvtxn v2.2s, v2.2d
261; CHECK-BF16-NEXT:    fcvtxn2 v0.4s, v1.2d
262; CHECK-BF16-NEXT:    fcvtxn2 v2.4s, v3.2d
263; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
264; CHECK-BF16-NEXT:    bfcvtn2 v0.8h, v2.4s
265; CHECK-BF16-NEXT:    ret
266  %1 = fptrunc <8 x double> %a to <8 x bfloat>
267  ret <8 x bfloat> %1
268}
269
; h_to_s: fpext <8 x bfloat> -> <8 x float>.
; Both run lines share the same expectation: the high half is widened into v1
; with shll2 #16 and the low half into v0 with shll #16.
270define <8 x float> @h_to_s(<8 x bfloat> %a) {
271; CHECK-LABEL: h_to_s:
272; CHECK:       // %bb.0:
273; CHECK-NEXT:    shll2 v1.4s, v0.8h, #16
274; CHECK-NEXT:    shll v0.4s, v0.4h, #16
275; CHECK-NEXT:    ret
276  %1 = fpext <8 x bfloat> %a to <8 x float>
277  ret <8 x float> %1
278}
279
; h_to_d: fpext <8 x bfloat> -> <8 x double>.
; Both run lines share the same expectation: widen to f32 with shll/shll2 #16,
; then widen each f32 pair to f64 with fcvtl/fcvtl2, filling v0-v3.
280define <8 x double> @h_to_d(<8 x bfloat> %a) {
281; CHECK-LABEL: h_to_d:
282; CHECK:       // %bb.0:
283; CHECK-NEXT:    shll v1.4s, v0.4h, #16
284; CHECK-NEXT:    shll2 v2.4s, v0.8h, #16
285; CHECK-NEXT:    fcvtl v0.2d, v1.2s
286; CHECK-NEXT:    fcvtl2 v3.2d, v2.4s
287; CHECK-NEXT:    fcvtl2 v1.2d, v1.4s
288; CHECK-NEXT:    fcvtl v2.2d, v2.2s
289; CHECK-NEXT:    ret
290  %1 = fpext <8 x bfloat> %a to <8 x double>
291  ret <8 x double> %1
292}
293
294
; bitcast_i_to_h: bitcast <8 x i16> -> <8 x bfloat>.
; The unnamed leading float parameter is unused; with it present the expected
; code is a single register move `mov v0.16b, v1.16b` (presumably because %a
; then arrives in v1 rather than v0), so the bitcast itself emits no conversion.
295define <8 x bfloat> @bitcast_i_to_h(float, <8 x i16> %a) {
296; CHECK-LABEL: bitcast_i_to_h:
297; CHECK:       // %bb.0:
298; CHECK-NEXT:    mov v0.16b, v1.16b
299; CHECK-NEXT:    ret
300  %2 = bitcast <8 x i16> %a to <8 x bfloat>
301  ret <8 x bfloat> %2
302}
303
; bitcast_h_to_i: bitcast <8 x bfloat> -> <8 x i16>.
; The unnamed leading float parameter is unused; with it present the expected
; code is a single register move `mov v0.16b, v1.16b` (presumably because %a
; then arrives in v1 rather than v0), so the bitcast itself emits no conversion.
304define <8 x i16> @bitcast_h_to_i(float, <8 x bfloat> %a) {
305; CHECK-LABEL: bitcast_h_to_i:
306; CHECK:       // %bb.0:
307; CHECK-NEXT:    mov v0.16b, v1.16b
308; CHECK-NEXT:    ret
309  %2 = bitcast <8 x bfloat> %a to <8 x i16>
310  ret <8 x i16> %2
311}
312
; sitofp_v4i8: sitofp <4 x i8> -> <4 x bfloat>.
; Both run lines sign-extend the i8 lanes in place (shl #8 / sshr #8 on .4h),
; widen to i32 with sshll, and convert with scvtf. With +bf16 the result is
; narrowed by bfcvtn; with -bf16 it is narrowed by adding the ushr/and #1 bit
; and then using addhn with the 0x7fff constant.
; Attribute group #0 is defined outside this excerpt.
313define <4 x bfloat> @sitofp_v4i8(<4 x i8> %a) #0 {
314; CHECK-CVT-LABEL: sitofp_v4i8:
315; CHECK-CVT:       // %bb.0:
316; CHECK-CVT-NEXT:    shl v0.4h, v0.4h, #8
317; CHECK-CVT-NEXT:    movi v1.4s, #1
318; CHECK-CVT-NEXT:    sshr v0.4h, v0.4h, #8
319; CHECK-CVT-NEXT:    sshll v0.4s, v0.4h, #0
320; CHECK-CVT-NEXT:    scvtf v0.4s, v0.4s
321; CHECK-CVT-NEXT:    ushr v2.4s, v0.4s, #16
322; CHECK-CVT-NEXT:    and v1.16b, v2.16b, v1.16b
323; CHECK-CVT-NEXT:    movi v2.4s, #127, msl #8
324; CHECK-CVT-NEXT:    add v0.4s, v1.4s, v0.4s
325; CHECK-CVT-NEXT:    addhn v0.4h, v0.4s, v2.4s
326; CHECK-CVT-NEXT:    ret
327;
328; CHECK-BF16-LABEL: sitofp_v4i8:
329; CHECK-BF16:       // %bb.0:
330; CHECK-BF16-NEXT:    shl v0.4h, v0.4h, #8
331; CHECK-BF16-NEXT:    sshr v0.4h, v0.4h, #8
332; CHECK-BF16-NEXT:    sshll v0.4s, v0.4h, #0
333; CHECK-BF16-NEXT:    scvtf v0.4s, v0.4s
334; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
335; CHECK-BF16-NEXT:    ret
336  %1 = sitofp <4 x i8> %a to <4 x bfloat>
337  ret <4 x bfloat> %1
338}
339
; sitofp_v8i8: sitofp <8 x i8> -> <8 x bfloat>.
; Both run lines sign-extend i8 -> i16 -> i32 with sshll/sshll2 and convert each
; half with scvtf. With +bf16 the halves are narrowed by bfcvtn/bfcvtn2; with
; -bf16 they are narrowed by addhn/addhn2 after adding the ushr/and #1 bit to
; the 0x7fff constant.
; Attribute group #0 is defined outside this excerpt.
340define <8 x bfloat> @sitofp_v8i8(<8 x i8> %a) #0 {
341; CHECK-CVT-LABEL: sitofp_v8i8:
342; CHECK-CVT:       // %bb.0:
343; CHECK-CVT-NEXT:    sshll v0.8h, v0.8b, #0
344; CHECK-CVT-NEXT:    movi v1.4s, #1
345; CHECK-CVT-NEXT:    movi v4.4s, #127, msl #8
346; CHECK-CVT-NEXT:    sshll v2.4s, v0.4h, #0
347; CHECK-CVT-NEXT:    sshll2 v0.4s, v0.8h, #0
348; CHECK-CVT-NEXT:    scvtf v2.4s, v2.4s
349; CHECK-CVT-NEXT:    scvtf v3.4s, v0.4s
350; CHECK-CVT-NEXT:    ushr v0.4s, v2.4s, #16
351; CHECK-CVT-NEXT:    ushr v5.4s, v3.4s, #16
352; CHECK-CVT-NEXT:    and v0.16b, v0.16b, v1.16b
353; CHECK-CVT-NEXT:    and v1.16b, v5.16b, v1.16b
354; CHECK-CVT-NEXT:    add v0.4s, v0.4s, v4.4s
355; CHECK-CVT-NEXT:    add v1.4s, v1.4s, v4.4s
356; CHECK-CVT-NEXT:    addhn v0.4h, v2.4s, v0.4s
357; CHECK-CVT-NEXT:    addhn2 v0.8h, v3.4s, v1.4s
358; CHECK-CVT-NEXT:    ret
359;
360; CHECK-BF16-LABEL: sitofp_v8i8:
361; CHECK-BF16:       // %bb.0:
362; CHECK-BF16-NEXT:    sshll v0.8h, v0.8b, #0
363; CHECK-BF16-NEXT:    sshll v1.4s, v0.4h, #0
364; CHECK-BF16-NEXT:    sshll2 v2.4s, v0.8h, #0
365; CHECK-BF16-NEXT:    scvtf v1.4s, v1.4s
366; CHECK-BF16-NEXT:    bfcvtn v0.4h, v1.4s
367; CHECK-BF16-NEXT:    scvtf v1.4s, v2.4s
368; CHECK-BF16-NEXT:    bfcvtn2 v0.8h, v1.4s
369; CHECK-BF16-NEXT:    ret
370  %1 = sitofp <8 x i8> %a to <8 x bfloat>
371  ret <8 x bfloat> %1
372}
373
; sitofp_v16i8: sitofp <16 x i8> -> <16 x bfloat> (result occupies v0 and v1).
; Both run lines sign-extend i8 -> i16 -> i32 in four quarters with
; sshll/sshll2 and convert each with scvtf. With +bf16 the quarters are
; narrowed by bfcvtn/bfcvtn2; with -bf16 they are narrowed by addhn/addhn2
; after adding the ushr/and #1 bit to the 0x7fff constant.
; Attribute group #0 is defined outside this excerpt.
374define <16 x bfloat> @sitofp_v16i8(<16 x i8> %a) #0 {
375; CHECK-CVT-LABEL: sitofp_v16i8:
376; CHECK-CVT:       // %bb.0:
377; CHECK-CVT-NEXT:    sshll2 v2.8h, v0.16b, #0
378; CHECK-CVT-NEXT:    sshll v0.8h, v0.8b, #0
379; CHECK-CVT-NEXT:    movi v1.4s, #1
380; CHECK-CVT-NEXT:    movi v7.4s, #127, msl #8
381; CHECK-CVT-NEXT:    sshll v3.4s, v2.4h, #0
382; CHECK-CVT-NEXT:    sshll v4.4s, v0.4h, #0
383; CHECK-CVT-NEXT:    sshll2 v2.4s, v2.8h, #0
384; CHECK-CVT-NEXT:    sshll2 v0.4s, v0.8h, #0
385; CHECK-CVT-NEXT:    scvtf v3.4s, v3.4s
386; CHECK-CVT-NEXT:    scvtf v4.4s, v4.4s
387; CHECK-CVT-NEXT:    scvtf v2.4s, v2.4s
388; CHECK-CVT-NEXT:    scvtf v6.4s, v0.4s
389; CHECK-CVT-NEXT:    ushr v5.4s, v3.4s, #16
390; CHECK-CVT-NEXT:    ushr v0.4s, v4.4s, #16
391; CHECK-CVT-NEXT:    ushr v16.4s, v2.4s, #16
392; CHECK-CVT-NEXT:    ushr v17.4s, v6.4s, #16
393; CHECK-CVT-NEXT:    and v5.16b, v5.16b, v1.16b
394; CHECK-CVT-NEXT:    and v0.16b, v0.16b, v1.16b
395; CHECK-CVT-NEXT:    and v16.16b, v16.16b, v1.16b
396; CHECK-CVT-NEXT:    and v17.16b, v17.16b, v1.16b
397; CHECK-CVT-NEXT:    add v5.4s, v5.4s, v7.4s
398; CHECK-CVT-NEXT:    add v0.4s, v0.4s, v7.4s
399; CHECK-CVT-NEXT:    addhn v1.4h, v3.4s, v5.4s
400; CHECK-CVT-NEXT:    addhn v0.4h, v4.4s, v0.4s
401; CHECK-CVT-NEXT:    add v3.4s, v16.4s, v7.4s
402; CHECK-CVT-NEXT:    add v4.4s, v17.4s, v7.4s
403; CHECK-CVT-NEXT:    addhn2 v1.8h, v2.4s, v3.4s
404; CHECK-CVT-NEXT:    addhn2 v0.8h, v6.4s, v4.4s
405; CHECK-CVT-NEXT:    ret
406;
407; CHECK-BF16-LABEL: sitofp_v16i8:
408; CHECK-BF16:       // %bb.0:
409; CHECK-BF16-NEXT:    sshll2 v1.8h, v0.16b, #0
410; CHECK-BF16-NEXT:    sshll v0.8h, v0.8b, #0
411; CHECK-BF16-NEXT:    sshll v2.4s, v1.4h, #0
412; CHECK-BF16-NEXT:    sshll v3.4s, v0.4h, #0
413; CHECK-BF16-NEXT:    sshll2 v4.4s, v1.8h, #0
414; CHECK-BF16-NEXT:    sshll2 v5.4s, v0.8h, #0
415; CHECK-BF16-NEXT:    scvtf v2.4s, v2.4s
416; CHECK-BF16-NEXT:    scvtf v3.4s, v3.4s
417; CHECK-BF16-NEXT:    bfcvtn v1.4h, v2.4s
418; CHECK-BF16-NEXT:    scvtf v2.4s, v4.4s
419; CHECK-BF16-NEXT:    bfcvtn v0.4h, v3.4s
420; CHECK-BF16-NEXT:    scvtf v3.4s, v5.4s
421; CHECK-BF16-NEXT:    bfcvtn2 v1.8h, v2.4s
422; CHECK-BF16-NEXT:    bfcvtn2 v0.8h, v3.4s
423; CHECK-BF16-NEXT:    ret
424  %1 = sitofp <16 x i8> %a to <16 x bfloat>
425  ret <16 x bfloat> %1
426}
427
; sitofp_i16: sitofp <8 x i16> -> <8 x bfloat>.
; Both run lines sign-extend each half to i32 with sshll/sshll2 and convert
; with scvtf. With +bf16 the halves are narrowed by bfcvtn/bfcvtn2; with -bf16
; they are narrowed by addhn/addhn2 after adding the ushr/and #1 bit to the
; 0x7fff constant.
; Attribute group #0 is defined outside this excerpt.
428define <8 x bfloat> @sitofp_i16(<8 x i16> %a) #0 {
429; CHECK-CVT-LABEL: sitofp_i16:
430; CHECK-CVT:       // %bb.0:
431; CHECK-CVT-NEXT:    sshll v2.4s, v0.4h, #0
432; CHECK-CVT-NEXT:    sshll2 v0.4s, v0.8h, #0
433; CHECK-CVT-NEXT:    movi v1.4s, #1
434; CHECK-CVT-NEXT:    movi v4.4s, #127, msl #8
435; CHECK-CVT-NEXT:    scvtf v2.4s, v2.4s
436; CHECK-CVT-NEXT:    scvtf v3.4s, v0.4s
437; CHECK-CVT-NEXT:    ushr v0.4s, v2.4s, #16
438; CHECK-CVT-NEXT:    ushr v5.4s, v3.4s, #16
439; CHECK-CVT-NEXT:    and v0.16b, v0.16b, v1.16b
440; CHECK-CVT-NEXT:    and v1.16b, v5.16b, v1.16b
441; CHECK-CVT-NEXT:    add v0.4s, v0.4s, v4.4s
442; CHECK-CVT-NEXT:    add v1.4s, v1.4s, v4.4s
443; CHECK-CVT-NEXT:    addhn v0.4h, v2.4s, v0.4s
444; CHECK-CVT-NEXT:    addhn2 v0.8h, v3.4s, v1.4s
445; CHECK-CVT-NEXT:    ret
446;
447; CHECK-BF16-LABEL: sitofp_i16:
448; CHECK-BF16:       // %bb.0:
449; CHECK-BF16-NEXT:    sshll v1.4s, v0.4h, #0
450; CHECK-BF16-NEXT:    sshll2 v2.4s, v0.8h, #0
451; CHECK-BF16-NEXT:    scvtf v1.4s, v1.4s
452; CHECK-BF16-NEXT:    bfcvtn v0.4h, v1.4s
453; CHECK-BF16-NEXT:    scvtf v1.4s, v2.4s
454; CHECK-BF16-NEXT:    bfcvtn2 v0.8h, v1.4s
455; CHECK-BF16-NEXT:    ret
456  %1 = sitofp <8 x i16> %a to <8 x bfloat>
457  ret <8 x bfloat> %1
458}
459
; sitofp_i32: sitofp <8 x i32> -> <8 x bfloat>.
; Both run lines convert the two input registers directly with scvtf. With
; +bf16 they are narrowed by bfcvtn/bfcvtn2; with -bf16 the ushr/and #1 bit is
; added to the converted value and addhn/addhn2 with the 0x7fff constant
; produce the result (no fcmeq/orr select appears in this sequence).
; Attribute group #0 is defined outside this excerpt.
460define <8 x bfloat> @sitofp_i32(<8 x i32> %a) #0 {
461; CHECK-CVT-LABEL: sitofp_i32:
462; CHECK-CVT:       // %bb.0:
463; CHECK-CVT-NEXT:    scvtf v0.4s, v0.4s
464; CHECK-CVT-NEXT:    movi v2.4s, #1
465; CHECK-CVT-NEXT:    scvtf v1.4s, v1.4s
466; CHECK-CVT-NEXT:    movi v5.4s, #127, msl #8
467; CHECK-CVT-NEXT:    ushr v3.4s, v0.4s, #16
468; CHECK-CVT-NEXT:    ushr v4.4s, v1.4s, #16
469; CHECK-CVT-NEXT:    and v3.16b, v3.16b, v2.16b
470; CHECK-CVT-NEXT:    and v2.16b, v4.16b, v2.16b
471; CHECK-CVT-NEXT:    add v0.4s, v3.4s, v0.4s
472; CHECK-CVT-NEXT:    add v1.4s, v2.4s, v1.4s
473; CHECK-CVT-NEXT:    addhn v0.4h, v0.4s, v5.4s
474; CHECK-CVT-NEXT:    addhn2 v0.8h, v1.4s, v5.4s
475; CHECK-CVT-NEXT:    ret
476;
477; CHECK-BF16-LABEL: sitofp_i32:
478; CHECK-BF16:       // %bb.0:
479; CHECK-BF16-NEXT:    scvtf v0.4s, v0.4s
480; CHECK-BF16-NEXT:    scvtf v1.4s, v1.4s
481; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
482; CHECK-BF16-NEXT:    bfcvtn2 v0.8h, v1.4s
483; CHECK-BF16-NEXT:    ret
484  %1 = sitofp <8 x i32> %a to <8 x bfloat>
485  ret <8 x bfloat> %1
486}
487
488
; sitofp_i64: sitofp <8 x i64> -> <8 x bfloat>.
; Both run lines convert the four i64 registers to f64 with scvtf and narrow
; them to two f32 registers with fcvtn/fcvtn2. With +bf16 the result is then
; produced by bfcvtn/bfcvtn2; with -bf16 the f32 -> bfloat step is open-coded
; with integer ops, an fcmeq-driven select, and uzp2.
; Attribute group #0 is defined outside this excerpt.
489define <8 x bfloat> @sitofp_i64(<8 x i64> %a) #0 {
490; CHECK-CVT-LABEL: sitofp_i64:
491; CHECK-CVT:       // %bb.0:
492; CHECK-CVT-NEXT:    scvtf v2.2d, v2.2d
493; CHECK-CVT-NEXT:    scvtf v0.2d, v0.2d
494; CHECK-CVT-NEXT:    scvtf v3.2d, v3.2d
495; CHECK-CVT-NEXT:    scvtf v1.2d, v1.2d
496; CHECK-CVT-NEXT:    fcvtn v2.2s, v2.2d
497; CHECK-CVT-NEXT:    fcvtn v0.2s, v0.2d
498; CHECK-CVT-NEXT:    fcvtn2 v2.4s, v3.2d
499; CHECK-CVT-NEXT:    fcvtn2 v0.4s, v1.2d
500; CHECK-CVT-NEXT:    movi v1.4s, #1
501; CHECK-CVT-NEXT:    movi v3.4s, #127, msl #8
502; CHECK-CVT-NEXT:    ushr v4.4s, v2.4s, #16
503; CHECK-CVT-NEXT:    ushr v5.4s, v0.4s, #16
504; CHECK-CVT-NEXT:    add v6.4s, v2.4s, v3.4s
505; CHECK-CVT-NEXT:    add v3.4s, v0.4s, v3.4s
506; CHECK-CVT-NEXT:    and v4.16b, v4.16b, v1.16b
507; CHECK-CVT-NEXT:    and v1.16b, v5.16b, v1.16b
508; CHECK-CVT-NEXT:    fcmeq v5.4s, v2.4s, v2.4s
509; CHECK-CVT-NEXT:    orr v2.4s, #64, lsl #16
510; CHECK-CVT-NEXT:    add v4.4s, v4.4s, v6.4s
511; CHECK-CVT-NEXT:    fcmeq v6.4s, v0.4s, v0.4s
512; CHECK-CVT-NEXT:    add v1.4s, v1.4s, v3.4s
513; CHECK-CVT-NEXT:    orr v0.4s, #64, lsl #16
514; CHECK-CVT-NEXT:    bit v2.16b, v4.16b, v5.16b
515; CHECK-CVT-NEXT:    bit v0.16b, v1.16b, v6.16b
516; CHECK-CVT-NEXT:    uzp2 v0.8h, v0.8h, v2.8h
517; CHECK-CVT-NEXT:    ret
518;
519; CHECK-BF16-LABEL: sitofp_i64:
520; CHECK-BF16:       // %bb.0:
521; CHECK-BF16-NEXT:    scvtf v0.2d, v0.2d
522; CHECK-BF16-NEXT:    scvtf v2.2d, v2.2d
523; CHECK-BF16-NEXT:    scvtf v1.2d, v1.2d
524; CHECK-BF16-NEXT:    scvtf v3.2d, v3.2d
525; CHECK-BF16-NEXT:    fcvtn v0.2s, v0.2d
526; CHECK-BF16-NEXT:    fcvtn v2.2s, v2.2d
527; CHECK-BF16-NEXT:    fcvtn2 v0.4s, v1.2d
528; CHECK-BF16-NEXT:    fcvtn2 v2.4s, v3.2d
529; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
530; CHECK-BF16-NEXT:    bfcvtn2 v0.8h, v2.4s
531; CHECK-BF16-NEXT:    ret
532  %1 = sitofp <8 x i64> %a to <8 x bfloat>
533  ret <8 x bfloat> %1
534}
535
; uitofp_v4i8: uitofp <4 x i8> -> <4 x bfloat>.
; Both run lines clear the upper byte of each .4h lane with bic, widen to i32
; with ushll, and convert with ucvtf. With +bf16 the result is narrowed by
; bfcvtn; with -bf16 it is narrowed by adding the ushr/and #1 bit and then
; using addhn with the 0x7fff constant.
; Attribute group #0 is defined outside this excerpt.
536define <4 x bfloat> @uitofp_v4i8(<4 x i8> %a) #0 {
537; CHECK-CVT-LABEL: uitofp_v4i8:
538; CHECK-CVT:       // %bb.0:
539; CHECK-CVT-NEXT:    bic v0.4h, #255, lsl #8
540; CHECK-CVT-NEXT:    movi v1.4s, #1
541; CHECK-CVT-NEXT:    ushll v0.4s, v0.4h, #0
542; CHECK-CVT-NEXT:    ucvtf v0.4s, v0.4s
543; CHECK-CVT-NEXT:    ushr v2.4s, v0.4s, #16
544; CHECK-CVT-NEXT:    and v1.16b, v2.16b, v1.16b
545; CHECK-CVT-NEXT:    movi v2.4s, #127, msl #8
546; CHECK-CVT-NEXT:    add v0.4s, v1.4s, v0.4s
547; CHECK-CVT-NEXT:    addhn v0.4h, v0.4s, v2.4s
548; CHECK-CVT-NEXT:    ret
549;
550; CHECK-BF16-LABEL: uitofp_v4i8:
551; CHECK-BF16:       // %bb.0:
552; CHECK-BF16-NEXT:    bic v0.4h, #255, lsl #8
553; CHECK-BF16-NEXT:    ushll v0.4s, v0.4h, #0
554; CHECK-BF16-NEXT:    ucvtf v0.4s, v0.4s
555; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
556; CHECK-BF16-NEXT:    ret
557  %1 = uitofp <4 x i8> %a to <4 x bfloat>
558  ret <4 x bfloat> %1
559}
560
; uitofp_v8i8: uitofp <8 x i8> -> <8 x bfloat>.
; Both run lines zero-extend i8 -> i16 -> i32 with ushll/ushll2 and convert each
; half with ucvtf. With +bf16 the halves are narrowed by bfcvtn/bfcvtn2; with
; -bf16 they are narrowed by addhn/addhn2 after adding the ushr/and #1 bit to
; the 0x7fff constant.
; Attribute group #0 is defined outside this excerpt.
561define <8 x bfloat> @uitofp_v8i8(<8 x i8> %a) #0 {
562; CHECK-CVT-LABEL: uitofp_v8i8:
563; CHECK-CVT:       // %bb.0:
564; CHECK-CVT-NEXT:    ushll v0.8h, v0.8b, #0
565; CHECK-CVT-NEXT:    movi v1.4s, #1
566; CHECK-CVT-NEXT:    movi v4.4s, #127, msl #8
567; CHECK-CVT-NEXT:    ushll v2.4s, v0.4h, #0
568; CHECK-CVT-NEXT:    ushll2 v0.4s, v0.8h, #0
569; CHECK-CVT-NEXT:    ucvtf v2.4s, v2.4s
570; CHECK-CVT-NEXT:    ucvtf v3.4s, v0.4s
571; CHECK-CVT-NEXT:    ushr v0.4s, v2.4s, #16
572; CHECK-CVT-NEXT:    ushr v5.4s, v3.4s, #16
573; CHECK-CVT-NEXT:    and v0.16b, v0.16b, v1.16b
574; CHECK-CVT-NEXT:    and v1.16b, v5.16b, v1.16b
575; CHECK-CVT-NEXT:    add v0.4s, v0.4s, v4.4s
576; CHECK-CVT-NEXT:    add v1.4s, v1.4s, v4.4s
577; CHECK-CVT-NEXT:    addhn v0.4h, v2.4s, v0.4s
578; CHECK-CVT-NEXT:    addhn2 v0.8h, v3.4s, v1.4s
579; CHECK-CVT-NEXT:    ret
580;
581; CHECK-BF16-LABEL: uitofp_v8i8:
582; CHECK-BF16:       // %bb.0:
583; CHECK-BF16-NEXT:    ushll v0.8h, v0.8b, #0
584; CHECK-BF16-NEXT:    ushll v1.4s, v0.4h, #0
585; CHECK-BF16-NEXT:    ushll2 v2.4s, v0.8h, #0
586; CHECK-BF16-NEXT:    ucvtf v1.4s, v1.4s
587; CHECK-BF16-NEXT:    bfcvtn v0.4h, v1.4s
588; CHECK-BF16-NEXT:    ucvtf v1.4s, v2.4s
589; CHECK-BF16-NEXT:    bfcvtn2 v0.8h, v1.4s
590; CHECK-BF16-NEXT:    ret
591  %1 = uitofp <8 x i8> %a to <8 x bfloat>
592  ret <8 x bfloat> %1
593}
594
; uitofp_v16i8: uitofp <16 x i8> -> <16 x bfloat> (result occupies v0 and v1).
; Both run lines zero-extend i8 -> i16 -> i32 in four quarters with
; ushll/ushll2 and convert each with ucvtf. With +bf16 the quarters are
; narrowed by bfcvtn/bfcvtn2; with -bf16 they are narrowed by addhn/addhn2
; after adding the ushr/and #1 bit to the 0x7fff constant.
; Attribute group #0 is defined outside this excerpt.
595define <16 x bfloat> @uitofp_v16i8(<16 x i8> %a) #0 {
596; CHECK-CVT-LABEL: uitofp_v16i8:
597; CHECK-CVT:       // %bb.0:
598; CHECK-CVT-NEXT:    ushll2 v2.8h, v0.16b, #0
599; CHECK-CVT-NEXT:    ushll v0.8h, v0.8b, #0
600; CHECK-CVT-NEXT:    movi v1.4s, #1
601; CHECK-CVT-NEXT:    movi v7.4s, #127, msl #8
602; CHECK-CVT-NEXT:    ushll v3.4s, v2.4h, #0
603; CHECK-CVT-NEXT:    ushll v4.4s, v0.4h, #0
604; CHECK-CVT-NEXT:    ushll2 v2.4s, v2.8h, #0
605; CHECK-CVT-NEXT:    ushll2 v0.4s, v0.8h, #0
606; CHECK-CVT-NEXT:    ucvtf v3.4s, v3.4s
607; CHECK-CVT-NEXT:    ucvtf v4.4s, v4.4s
608; CHECK-CVT-NEXT:    ucvtf v2.4s, v2.4s
609; CHECK-CVT-NEXT:    ucvtf v6.4s, v0.4s
610; CHECK-CVT-NEXT:    ushr v5.4s, v3.4s, #16
611; CHECK-CVT-NEXT:    ushr v0.4s, v4.4s, #16
612; CHECK-CVT-NEXT:    ushr v16.4s, v2.4s, #16
613; CHECK-CVT-NEXT:    ushr v17.4s, v6.4s, #16
614; CHECK-CVT-NEXT:    and v5.16b, v5.16b, v1.16b
615; CHECK-CVT-NEXT:    and v0.16b, v0.16b, v1.16b
616; CHECK-CVT-NEXT:    and v16.16b, v16.16b, v1.16b
617; CHECK-CVT-NEXT:    and v17.16b, v17.16b, v1.16b
618; CHECK-CVT-NEXT:    add v5.4s, v5.4s, v7.4s
619; CHECK-CVT-NEXT:    add v0.4s, v0.4s, v7.4s
620; CHECK-CVT-NEXT:    addhn v1.4h, v3.4s, v5.4s
621; CHECK-CVT-NEXT:    addhn v0.4h, v4.4s, v0.4s
622; CHECK-CVT-NEXT:    add v3.4s, v16.4s, v7.4s
623; CHECK-CVT-NEXT:    add v4.4s, v17.4s, v7.4s
624; CHECK-CVT-NEXT:    addhn2 v1.8h, v2.4s, v3.4s
625; CHECK-CVT-NEXT:    addhn2 v0.8h, v6.4s, v4.4s
626; CHECK-CVT-NEXT:    ret
627;
628; CHECK-BF16-LABEL: uitofp_v16i8:
629; CHECK-BF16:       // %bb.0:
630; CHECK-BF16-NEXT:    ushll2 v1.8h, v0.16b, #0
631; CHECK-BF16-NEXT:    ushll v0.8h, v0.8b, #0
632; CHECK-BF16-NEXT:    ushll v2.4s, v1.4h, #0
633; CHECK-BF16-NEXT:    ushll v3.4s, v0.4h, #0
634; CHECK-BF16-NEXT:    ushll2 v4.4s, v1.8h, #0
635; CHECK-BF16-NEXT:    ushll2 v5.4s, v0.8h, #0
636; CHECK-BF16-NEXT:    ucvtf v2.4s, v2.4s
637; CHECK-BF16-NEXT:    ucvtf v3.4s, v3.4s
638; CHECK-BF16-NEXT:    bfcvtn v1.4h, v2.4s
639; CHECK-BF16-NEXT:    ucvtf v2.4s, v4.4s
640; CHECK-BF16-NEXT:    bfcvtn v0.4h, v3.4s
641; CHECK-BF16-NEXT:    ucvtf v3.4s, v5.4s
642; CHECK-BF16-NEXT:    bfcvtn2 v1.8h, v2.4s
643; CHECK-BF16-NEXT:    bfcvtn2 v0.8h, v3.4s
644; CHECK-BF16-NEXT:    ret
645  %1 = uitofp <16 x i8> %a to <16 x bfloat>
646  ret <16 x bfloat> %1
647}
648
649
; uitofp_i16: uitofp <8 x i16> -> <8 x bfloat>.
; Both run lines zero-extend each half to i32 with ushll/ushll2 and convert
; with ucvtf. With +bf16 the halves are narrowed by bfcvtn/bfcvtn2; with -bf16
; they are narrowed by addhn/addhn2 after adding the ushr/and #1 bit to the
; 0x7fff constant.
; Attribute group #0 is defined outside this excerpt.
650define <8 x bfloat> @uitofp_i16(<8 x i16> %a) #0 {
651; CHECK-CVT-LABEL: uitofp_i16:
652; CHECK-CVT:       // %bb.0:
653; CHECK-CVT-NEXT:    ushll v2.4s, v0.4h, #0
654; CHECK-CVT-NEXT:    ushll2 v0.4s, v0.8h, #0
655; CHECK-CVT-NEXT:    movi v1.4s, #1
656; CHECK-CVT-NEXT:    movi v4.4s, #127, msl #8
657; CHECK-CVT-NEXT:    ucvtf v2.4s, v2.4s
658; CHECK-CVT-NEXT:    ucvtf v3.4s, v0.4s
659; CHECK-CVT-NEXT:    ushr v0.4s, v2.4s, #16
660; CHECK-CVT-NEXT:    ushr v5.4s, v3.4s, #16
661; CHECK-CVT-NEXT:    and v0.16b, v0.16b, v1.16b
662; CHECK-CVT-NEXT:    and v1.16b, v5.16b, v1.16b
663; CHECK-CVT-NEXT:    add v0.4s, v0.4s, v4.4s
664; CHECK-CVT-NEXT:    add v1.4s, v1.4s, v4.4s
665; CHECK-CVT-NEXT:    addhn v0.4h, v2.4s, v0.4s
666; CHECK-CVT-NEXT:    addhn2 v0.8h, v3.4s, v1.4s
667; CHECK-CVT-NEXT:    ret
668;
669; CHECK-BF16-LABEL: uitofp_i16:
670; CHECK-BF16:       // %bb.0:
671; CHECK-BF16-NEXT:    ushll v1.4s, v0.4h, #0
672; CHECK-BF16-NEXT:    ushll2 v2.4s, v0.8h, #0
673; CHECK-BF16-NEXT:    ucvtf v1.4s, v1.4s
674; CHECK-BF16-NEXT:    bfcvtn v0.4h, v1.4s
675; CHECK-BF16-NEXT:    ucvtf v1.4s, v2.4s
676; CHECK-BF16-NEXT:    bfcvtn2 v0.8h, v1.4s
677; CHECK-BF16-NEXT:    ret
678  %1 = uitofp <8 x i16> %a to <8 x bfloat>
679  ret <8 x bfloat> %1
680}
681
682
; uitofp_i32: uitofp <8 x i32> -> <8 x bfloat>.
; Both run lines convert the two input registers directly with ucvtf. With
; +bf16 they are narrowed by bfcvtn/bfcvtn2; with -bf16 the ushr/and #1 bit is
; added to the converted value and addhn/addhn2 with the 0x7fff constant
; produce the result (no fcmeq/orr select appears in this sequence).
; Attribute group #0 is defined outside this excerpt.
683define <8 x bfloat> @uitofp_i32(<8 x i32> %a) #0 {
684; CHECK-CVT-LABEL: uitofp_i32:
685; CHECK-CVT:       // %bb.0:
686; CHECK-CVT-NEXT:    ucvtf v0.4s, v0.4s
687; CHECK-CVT-NEXT:    movi v2.4s, #1
688; CHECK-CVT-NEXT:    ucvtf v1.4s, v1.4s
689; CHECK-CVT-NEXT:    movi v5.4s, #127, msl #8
690; CHECK-CVT-NEXT:    ushr v3.4s, v0.4s, #16
691; CHECK-CVT-NEXT:    ushr v4.4s, v1.4s, #16
692; CHECK-CVT-NEXT:    and v3.16b, v3.16b, v2.16b
693; CHECK-CVT-NEXT:    and v2.16b, v4.16b, v2.16b
694; CHECK-CVT-NEXT:    add v0.4s, v3.4s, v0.4s
695; CHECK-CVT-NEXT:    add v1.4s, v2.4s, v1.4s
696; CHECK-CVT-NEXT:    addhn v0.4h, v0.4s, v5.4s
697; CHECK-CVT-NEXT:    addhn2 v0.8h, v1.4s, v5.4s
698; CHECK-CVT-NEXT:    ret
699;
700; CHECK-BF16-LABEL: uitofp_i32:
701; CHECK-BF16:       // %bb.0:
702; CHECK-BF16-NEXT:    ucvtf v0.4s, v0.4s
703; CHECK-BF16-NEXT:    ucvtf v1.4s, v1.4s
704; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
705; CHECK-BF16-NEXT:    bfcvtn2 v0.8h, v1.4s
706; CHECK-BF16-NEXT:    ret
707  %1 = uitofp <8 x i32> %a to <8 x bfloat>
708  ret <8 x bfloat> %1
709}
710
711
; uitofp_i64: uitofp <8 x i64> -> <8 x bfloat>.
; Both run lines convert the four i64 registers to f64 with ucvtf and narrow
; them to two f32 registers with fcvtn/fcvtn2. With +bf16 the result is then
; produced by bfcvtn/bfcvtn2; with -bf16 the f32 -> bfloat step is open-coded
; with integer ops, an fcmeq-driven select, and uzp2.
; Attribute group #0 is defined outside this excerpt.
712define <8 x bfloat> @uitofp_i64(<8 x i64> %a) #0 {
713; CHECK-CVT-LABEL: uitofp_i64:
714; CHECK-CVT:       // %bb.0:
715; CHECK-CVT-NEXT:    ucvtf v2.2d, v2.2d
716; CHECK-CVT-NEXT:    ucvtf v0.2d, v0.2d
717; CHECK-CVT-NEXT:    ucvtf v3.2d, v3.2d
718; CHECK-CVT-NEXT:    ucvtf v1.2d, v1.2d
719; CHECK-CVT-NEXT:    fcvtn v2.2s, v2.2d
720; CHECK-CVT-NEXT:    fcvtn v0.2s, v0.2d
721; CHECK-CVT-NEXT:    fcvtn2 v2.4s, v3.2d
722; CHECK-CVT-NEXT:    fcvtn2 v0.4s, v1.2d
723; CHECK-CVT-NEXT:    movi v1.4s, #1
724; CHECK-CVT-NEXT:    movi v3.4s, #127, msl #8
725; CHECK-CVT-NEXT:    ushr v4.4s, v2.4s, #16
726; CHECK-CVT-NEXT:    ushr v5.4s, v0.4s, #16
727; CHECK-CVT-NEXT:    add v6.4s, v2.4s, v3.4s
728; CHECK-CVT-NEXT:    add v3.4s, v0.4s, v3.4s
729; CHECK-CVT-NEXT:    and v4.16b, v4.16b, v1.16b
730; CHECK-CVT-NEXT:    and v1.16b, v5.16b, v1.16b
731; CHECK-CVT-NEXT:    fcmeq v5.4s, v2.4s, v2.4s
732; CHECK-CVT-NEXT:    orr v2.4s, #64, lsl #16
733; CHECK-CVT-NEXT:    add v4.4s, v4.4s, v6.4s
734; CHECK-CVT-NEXT:    fcmeq v6.4s, v0.4s, v0.4s
735; CHECK-CVT-NEXT:    add v1.4s, v1.4s, v3.4s
736; CHECK-CVT-NEXT:    orr v0.4s, #64, lsl #16
737; CHECK-CVT-NEXT:    bit v2.16b, v4.16b, v5.16b
738; CHECK-CVT-NEXT:    bit v0.16b, v1.16b, v6.16b
739; CHECK-CVT-NEXT:    uzp2 v0.8h, v0.8h, v2.8h
740; CHECK-CVT-NEXT:    ret
741;
742; CHECK-BF16-LABEL: uitofp_i64:
743; CHECK-BF16:       // %bb.0:
744; CHECK-BF16-NEXT:    ucvtf v0.2d, v0.2d
745; CHECK-BF16-NEXT:    ucvtf v2.2d, v2.2d
746; CHECK-BF16-NEXT:    ucvtf v1.2d, v1.2d
747; CHECK-BF16-NEXT:    ucvtf v3.2d, v3.2d
748; CHECK-BF16-NEXT:    fcvtn v0.2s, v0.2d
749; CHECK-BF16-NEXT:    fcvtn v2.2s, v2.2d
750; CHECK-BF16-NEXT:    fcvtn2 v0.4s, v1.2d
751; CHECK-BF16-NEXT:    fcvtn2 v2.4s, v3.2d
752; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
753; CHECK-BF16-NEXT:    bfcvtn2 v0.8h, v2.4s
754; CHECK-BF16-NEXT:    ret
755  %1 = uitofp <8 x i64> %a to <8 x bfloat>
756  ret <8 x bfloat> %1
757}
758
; test_insert_at_zero: inserts scalar bfloat %a into lane 0 of an otherwise
; undef <8 x bfloat> and stores the vector to %b (align 4).
; Both run lines share the same expectation: no explicit insert instruction,
; only a `str q0, [x0]` preceded by the register-kill annotation for h0 -> q0.
; Attribute group #0 is defined outside this excerpt.
759define void @test_insert_at_zero(bfloat %a, ptr %b) #0 {
760; CHECK-LABEL: test_insert_at_zero:
761; CHECK:       // %bb.0:
762; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
763; CHECK-NEXT:    str q0, [x0]
764; CHECK-NEXT:    ret
765  %1 = insertelement <8 x bfloat> undef, bfloat %a, i64 0
766  store <8 x bfloat> %1, ptr %b, align 4
767  ret void
768}
769
; fptosi_i8: fptosi <8 x bfloat> -> <8 x i8>.
; Both run lines share the same expectation: widen each half to f32 with
; shll/shll2 #16, convert with fcvtzs, pack the low halfwords with uzp1, and
; narrow to bytes with xtn.
; Attribute group #0 is defined outside this excerpt.
770define <8 x i8> @fptosi_i8(<8 x bfloat> %a) #0 {
771; CHECK-LABEL: fptosi_i8:
772; CHECK:       // %bb.0:
773; CHECK-NEXT:    shll2 v1.4s, v0.8h, #16
774; CHECK-NEXT:    shll v0.4s, v0.4h, #16
775; CHECK-NEXT:    fcvtzs v1.4s, v1.4s
776; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
777; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
778; CHECK-NEXT:    xtn v0.8b, v0.8h
779; CHECK-NEXT:    ret
780  %1 = fptosi<8 x bfloat> %a to <8 x i8>
781  ret <8 x i8> %1
782}
783
; fptosi_i16: fptosi <8 x bfloat> -> <8 x i16>.
; Both run lines share the same expectation: widen each half to f32 with
; shll/shll2 #16, convert with fcvtzs, and pack the low halfwords with uzp1.
; Attribute group #0 is defined outside this excerpt.
784define <8 x i16> @fptosi_i16(<8 x bfloat> %a) #0 {
785; CHECK-LABEL: fptosi_i16:
786; CHECK:       // %bb.0:
787; CHECK-NEXT:    shll2 v1.4s, v0.8h, #16
788; CHECK-NEXT:    shll v0.4s, v0.4h, #16
789; CHECK-NEXT:    fcvtzs v1.4s, v1.4s
790; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
791; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
792; CHECK-NEXT:    ret
793  %1 = fptosi<8 x bfloat> %a to <8 x i16>
794  ret <8 x i16> %1
795}
796
; fptoui_i8: fptoui <8 x bfloat> -> <8 x i8>.
; Both run lines share the same expectation: widen each half to f32 with
; shll/shll2 #16, convert with fcvtzu, pack the low halfwords with uzp1, and
; narrow to bytes with xtn.
; Attribute group #0 is defined outside this excerpt.
797define <8 x i8> @fptoui_i8(<8 x bfloat> %a) #0 {
798; CHECK-LABEL: fptoui_i8:
799; CHECK:       // %bb.0:
800; CHECK-NEXT:    shll2 v1.4s, v0.8h, #16
801; CHECK-NEXT:    shll v0.4s, v0.4h, #16
802; CHECK-NEXT:    fcvtzu v1.4s, v1.4s
803; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
804; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
805; CHECK-NEXT:    xtn v0.8b, v0.8h
806; CHECK-NEXT:    ret
807  %1 = fptoui<8 x bfloat> %a to <8 x i8>
808  ret <8 x i8> %1
809}
810
; fptoui_i16: fptoui <8 x bfloat> -> <8 x i16>.
; Both run lines share the same expectation: widen each half to f32 with
; shll/shll2 #16, convert with fcvtzu, and pack the low halfwords with uzp1.
; Attribute group #0 is defined outside this excerpt.
811define <8 x i16> @fptoui_i16(<8 x bfloat> %a) #0 {
812; CHECK-LABEL: fptoui_i16:
813; CHECK:       // %bb.0:
814; CHECK-NEXT:    shll2 v1.4s, v0.8h, #16
815; CHECK-NEXT:    shll v0.4s, v0.4h, #16
816; CHECK-NEXT:    fcvtzu v1.4s, v1.4s
817; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
818; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
819; CHECK-NEXT:    ret
820  %1 = fptoui<8 x bfloat> %a to <8 x i16>
821  ret <8 x i16> %1
822}
823
; fcmp une (unordered or not-equal) on <8 x bfloat>, producing an <8 x i1> mask.
; The expected output is fully scalarised: lanes 1-7 are isolated with dup,
; every lane is widened to f32 by shll #16 and compared with a scalar fcmp,
; and each result becomes a 0/-1 value through csetm ne. Lane 0 seeds v2 via
; fmov, the others are inserted with mov v2.h[N], and xtn narrows the
; halfword mask to bytes.
define <8 x i1> @test_fcmp_une(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
; CHECK-LABEL: test_fcmp_une:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v2.4h, v1.h[1]
; CHECK-NEXT:    dup v3.4h, v0.h[1]
; CHECK-NEXT:    dup v4.4h, v1.h[2]
; CHECK-NEXT:    dup v5.4h, v0.h[2]
; CHECK-NEXT:    dup v6.4h, v0.h[3]
; CHECK-NEXT:    shll v2.4s, v2.4h, #16
; CHECK-NEXT:    shll v3.4s, v3.4h, #16
; CHECK-NEXT:    fcmp s3, s2
; CHECK-NEXT:    shll v2.4s, v1.4h, #16
; CHECK-NEXT:    shll v3.4s, v0.4h, #16
; CHECK-NEXT:    csetm w8, ne
; CHECK-NEXT:    fcmp s3, s2
; CHECK-NEXT:    shll v3.4s, v4.4h, #16
; CHECK-NEXT:    shll v4.4s, v5.4h, #16
; CHECK-NEXT:    dup v5.4h, v1.h[3]
; CHECK-NEXT:    csetm w9, ne
; CHECK-NEXT:    fmov s2, w9
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[4]
; CHECK-NEXT:    dup v6.8h, v0.h[4]
; CHECK-NEXT:    mov v2.h[1], w8
; CHECK-NEXT:    csetm w8, ne
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[5]
; CHECK-NEXT:    dup v6.8h, v0.h[5]
; CHECK-NEXT:    mov v2.h[2], w8
; CHECK-NEXT:    csetm w8, ne
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[6]
; CHECK-NEXT:    dup v6.8h, v0.h[6]
; CHECK-NEXT:    dup v1.8h, v1.h[7]
; CHECK-NEXT:    dup v0.8h, v0.h[7]
; CHECK-NEXT:    mov v2.h[3], w8
; CHECK-NEXT:    csetm w8, ne
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    shll v1.4s, v1.4h, #16
; CHECK-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-NEXT:    mov v2.h[4], w8
; CHECK-NEXT:    csetm w8, ne
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    mov v2.h[5], w8
; CHECK-NEXT:    csetm w8, ne
; CHECK-NEXT:    fcmp s0, s1
; CHECK-NEXT:    mov v2.h[6], w8
; CHECK-NEXT:    csetm w8, ne
; CHECK-NEXT:    mov v2.h[7], w8
; CHECK-NEXT:    xtn v0.8b, v2.8h
; CHECK-NEXT:    ret
  %cmp = fcmp une <8 x bfloat> %a, %b
  ret <8 x i1> %cmp
}
886
; fcmp ueq (unordered or equal) on <8 x bfloat>, producing an <8 x i1> mask.
; The expected output is fully scalarised: lanes 1-7 are isolated with dup,
; every lane is widened to f32 by shll #16 and compared with a scalar fcmp.
; Each lane needs two flag tests: csetm eq builds the 0/-1 value and the
; following csinv ..., wzr, vc replaces it with all-ones when the vc condition
; does not hold. Lane 0 seeds v2 via fmov, the others are inserted with
; mov v2.h[N], and xtn narrows the halfword mask to bytes.
define <8 x i1> @test_fcmp_ueq(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
; CHECK-LABEL: test_fcmp_ueq:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v2.4h, v1.h[1]
; CHECK-NEXT:    dup v3.4h, v0.h[1]
; CHECK-NEXT:    dup v4.4h, v1.h[2]
; CHECK-NEXT:    dup v5.4h, v0.h[2]
; CHECK-NEXT:    dup v6.4h, v0.h[3]
; CHECK-NEXT:    shll v2.4s, v2.4h, #16
; CHECK-NEXT:    shll v3.4s, v3.4h, #16
; CHECK-NEXT:    fcmp s3, s2
; CHECK-NEXT:    shll v2.4s, v1.4h, #16
; CHECK-NEXT:    shll v3.4s, v0.4h, #16
; CHECK-NEXT:    csetm w8, eq
; CHECK-NEXT:    csinv w8, w8, wzr, vc
; CHECK-NEXT:    fcmp s3, s2
; CHECK-NEXT:    shll v3.4s, v4.4h, #16
; CHECK-NEXT:    shll v4.4s, v5.4h, #16
; CHECK-NEXT:    dup v5.4h, v1.h[3]
; CHECK-NEXT:    csetm w9, eq
; CHECK-NEXT:    csinv w9, w9, wzr, vc
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    fmov s2, w9
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[4]
; CHECK-NEXT:    dup v6.8h, v0.h[4]
; CHECK-NEXT:    mov v2.h[1], w8
; CHECK-NEXT:    csetm w8, eq
; CHECK-NEXT:    csinv w8, w8, wzr, vc
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[5]
; CHECK-NEXT:    dup v6.8h, v0.h[5]
; CHECK-NEXT:    mov v2.h[2], w8
; CHECK-NEXT:    csetm w8, eq
; CHECK-NEXT:    csinv w8, w8, wzr, vc
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[6]
; CHECK-NEXT:    dup v6.8h, v0.h[6]
; CHECK-NEXT:    dup v1.8h, v1.h[7]
; CHECK-NEXT:    dup v0.8h, v0.h[7]
; CHECK-NEXT:    mov v2.h[3], w8
; CHECK-NEXT:    csetm w8, eq
; CHECK-NEXT:    csinv w8, w8, wzr, vc
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    shll v1.4s, v1.4h, #16
; CHECK-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-NEXT:    mov v2.h[4], w8
; CHECK-NEXT:    csetm w8, eq
; CHECK-NEXT:    csinv w8, w8, wzr, vc
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    mov v2.h[5], w8
; CHECK-NEXT:    csetm w8, eq
; CHECK-NEXT:    csinv w8, w8, wzr, vc
; CHECK-NEXT:    fcmp s0, s1
; CHECK-NEXT:    mov v2.h[6], w8
; CHECK-NEXT:    csetm w8, eq
; CHECK-NEXT:    csinv w8, w8, wzr, vc
; CHECK-NEXT:    mov v2.h[7], w8
; CHECK-NEXT:    xtn v0.8b, v2.8h
; CHECK-NEXT:    ret
  %cmp = fcmp ueq <8 x bfloat> %a, %b
  ret <8 x i1> %cmp
}
957
; fcmp ugt (unordered or greater-than) on <8 x bfloat>, producing an
; <8 x i1> mask.
; The expected output is fully scalarised: lanes 1-7 are isolated with dup,
; every lane is widened to f32 by shll #16 and compared with a scalar fcmp,
; and each result becomes a 0/-1 value through csetm hi. Lane 0 seeds v2 via
; fmov, the others are inserted with mov v2.h[N], and xtn narrows the
; halfword mask to bytes.
define <8 x i1> @test_fcmp_ugt(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
; CHECK-LABEL: test_fcmp_ugt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v2.4h, v1.h[1]
; CHECK-NEXT:    dup v3.4h, v0.h[1]
; CHECK-NEXT:    dup v4.4h, v1.h[2]
; CHECK-NEXT:    dup v5.4h, v0.h[2]
; CHECK-NEXT:    dup v6.4h, v0.h[3]
; CHECK-NEXT:    shll v2.4s, v2.4h, #16
; CHECK-NEXT:    shll v3.4s, v3.4h, #16
; CHECK-NEXT:    fcmp s3, s2
; CHECK-NEXT:    shll v2.4s, v1.4h, #16
; CHECK-NEXT:    shll v3.4s, v0.4h, #16
; CHECK-NEXT:    csetm w8, hi
; CHECK-NEXT:    fcmp s3, s2
; CHECK-NEXT:    shll v3.4s, v4.4h, #16
; CHECK-NEXT:    shll v4.4s, v5.4h, #16
; CHECK-NEXT:    dup v5.4h, v1.h[3]
; CHECK-NEXT:    csetm w9, hi
; CHECK-NEXT:    fmov s2, w9
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[4]
; CHECK-NEXT:    dup v6.8h, v0.h[4]
; CHECK-NEXT:    mov v2.h[1], w8
; CHECK-NEXT:    csetm w8, hi
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[5]
; CHECK-NEXT:    dup v6.8h, v0.h[5]
; CHECK-NEXT:    mov v2.h[2], w8
; CHECK-NEXT:    csetm w8, hi
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[6]
; CHECK-NEXT:    dup v6.8h, v0.h[6]
; CHECK-NEXT:    dup v1.8h, v1.h[7]
; CHECK-NEXT:    dup v0.8h, v0.h[7]
; CHECK-NEXT:    mov v2.h[3], w8
; CHECK-NEXT:    csetm w8, hi
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    shll v1.4s, v1.4h, #16
; CHECK-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-NEXT:    mov v2.h[4], w8
; CHECK-NEXT:    csetm w8, hi
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    mov v2.h[5], w8
; CHECK-NEXT:    csetm w8, hi
; CHECK-NEXT:    fcmp s0, s1
; CHECK-NEXT:    mov v2.h[6], w8
; CHECK-NEXT:    csetm w8, hi
; CHECK-NEXT:    mov v2.h[7], w8
; CHECK-NEXT:    xtn v0.8b, v2.8h
; CHECK-NEXT:    ret
  %cmp = fcmp ugt <8 x bfloat> %a, %b
  ret <8 x i1> %cmp
}
1020
; fcmp uge (unordered or greater-or-equal) on <8 x bfloat>, producing an
; <8 x i1> mask.
; The expected output is fully scalarised: lanes 1-7 are isolated with dup,
; every lane is widened to f32 by shll #16 and compared with a scalar fcmp,
; and each result becomes a 0/-1 value through csetm pl. Lane 0 seeds v2 via
; fmov, the others are inserted with mov v2.h[N], and xtn narrows the
; halfword mask to bytes.
define <8 x i1> @test_fcmp_uge(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
; CHECK-LABEL: test_fcmp_uge:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v2.4h, v1.h[1]
; CHECK-NEXT:    dup v3.4h, v0.h[1]
; CHECK-NEXT:    dup v4.4h, v1.h[2]
; CHECK-NEXT:    dup v5.4h, v0.h[2]
; CHECK-NEXT:    dup v6.4h, v0.h[3]
; CHECK-NEXT:    shll v2.4s, v2.4h, #16
; CHECK-NEXT:    shll v3.4s, v3.4h, #16
; CHECK-NEXT:    fcmp s3, s2
; CHECK-NEXT:    shll v2.4s, v1.4h, #16
; CHECK-NEXT:    shll v3.4s, v0.4h, #16
; CHECK-NEXT:    csetm w8, pl
; CHECK-NEXT:    fcmp s3, s2
; CHECK-NEXT:    shll v3.4s, v4.4h, #16
; CHECK-NEXT:    shll v4.4s, v5.4h, #16
; CHECK-NEXT:    dup v5.4h, v1.h[3]
; CHECK-NEXT:    csetm w9, pl
; CHECK-NEXT:    fmov s2, w9
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[4]
; CHECK-NEXT:    dup v6.8h, v0.h[4]
; CHECK-NEXT:    mov v2.h[1], w8
; CHECK-NEXT:    csetm w8, pl
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[5]
; CHECK-NEXT:    dup v6.8h, v0.h[5]
; CHECK-NEXT:    mov v2.h[2], w8
; CHECK-NEXT:    csetm w8, pl
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[6]
; CHECK-NEXT:    dup v6.8h, v0.h[6]
; CHECK-NEXT:    dup v1.8h, v1.h[7]
; CHECK-NEXT:    dup v0.8h, v0.h[7]
; CHECK-NEXT:    mov v2.h[3], w8
; CHECK-NEXT:    csetm w8, pl
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    shll v1.4s, v1.4h, #16
; CHECK-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-NEXT:    mov v2.h[4], w8
; CHECK-NEXT:    csetm w8, pl
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    mov v2.h[5], w8
; CHECK-NEXT:    csetm w8, pl
; CHECK-NEXT:    fcmp s0, s1
; CHECK-NEXT:    mov v2.h[6], w8
; CHECK-NEXT:    csetm w8, pl
; CHECK-NEXT:    mov v2.h[7], w8
; CHECK-NEXT:    xtn v0.8b, v2.8h
; CHECK-NEXT:    ret
  %cmp = fcmp uge <8 x bfloat> %a, %b
  ret <8 x i1> %cmp
}
1083
; fcmp ult (unordered or less-than) on <8 x bfloat>, producing an <8 x i1> mask.
; The expected output is fully scalarised: lanes 1-7 are isolated with dup,
; every lane is widened to f32 by shll #16 and compared with a scalar fcmp,
; and each result becomes a 0/-1 value through csetm lt. Lane 0 seeds v2 via
; fmov, the others are inserted with mov v2.h[N], and xtn narrows the
; halfword mask to bytes.
define <8 x i1> @test_fcmp_ult(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
; CHECK-LABEL: test_fcmp_ult:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v2.4h, v1.h[1]
; CHECK-NEXT:    dup v3.4h, v0.h[1]
; CHECK-NEXT:    dup v4.4h, v1.h[2]
; CHECK-NEXT:    dup v5.4h, v0.h[2]
; CHECK-NEXT:    dup v6.4h, v0.h[3]
; CHECK-NEXT:    shll v2.4s, v2.4h, #16
; CHECK-NEXT:    shll v3.4s, v3.4h, #16
; CHECK-NEXT:    fcmp s3, s2
; CHECK-NEXT:    shll v2.4s, v1.4h, #16
; CHECK-NEXT:    shll v3.4s, v0.4h, #16
; CHECK-NEXT:    csetm w8, lt
; CHECK-NEXT:    fcmp s3, s2
; CHECK-NEXT:    shll v3.4s, v4.4h, #16
; CHECK-NEXT:    shll v4.4s, v5.4h, #16
; CHECK-NEXT:    dup v5.4h, v1.h[3]
; CHECK-NEXT:    csetm w9, lt
; CHECK-NEXT:    fmov s2, w9
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[4]
; CHECK-NEXT:    dup v6.8h, v0.h[4]
; CHECK-NEXT:    mov v2.h[1], w8
; CHECK-NEXT:    csetm w8, lt
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[5]
; CHECK-NEXT:    dup v6.8h, v0.h[5]
; CHECK-NEXT:    mov v2.h[2], w8
; CHECK-NEXT:    csetm w8, lt
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[6]
; CHECK-NEXT:    dup v6.8h, v0.h[6]
; CHECK-NEXT:    dup v1.8h, v1.h[7]
; CHECK-NEXT:    dup v0.8h, v0.h[7]
; CHECK-NEXT:    mov v2.h[3], w8
; CHECK-NEXT:    csetm w8, lt
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    shll v1.4s, v1.4h, #16
; CHECK-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-NEXT:    mov v2.h[4], w8
; CHECK-NEXT:    csetm w8, lt
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    mov v2.h[5], w8
; CHECK-NEXT:    csetm w8, lt
; CHECK-NEXT:    fcmp s0, s1
; CHECK-NEXT:    mov v2.h[6], w8
; CHECK-NEXT:    csetm w8, lt
; CHECK-NEXT:    mov v2.h[7], w8
; CHECK-NEXT:    xtn v0.8b, v2.8h
; CHECK-NEXT:    ret
  %cmp = fcmp ult <8 x bfloat> %a, %b
  ret <8 x i1> %cmp
}
1146
; fcmp ule (unordered or less-or-equal) on <8 x bfloat>, producing an
; <8 x i1> mask.
; The expected output is fully scalarised: lanes 1-7 are isolated with dup,
; every lane is widened to f32 by shll #16 and compared with a scalar fcmp,
; and each result becomes a 0/-1 value through csetm le. Lane 0 seeds v2 via
; fmov, the others are inserted with mov v2.h[N], and xtn narrows the
; halfword mask to bytes.
define <8 x i1> @test_fcmp_ule(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
; CHECK-LABEL: test_fcmp_ule:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v2.4h, v1.h[1]
; CHECK-NEXT:    dup v3.4h, v0.h[1]
; CHECK-NEXT:    dup v4.4h, v1.h[2]
; CHECK-NEXT:    dup v5.4h, v0.h[2]
; CHECK-NEXT:    dup v6.4h, v0.h[3]
; CHECK-NEXT:    shll v2.4s, v2.4h, #16
; CHECK-NEXT:    shll v3.4s, v3.4h, #16
; CHECK-NEXT:    fcmp s3, s2
; CHECK-NEXT:    shll v2.4s, v1.4h, #16
; CHECK-NEXT:    shll v3.4s, v0.4h, #16
; CHECK-NEXT:    csetm w8, le
; CHECK-NEXT:    fcmp s3, s2
; CHECK-NEXT:    shll v3.4s, v4.4h, #16
; CHECK-NEXT:    shll v4.4s, v5.4h, #16
; CHECK-NEXT:    dup v5.4h, v1.h[3]
; CHECK-NEXT:    csetm w9, le
; CHECK-NEXT:    fmov s2, w9
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[4]
; CHECK-NEXT:    dup v6.8h, v0.h[4]
; CHECK-NEXT:    mov v2.h[1], w8
; CHECK-NEXT:    csetm w8, le
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[5]
; CHECK-NEXT:    dup v6.8h, v0.h[5]
; CHECK-NEXT:    mov v2.h[2], w8
; CHECK-NEXT:    csetm w8, le
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[6]
; CHECK-NEXT:    dup v6.8h, v0.h[6]
; CHECK-NEXT:    dup v1.8h, v1.h[7]
; CHECK-NEXT:    dup v0.8h, v0.h[7]
; CHECK-NEXT:    mov v2.h[3], w8
; CHECK-NEXT:    csetm w8, le
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    shll v1.4s, v1.4h, #16
; CHECK-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-NEXT:    mov v2.h[4], w8
; CHECK-NEXT:    csetm w8, le
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    mov v2.h[5], w8
; CHECK-NEXT:    csetm w8, le
; CHECK-NEXT:    fcmp s0, s1
; CHECK-NEXT:    mov v2.h[6], w8
; CHECK-NEXT:    csetm w8, le
; CHECK-NEXT:    mov v2.h[7], w8
; CHECK-NEXT:    xtn v0.8b, v2.8h
; CHECK-NEXT:    ret
  %cmp = fcmp ule <8 x bfloat> %a, %b
  ret <8 x i1> %cmp
}
1209
; fcmp uno (unordered: either operand is NaN) on <8 x bfloat>, producing an
; <8 x i1> mask.
; The expected output is fully scalarised: lanes 1-7 are isolated with dup,
; every lane is widened to f32 by shll #16 and compared with a scalar fcmp,
; and each result becomes a 0/-1 value through csetm vs. Lane 0 seeds v2 via
; fmov, the others are inserted with mov v2.h[N], and xtn narrows the
; halfword mask to bytes.
define <8 x i1> @test_fcmp_uno(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
; CHECK-LABEL: test_fcmp_uno:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v2.4h, v1.h[1]
; CHECK-NEXT:    dup v3.4h, v0.h[1]
; CHECK-NEXT:    dup v4.4h, v1.h[2]
; CHECK-NEXT:    dup v5.4h, v0.h[2]
; CHECK-NEXT:    dup v6.4h, v0.h[3]
; CHECK-NEXT:    shll v2.4s, v2.4h, #16
; CHECK-NEXT:    shll v3.4s, v3.4h, #16
; CHECK-NEXT:    fcmp s3, s2
; CHECK-NEXT:    shll v2.4s, v1.4h, #16
; CHECK-NEXT:    shll v3.4s, v0.4h, #16
; CHECK-NEXT:    csetm w8, vs
; CHECK-NEXT:    fcmp s3, s2
; CHECK-NEXT:    shll v3.4s, v4.4h, #16
; CHECK-NEXT:    shll v4.4s, v5.4h, #16
; CHECK-NEXT:    dup v5.4h, v1.h[3]
; CHECK-NEXT:    csetm w9, vs
; CHECK-NEXT:    fmov s2, w9
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[4]
; CHECK-NEXT:    dup v6.8h, v0.h[4]
; CHECK-NEXT:    mov v2.h[1], w8
; CHECK-NEXT:    csetm w8, vs
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[5]
; CHECK-NEXT:    dup v6.8h, v0.h[5]
; CHECK-NEXT:    mov v2.h[2], w8
; CHECK-NEXT:    csetm w8, vs
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[6]
; CHECK-NEXT:    dup v6.8h, v0.h[6]
; CHECK-NEXT:    dup v1.8h, v1.h[7]
; CHECK-NEXT:    dup v0.8h, v0.h[7]
; CHECK-NEXT:    mov v2.h[3], w8
; CHECK-NEXT:    csetm w8, vs
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    shll v1.4s, v1.4h, #16
; CHECK-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-NEXT:    mov v2.h[4], w8
; CHECK-NEXT:    csetm w8, vs
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    mov v2.h[5], w8
; CHECK-NEXT:    csetm w8, vs
; CHECK-NEXT:    fcmp s0, s1
; CHECK-NEXT:    mov v2.h[6], w8
; CHECK-NEXT:    csetm w8, vs
; CHECK-NEXT:    mov v2.h[7], w8
; CHECK-NEXT:    xtn v0.8b, v2.8h
; CHECK-NEXT:    ret
  %cmp = fcmp uno <8 x bfloat> %a, %b
  ret <8 x i1> %cmp
}
1272
; fcmp one (ordered and not-equal) on <8 x bfloat>, producing an <8 x i1> mask.
; The expected output is fully scalarised: lanes 1-7 are isolated with dup,
; every lane is widened to f32 by shll #16 and compared with a scalar fcmp.
; Each lane needs two flag tests: csetm mi builds the 0/-1 value and the
; following csinv ..., wzr, le replaces it with all-ones when the le condition
; does not hold. Lane 0 seeds v2 via fmov, the others are inserted with
; mov v2.h[N], and xtn narrows the halfword mask to bytes.
define <8 x i1> @test_fcmp_one(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
; CHECK-LABEL: test_fcmp_one:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v2.4h, v1.h[1]
; CHECK-NEXT:    dup v3.4h, v0.h[1]
; CHECK-NEXT:    dup v4.4h, v1.h[2]
; CHECK-NEXT:    dup v5.4h, v0.h[2]
; CHECK-NEXT:    dup v6.4h, v0.h[3]
; CHECK-NEXT:    shll v2.4s, v2.4h, #16
; CHECK-NEXT:    shll v3.4s, v3.4h, #16
; CHECK-NEXT:    fcmp s3, s2
; CHECK-NEXT:    shll v2.4s, v1.4h, #16
; CHECK-NEXT:    shll v3.4s, v0.4h, #16
; CHECK-NEXT:    csetm w8, mi
; CHECK-NEXT:    csinv w8, w8, wzr, le
; CHECK-NEXT:    fcmp s3, s2
; CHECK-NEXT:    shll v3.4s, v4.4h, #16
; CHECK-NEXT:    shll v4.4s, v5.4h, #16
; CHECK-NEXT:    dup v5.4h, v1.h[3]
; CHECK-NEXT:    csetm w9, mi
; CHECK-NEXT:    csinv w9, w9, wzr, le
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    fmov s2, w9
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[4]
; CHECK-NEXT:    dup v6.8h, v0.h[4]
; CHECK-NEXT:    mov v2.h[1], w8
; CHECK-NEXT:    csetm w8, mi
; CHECK-NEXT:    csinv w8, w8, wzr, le
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[5]
; CHECK-NEXT:    dup v6.8h, v0.h[5]
; CHECK-NEXT:    mov v2.h[2], w8
; CHECK-NEXT:    csetm w8, mi
; CHECK-NEXT:    csinv w8, w8, wzr, le
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[6]
; CHECK-NEXT:    dup v6.8h, v0.h[6]
; CHECK-NEXT:    dup v1.8h, v1.h[7]
; CHECK-NEXT:    dup v0.8h, v0.h[7]
; CHECK-NEXT:    mov v2.h[3], w8
; CHECK-NEXT:    csetm w8, mi
; CHECK-NEXT:    csinv w8, w8, wzr, le
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    shll v1.4s, v1.4h, #16
; CHECK-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-NEXT:    mov v2.h[4], w8
; CHECK-NEXT:    csetm w8, mi
; CHECK-NEXT:    csinv w8, w8, wzr, le
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    mov v2.h[5], w8
; CHECK-NEXT:    csetm w8, mi
; CHECK-NEXT:    csinv w8, w8, wzr, le
; CHECK-NEXT:    fcmp s0, s1
; CHECK-NEXT:    mov v2.h[6], w8
; CHECK-NEXT:    csetm w8, mi
; CHECK-NEXT:    csinv w8, w8, wzr, le
; CHECK-NEXT:    mov v2.h[7], w8
; CHECK-NEXT:    xtn v0.8b, v2.8h
; CHECK-NEXT:    ret
  %cmp = fcmp one <8 x bfloat> %a, %b
  ret <8 x i1> %cmp
}
1343
; fcmp oeq (ordered and equal) on <8 x bfloat>, producing an <8 x i1> mask.
; The expected output is fully scalarised: lanes 1-7 are isolated with dup,
; every lane is widened to f32 by shll #16 and compared with a scalar fcmp,
; and each result becomes a 0/-1 value through csetm eq. Lane 0 seeds v2 via
; fmov, the others are inserted with mov v2.h[N], and xtn narrows the
; halfword mask to bytes.
define <8 x i1> @test_fcmp_oeq(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
; CHECK-LABEL: test_fcmp_oeq:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v2.4h, v1.h[1]
; CHECK-NEXT:    dup v3.4h, v0.h[1]
; CHECK-NEXT:    dup v4.4h, v1.h[2]
; CHECK-NEXT:    dup v5.4h, v0.h[2]
; CHECK-NEXT:    dup v6.4h, v0.h[3]
; CHECK-NEXT:    shll v2.4s, v2.4h, #16
; CHECK-NEXT:    shll v3.4s, v3.4h, #16
; CHECK-NEXT:    fcmp s3, s2
; CHECK-NEXT:    shll v2.4s, v1.4h, #16
; CHECK-NEXT:    shll v3.4s, v0.4h, #16
; CHECK-NEXT:    csetm w8, eq
; CHECK-NEXT:    fcmp s3, s2
; CHECK-NEXT:    shll v3.4s, v4.4h, #16
; CHECK-NEXT:    shll v4.4s, v5.4h, #16
; CHECK-NEXT:    dup v5.4h, v1.h[3]
; CHECK-NEXT:    csetm w9, eq
; CHECK-NEXT:    fmov s2, w9
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[4]
; CHECK-NEXT:    dup v6.8h, v0.h[4]
; CHECK-NEXT:    mov v2.h[1], w8
; CHECK-NEXT:    csetm w8, eq
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[5]
; CHECK-NEXT:    dup v6.8h, v0.h[5]
; CHECK-NEXT:    mov v2.h[2], w8
; CHECK-NEXT:    csetm w8, eq
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[6]
; CHECK-NEXT:    dup v6.8h, v0.h[6]
; CHECK-NEXT:    dup v1.8h, v1.h[7]
; CHECK-NEXT:    dup v0.8h, v0.h[7]
; CHECK-NEXT:    mov v2.h[3], w8
; CHECK-NEXT:    csetm w8, eq
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    shll v1.4s, v1.4h, #16
; CHECK-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-NEXT:    mov v2.h[4], w8
; CHECK-NEXT:    csetm w8, eq
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    mov v2.h[5], w8
; CHECK-NEXT:    csetm w8, eq
; CHECK-NEXT:    fcmp s0, s1
; CHECK-NEXT:    mov v2.h[6], w8
; CHECK-NEXT:    csetm w8, eq
; CHECK-NEXT:    mov v2.h[7], w8
; CHECK-NEXT:    xtn v0.8b, v2.8h
; CHECK-NEXT:    ret
  %cmp = fcmp oeq <8 x bfloat> %a, %b
  ret <8 x i1> %cmp
}
1406
; fcmp ogt (ordered and greater-than) on <8 x bfloat>, producing an
; <8 x i1> mask.
; The expected output is fully scalarised: lanes 1-7 are isolated with dup,
; every lane is widened to f32 by shll #16 and compared with a scalar fcmp,
; and each result becomes a 0/-1 value through csetm gt. Lane 0 seeds v2 via
; fmov, the others are inserted with mov v2.h[N], and xtn narrows the
; halfword mask to bytes.
define <8 x i1> @test_fcmp_ogt(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
; CHECK-LABEL: test_fcmp_ogt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v2.4h, v1.h[1]
; CHECK-NEXT:    dup v3.4h, v0.h[1]
; CHECK-NEXT:    dup v4.4h, v1.h[2]
; CHECK-NEXT:    dup v5.4h, v0.h[2]
; CHECK-NEXT:    dup v6.4h, v0.h[3]
; CHECK-NEXT:    shll v2.4s, v2.4h, #16
; CHECK-NEXT:    shll v3.4s, v3.4h, #16
; CHECK-NEXT:    fcmp s3, s2
; CHECK-NEXT:    shll v2.4s, v1.4h, #16
; CHECK-NEXT:    shll v3.4s, v0.4h, #16
; CHECK-NEXT:    csetm w8, gt
; CHECK-NEXT:    fcmp s3, s2
; CHECK-NEXT:    shll v3.4s, v4.4h, #16
; CHECK-NEXT:    shll v4.4s, v5.4h, #16
; CHECK-NEXT:    dup v5.4h, v1.h[3]
; CHECK-NEXT:    csetm w9, gt
; CHECK-NEXT:    fmov s2, w9
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[4]
; CHECK-NEXT:    dup v6.8h, v0.h[4]
; CHECK-NEXT:    mov v2.h[1], w8
; CHECK-NEXT:    csetm w8, gt
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[5]
; CHECK-NEXT:    dup v6.8h, v0.h[5]
; CHECK-NEXT:    mov v2.h[2], w8
; CHECK-NEXT:    csetm w8, gt
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[6]
; CHECK-NEXT:    dup v6.8h, v0.h[6]
; CHECK-NEXT:    dup v1.8h, v1.h[7]
; CHECK-NEXT:    dup v0.8h, v0.h[7]
; CHECK-NEXT:    mov v2.h[3], w8
; CHECK-NEXT:    csetm w8, gt
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    shll v1.4s, v1.4h, #16
; CHECK-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-NEXT:    mov v2.h[4], w8
; CHECK-NEXT:    csetm w8, gt
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    mov v2.h[5], w8
; CHECK-NEXT:    csetm w8, gt
; CHECK-NEXT:    fcmp s0, s1
; CHECK-NEXT:    mov v2.h[6], w8
; CHECK-NEXT:    csetm w8, gt
; CHECK-NEXT:    mov v2.h[7], w8
; CHECK-NEXT:    xtn v0.8b, v2.8h
; CHECK-NEXT:    ret
  %cmp = fcmp ogt <8 x bfloat> %a, %b
  ret <8 x i1> %cmp
}
1469
; fcmp oge (ordered and greater-or-equal) on <8 x bfloat>, producing an
; <8 x i1> mask.
; The expected output is fully scalarised: lanes 1-7 are isolated with dup,
; every lane is widened to f32 by shll #16 and compared with a scalar fcmp,
; and each result becomes a 0/-1 value through csetm ge. Lane 0 seeds v2 via
; fmov, the others are inserted with mov v2.h[N], and xtn narrows the
; halfword mask to bytes.
define <8 x i1> @test_fcmp_oge(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
; CHECK-LABEL: test_fcmp_oge:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v2.4h, v1.h[1]
; CHECK-NEXT:    dup v3.4h, v0.h[1]
; CHECK-NEXT:    dup v4.4h, v1.h[2]
; CHECK-NEXT:    dup v5.4h, v0.h[2]
; CHECK-NEXT:    dup v6.4h, v0.h[3]
; CHECK-NEXT:    shll v2.4s, v2.4h, #16
; CHECK-NEXT:    shll v3.4s, v3.4h, #16
; CHECK-NEXT:    fcmp s3, s2
; CHECK-NEXT:    shll v2.4s, v1.4h, #16
; CHECK-NEXT:    shll v3.4s, v0.4h, #16
; CHECK-NEXT:    csetm w8, ge
; CHECK-NEXT:    fcmp s3, s2
; CHECK-NEXT:    shll v3.4s, v4.4h, #16
; CHECK-NEXT:    shll v4.4s, v5.4h, #16
; CHECK-NEXT:    dup v5.4h, v1.h[3]
; CHECK-NEXT:    csetm w9, ge
; CHECK-NEXT:    fmov s2, w9
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[4]
; CHECK-NEXT:    dup v6.8h, v0.h[4]
; CHECK-NEXT:    mov v2.h[1], w8
; CHECK-NEXT:    csetm w8, ge
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[5]
; CHECK-NEXT:    dup v6.8h, v0.h[5]
; CHECK-NEXT:    mov v2.h[2], w8
; CHECK-NEXT:    csetm w8, ge
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[6]
; CHECK-NEXT:    dup v6.8h, v0.h[6]
; CHECK-NEXT:    dup v1.8h, v1.h[7]
; CHECK-NEXT:    dup v0.8h, v0.h[7]
; CHECK-NEXT:    mov v2.h[3], w8
; CHECK-NEXT:    csetm w8, ge
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    shll v1.4s, v1.4h, #16
; CHECK-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-NEXT:    mov v2.h[4], w8
; CHECK-NEXT:    csetm w8, ge
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    mov v2.h[5], w8
; CHECK-NEXT:    csetm w8, ge
; CHECK-NEXT:    fcmp s0, s1
; CHECK-NEXT:    mov v2.h[6], w8
; CHECK-NEXT:    csetm w8, ge
; CHECK-NEXT:    mov v2.h[7], w8
; CHECK-NEXT:    xtn v0.8b, v2.8h
; CHECK-NEXT:    ret
  %cmp = fcmp oge <8 x bfloat> %a, %b
  ret <8 x i1> %cmp
}
1532
; fcmp olt (ordered and less-than) on <8 x bfloat>, producing an <8 x i1> mask.
; The expected output is fully scalarised: lanes 1-7 are isolated with dup,
; every lane is widened to f32 by shll #16 and compared with a scalar fcmp,
; and each result becomes a 0/-1 value through csetm mi. Lane 0 seeds v2 via
; fmov, the others are inserted with mov v2.h[N], and xtn narrows the
; halfword mask to bytes.
define <8 x i1> @test_fcmp_olt(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
; CHECK-LABEL: test_fcmp_olt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v2.4h, v1.h[1]
; CHECK-NEXT:    dup v3.4h, v0.h[1]
; CHECK-NEXT:    dup v4.4h, v1.h[2]
; CHECK-NEXT:    dup v5.4h, v0.h[2]
; CHECK-NEXT:    dup v6.4h, v0.h[3]
; CHECK-NEXT:    shll v2.4s, v2.4h, #16
; CHECK-NEXT:    shll v3.4s, v3.4h, #16
; CHECK-NEXT:    fcmp s3, s2
; CHECK-NEXT:    shll v2.4s, v1.4h, #16
; CHECK-NEXT:    shll v3.4s, v0.4h, #16
; CHECK-NEXT:    csetm w8, mi
; CHECK-NEXT:    fcmp s3, s2
; CHECK-NEXT:    shll v3.4s, v4.4h, #16
; CHECK-NEXT:    shll v4.4s, v5.4h, #16
; CHECK-NEXT:    dup v5.4h, v1.h[3]
; CHECK-NEXT:    csetm w9, mi
; CHECK-NEXT:    fmov s2, w9
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[4]
; CHECK-NEXT:    dup v6.8h, v0.h[4]
; CHECK-NEXT:    mov v2.h[1], w8
; CHECK-NEXT:    csetm w8, mi
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[5]
; CHECK-NEXT:    dup v6.8h, v0.h[5]
; CHECK-NEXT:    mov v2.h[2], w8
; CHECK-NEXT:    csetm w8, mi
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[6]
; CHECK-NEXT:    dup v6.8h, v0.h[6]
; CHECK-NEXT:    dup v1.8h, v1.h[7]
; CHECK-NEXT:    dup v0.8h, v0.h[7]
; CHECK-NEXT:    mov v2.h[3], w8
; CHECK-NEXT:    csetm w8, mi
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    shll v1.4s, v1.4h, #16
; CHECK-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-NEXT:    mov v2.h[4], w8
; CHECK-NEXT:    csetm w8, mi
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    mov v2.h[5], w8
; CHECK-NEXT:    csetm w8, mi
; CHECK-NEXT:    fcmp s0, s1
; CHECK-NEXT:    mov v2.h[6], w8
; CHECK-NEXT:    csetm w8, mi
; CHECK-NEXT:    mov v2.h[7], w8
; CHECK-NEXT:    xtn v0.8b, v2.8h
; CHECK-NEXT:    ret
  %cmp = fcmp olt <8 x bfloat> %a, %b
  ret <8 x i1> %cmp
}
1595
; fcmp ole (ordered and less-or-equal) on <8 x bfloat>, producing an
; <8 x i1> mask.
; The expected output is fully scalarised: lanes 1-7 are isolated with dup,
; every lane is widened to f32 by shll #16 and compared with a scalar fcmp,
; and each result becomes a 0/-1 value through csetm ls. Lane 0 seeds v2 via
; fmov, the others are inserted with mov v2.h[N], and xtn narrows the
; halfword mask to bytes.
define <8 x i1> @test_fcmp_ole(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
; CHECK-LABEL: test_fcmp_ole:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v2.4h, v1.h[1]
; CHECK-NEXT:    dup v3.4h, v0.h[1]
; CHECK-NEXT:    dup v4.4h, v1.h[2]
; CHECK-NEXT:    dup v5.4h, v0.h[2]
; CHECK-NEXT:    dup v6.4h, v0.h[3]
; CHECK-NEXT:    shll v2.4s, v2.4h, #16
; CHECK-NEXT:    shll v3.4s, v3.4h, #16
; CHECK-NEXT:    fcmp s3, s2
; CHECK-NEXT:    shll v2.4s, v1.4h, #16
; CHECK-NEXT:    shll v3.4s, v0.4h, #16
; CHECK-NEXT:    csetm w8, ls
; CHECK-NEXT:    fcmp s3, s2
; CHECK-NEXT:    shll v3.4s, v4.4h, #16
; CHECK-NEXT:    shll v4.4s, v5.4h, #16
; CHECK-NEXT:    dup v5.4h, v1.h[3]
; CHECK-NEXT:    csetm w9, ls
; CHECK-NEXT:    fmov s2, w9
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[4]
; CHECK-NEXT:    dup v6.8h, v0.h[4]
; CHECK-NEXT:    mov v2.h[1], w8
; CHECK-NEXT:    csetm w8, ls
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[5]
; CHECK-NEXT:    dup v6.8h, v0.h[5]
; CHECK-NEXT:    mov v2.h[2], w8
; CHECK-NEXT:    csetm w8, ls
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[6]
; CHECK-NEXT:    dup v6.8h, v0.h[6]
; CHECK-NEXT:    dup v1.8h, v1.h[7]
; CHECK-NEXT:    dup v0.8h, v0.h[7]
; CHECK-NEXT:    mov v2.h[3], w8
; CHECK-NEXT:    csetm w8, ls
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    shll v1.4s, v1.4h, #16
; CHECK-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-NEXT:    mov v2.h[4], w8
; CHECK-NEXT:    csetm w8, ls
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    mov v2.h[5], w8
; CHECK-NEXT:    csetm w8, ls
; CHECK-NEXT:    fcmp s0, s1
; CHECK-NEXT:    mov v2.h[6], w8
; CHECK-NEXT:    csetm w8, ls
; CHECK-NEXT:    mov v2.h[7], w8
; CHECK-NEXT:    xtn v0.8b, v2.8h
; CHECK-NEXT:    ret
  %cmp = fcmp ole <8 x bfloat> %a, %b
  ret <8 x i1> %cmp
}
1658
; fcmp ord (ordered: neither operand is NaN) on <8 x bfloat>, producing an
; <8 x i1> mask.
; The expected output is fully scalarised: lanes 1-7 are isolated with dup,
; every lane is widened to f32 by shll #16 and compared with a scalar fcmp,
; and each result becomes a 0/-1 value through csetm vc. Lane 0 seeds v2 via
; fmov, the others are inserted with mov v2.h[N], and xtn narrows the
; halfword mask to bytes.
define <8 x i1> @test_fcmp_ord(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
; CHECK-LABEL: test_fcmp_ord:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v2.4h, v1.h[1]
; CHECK-NEXT:    dup v3.4h, v0.h[1]
; CHECK-NEXT:    dup v4.4h, v1.h[2]
; CHECK-NEXT:    dup v5.4h, v0.h[2]
; CHECK-NEXT:    dup v6.4h, v0.h[3]
; CHECK-NEXT:    shll v2.4s, v2.4h, #16
; CHECK-NEXT:    shll v3.4s, v3.4h, #16
; CHECK-NEXT:    fcmp s3, s2
; CHECK-NEXT:    shll v2.4s, v1.4h, #16
; CHECK-NEXT:    shll v3.4s, v0.4h, #16
; CHECK-NEXT:    csetm w8, vc
; CHECK-NEXT:    fcmp s3, s2
; CHECK-NEXT:    shll v3.4s, v4.4h, #16
; CHECK-NEXT:    shll v4.4s, v5.4h, #16
; CHECK-NEXT:    dup v5.4h, v1.h[3]
; CHECK-NEXT:    csetm w9, vc
; CHECK-NEXT:    fmov s2, w9
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[4]
; CHECK-NEXT:    dup v6.8h, v0.h[4]
; CHECK-NEXT:    mov v2.h[1], w8
; CHECK-NEXT:    csetm w8, vc
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[5]
; CHECK-NEXT:    dup v6.8h, v0.h[5]
; CHECK-NEXT:    mov v2.h[2], w8
; CHECK-NEXT:    csetm w8, vc
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[6]
; CHECK-NEXT:    dup v6.8h, v0.h[6]
; CHECK-NEXT:    dup v1.8h, v1.h[7]
; CHECK-NEXT:    dup v0.8h, v0.h[7]
; CHECK-NEXT:    mov v2.h[3], w8
; CHECK-NEXT:    csetm w8, vc
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    shll v1.4s, v1.4h, #16
; CHECK-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-NEXT:    mov v2.h[4], w8
; CHECK-NEXT:    csetm w8, vc
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    mov v2.h[5], w8
; CHECK-NEXT:    csetm w8, vc
; CHECK-NEXT:    fcmp s0, s1
; CHECK-NEXT:    mov v2.h[6], w8
; CHECK-NEXT:    csetm w8, vc
; CHECK-NEXT:    mov v2.h[7], w8
; CHECK-NEXT:    xtn v0.8b, v2.8h
; CHECK-NEXT:    ret
  %cmp = fcmp ord <8 x bfloat> %a, %b
  ret <8 x i1> %cmp
}
1721
; Attribute group #0, referenced by the conversion, insert and fcmp tests
; above: marks those functions nounwind.
attributes #0 = { nounwind }
1723