xref: /llvm-project/llvm/test/CodeGen/AArch64/itofp-bf16.ll (revision 61510b51c33464a6bc15e4cf5b1ee07e2e0ec1c9)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
2; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-NOFP16
3; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-FP16
4; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16
5; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16
6
7; CHECK-GI:       warning: Instruction selection used fallback path for stofp_i64_bf16
8; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_i64_bf16
9; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_i32_bf16
10; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_i32_bf16
11; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_i16_bf16
12; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_i16_bf16
13; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_i8_bf16
14; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_i8_bf16
15; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_v2i64_v2bf16
16; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_v2i64_v2bf16
17; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_v3i64_v3bf16
18; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_v3i64_v3bf16
19; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_v4i64_v4bf16
20; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_v4i64_v4bf16
21; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_v8i64_v8bf16
22; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_v8i64_v8bf16
23; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_v16i64_v16bf16
24; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_v16i64_v16bf16
25; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_v32i64_v32bf16
26; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_v32i64_v32bf16
27; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_v2i32_v2bf16
28; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_v2i32_v2bf16
29; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_v3i32_v3bf16
30; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_v3i32_v3bf16
31; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_v4i32_v4bf16
32; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_v4i32_v4bf16
33; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_v8i32_v8bf16
34; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_v8i32_v8bf16
35; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_v16i32_v16bf16
36; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_v16i32_v16bf16
37; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_v32i32_v32bf16
38; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_v32i32_v32bf16
39; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_v2i16_v2bf16
40; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_v2i16_v2bf16
41; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_v3i16_v3bf16
42; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_v3i16_v3bf16
43; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_v4i16_v4bf16
44; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_v4i16_v4bf16
45; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_v8i16_v8bf16
46; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_v8i16_v8bf16
47; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_v16i16_v16bf16
48; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_v16i16_v16bf16
49; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_v32i16_v32bf16
50; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_v32i16_v32bf16
51; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_v2i8_v2bf16
52; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_v2i8_v2bf16
53; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_v3i8_v3bf16
54; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_v3i8_v3bf16
55; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_v4i8_v4bf16
56; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_v4i8_v4bf16
57; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_v8i8_v8bf16
58; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_v8i8_v8bf16
59; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_v16i8_v16bf16
60; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_v16i8_v16bf16
61; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_v32i8_v32bf16
62; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_v32i8_v32bf16
63
; Scalar signed i64 -> bfloat (sitofp). Checked under the shared CHECK prefix,
; so all four RUN configurations must emit this exact sequence; the GlobalISel
; runs are expected to report a fallback warning for this function.
; Expected shape: take |a| (cneg) and remember the sign bit; when |a| has bits
; at or above bit 53, clear its low 12 bits before scvtf and OR a flag for any
; cleared non-zero bits back into the double; narrow with fcvtxn; then add
; 0x7fff plus bit 16 of the f32 bits and shift right by 16.
64define bfloat @stofp_i64_bf16(i64 %a) {
65; CHECK-LABEL: stofp_i64_bf16:
66; CHECK:       // %bb.0: // %entry
67; CHECK-NEXT:    cmp x0, #0
68; CHECK-NEXT:    and x11, x0, #0x8000000000000000
69; CHECK-NEXT:    mov w8, #32767 // =0x7fff
70; CHECK-NEXT:    cneg x9, x0, mi
71; CHECK-NEXT:    lsr x10, x9, #53
72; CHECK-NEXT:    cmp x10, #0
73; CHECK-NEXT:    and x10, x9, #0xfffffffffffff000
74; CHECK-NEXT:    csel x10, x10, x9, ne
75; CHECK-NEXT:    scvtf d0, x10
76; CHECK-NEXT:    cset w10, ne
77; CHECK-NEXT:    tst x9, #0xfff
78; CHECK-NEXT:    csel w10, wzr, w10, eq
79; CHECK-NEXT:    fmov x9, d0
80; CHECK-NEXT:    orr x9, x9, x11
81; CHECK-NEXT:    orr x9, x9, x10
82; CHECK-NEXT:    fmov d0, x9
83; CHECK-NEXT:    fcvtxn s0, d0
84; CHECK-NEXT:    fmov w9, s0
85; CHECK-NEXT:    ubfx w10, w9, #16, #1
86; CHECK-NEXT:    add w8, w9, w8
87; CHECK-NEXT:    add w8, w10, w8
88; CHECK-NEXT:    lsr w8, w8, #16
89; CHECK-NEXT:    fmov s0, w8
90; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
91; CHECK-NEXT:    ret
92entry:
93  %c = sitofp i64 %a to bfloat
94  ret bfloat %c
95}
96
; Scalar unsigned i64 -> bfloat (uitofp). Checked under the shared CHECK
; prefix for all four RUN configurations; the GlobalISel runs are expected to
; report a fallback warning for this function.
; Expected shape: when the input has bits at or above bit 53, clear its low 12
; bits before ucvtf and OR a flag for any cleared non-zero bits back into the
; double; narrow with fcvtxn; then add 0x7fff plus bit 16 of the f32 bits and
; shift right by 16. No sign handling is expected here.
97define bfloat @utofp_i64_bf16(i64 %a) {
98; CHECK-LABEL: utofp_i64_bf16:
99; CHECK:       // %bb.0: // %entry
100; CHECK-NEXT:    lsr x9, x0, #53
101; CHECK-NEXT:    mov w8, #32767 // =0x7fff
102; CHECK-NEXT:    cmp x9, #0
103; CHECK-NEXT:    and x9, x0, #0xfffffffffffff000
104; CHECK-NEXT:    csel x9, x9, x0, ne
105; CHECK-NEXT:    ucvtf d0, x9
106; CHECK-NEXT:    cset w9, ne
107; CHECK-NEXT:    tst x0, #0xfff
108; CHECK-NEXT:    csel w9, wzr, w9, eq
109; CHECK-NEXT:    fmov x10, d0
110; CHECK-NEXT:    orr x9, x10, x9
111; CHECK-NEXT:    fmov d0, x9
112; CHECK-NEXT:    fcvtxn s0, d0
113; CHECK-NEXT:    fmov w9, s0
114; CHECK-NEXT:    ubfx w10, w9, #16, #1
115; CHECK-NEXT:    add w8, w9, w8
116; CHECK-NEXT:    add w8, w10, w8
117; CHECK-NEXT:    lsr w8, w8, #16
118; CHECK-NEXT:    fmov s0, w8
119; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
120; CHECK-NEXT:    ret
121entry:
122  %c = uitofp i64 %a to bfloat
123  ret bfloat %c
124}
125
; Scalar signed i32 -> bfloat (sitofp). Checked under the shared CHECK prefix
; for all four RUN configurations; the GlobalISel runs are expected to report
; a fallback warning for this function.
; Expected shape: scvtf to a double, fcvtxn down to f32, then add 0x7fff plus
; bit 16 of the f32 bits and shift right by 16.
126define bfloat @stofp_i32_bf16(i32 %a) {
127; CHECK-LABEL: stofp_i32_bf16:
128; CHECK:       // %bb.0: // %entry
129; CHECK-NEXT:    scvtf d0, w0
130; CHECK-NEXT:    mov w8, #32767 // =0x7fff
131; CHECK-NEXT:    fcvtxn s0, d0
132; CHECK-NEXT:    fmov w9, s0
133; CHECK-NEXT:    ubfx w10, w9, #16, #1
134; CHECK-NEXT:    add w8, w9, w8
135; CHECK-NEXT:    add w8, w10, w8
136; CHECK-NEXT:    lsr w8, w8, #16
137; CHECK-NEXT:    fmov s0, w8
138; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
139; CHECK-NEXT:    ret
140entry:
141  %c = sitofp i32 %a to bfloat
142  ret bfloat %c
143}
144
; Scalar unsigned i32 -> bfloat (uitofp). Checked under the shared CHECK
; prefix for all four RUN configurations; the GlobalISel runs are expected to
; report a fallback warning for this function.
; Expected shape: ucvtf to a double, fcvtxn down to f32, then add 0x7fff plus
; bit 16 of the f32 bits and shift right by 16.
145define bfloat @utofp_i32_bf16(i32 %a) {
146; CHECK-LABEL: utofp_i32_bf16:
147; CHECK:       // %bb.0: // %entry
148; CHECK-NEXT:    ucvtf d0, w0
149; CHECK-NEXT:    mov w8, #32767 // =0x7fff
150; CHECK-NEXT:    fcvtxn s0, d0
151; CHECK-NEXT:    fmov w9, s0
152; CHECK-NEXT:    ubfx w10, w9, #16, #1
153; CHECK-NEXT:    add w8, w9, w8
154; CHECK-NEXT:    add w8, w10, w8
155; CHECK-NEXT:    lsr w8, w8, #16
156; CHECK-NEXT:    fmov s0, w8
157; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
158; CHECK-NEXT:    ret
159entry:
160  %c = uitofp i32 %a to bfloat
161  ret bfloat %c
162}
163
; Scalar signed i16 -> bfloat (sitofp). Checked under the shared CHECK prefix
; for all four RUN configurations; the GlobalISel runs are expected to report
; a fallback warning for this function.
; Expected shape: sign-extend with sxth, scvtf straight to f32 (no double
; intermediate), then add 0x7fff plus bit 16 of the f32 bits and shift right
; by 16.
164define bfloat @stofp_i16_bf16(i16 %a) {
165; CHECK-LABEL: stofp_i16_bf16:
166; CHECK:       // %bb.0: // %entry
167; CHECK-NEXT:    sxth w9, w0
168; CHECK-NEXT:    mov w8, #32767 // =0x7fff
169; CHECK-NEXT:    scvtf s0, w9
170; CHECK-NEXT:    fmov w9, s0
171; CHECK-NEXT:    ubfx w10, w9, #16, #1
172; CHECK-NEXT:    add w8, w9, w8
173; CHECK-NEXT:    add w8, w10, w8
174; CHECK-NEXT:    lsr w8, w8, #16
175; CHECK-NEXT:    fmov s0, w8
176; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
177; CHECK-NEXT:    ret
178entry:
179  %c = sitofp i16 %a to bfloat
180  ret bfloat %c
181}
182
; Scalar unsigned i16 -> bfloat (uitofp). Checked under the shared CHECK
; prefix for all four RUN configurations; the GlobalISel runs are expected to
; report a fallback warning for this function.
; Expected shape: mask to 16 bits (and #0xffff), ucvtf straight to f32, then
; add 0x7fff plus bit 16 of the f32 bits and shift right by 16.
183define bfloat @utofp_i16_bf16(i16 %a) {
184; CHECK-LABEL: utofp_i16_bf16:
185; CHECK:       // %bb.0: // %entry
186; CHECK-NEXT:    and w9, w0, #0xffff
187; CHECK-NEXT:    mov w8, #32767 // =0x7fff
188; CHECK-NEXT:    ucvtf s0, w9
189; CHECK-NEXT:    fmov w9, s0
190; CHECK-NEXT:    ubfx w10, w9, #16, #1
191; CHECK-NEXT:    add w8, w9, w8
192; CHECK-NEXT:    add w8, w10, w8
193; CHECK-NEXT:    lsr w8, w8, #16
194; CHECK-NEXT:    fmov s0, w8
195; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
196; CHECK-NEXT:    ret
197entry:
198  %c = uitofp i16 %a to bfloat
199  ret bfloat %c
200}
201
; Scalar signed i8 -> bfloat (sitofp). Checked under the shared CHECK prefix
; for all four RUN configurations; the GlobalISel runs are expected to report
; a fallback warning for this function.
; Expected shape: sign-extend with sxtb, scvtf straight to f32, then add
; 0x7fff plus bit 16 of the f32 bits and shift right by 16.
202define bfloat @stofp_i8_bf16(i8 %a) {
203; CHECK-LABEL: stofp_i8_bf16:
204; CHECK:       // %bb.0: // %entry
205; CHECK-NEXT:    sxtb w9, w0
206; CHECK-NEXT:    mov w8, #32767 // =0x7fff
207; CHECK-NEXT:    scvtf s0, w9
208; CHECK-NEXT:    fmov w9, s0
209; CHECK-NEXT:    ubfx w10, w9, #16, #1
210; CHECK-NEXT:    add w8, w9, w8
211; CHECK-NEXT:    add w8, w10, w8
212; CHECK-NEXT:    lsr w8, w8, #16
213; CHECK-NEXT:    fmov s0, w8
214; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
215; CHECK-NEXT:    ret
216entry:
217  %c = sitofp i8 %a to bfloat
218  ret bfloat %c
219}
220
; Scalar unsigned i8 -> bfloat (uitofp). Checked under the shared CHECK prefix
; for all four RUN configurations; the GlobalISel runs are expected to report
; a fallback warning for this function.
; Expected shape: mask to 8 bits (and #0xff), ucvtf straight to f32, then add
; 0x7fff plus bit 16 of the f32 bits and shift right by 16.
221define bfloat @utofp_i8_bf16(i8 %a) {
222; CHECK-LABEL: utofp_i8_bf16:
223; CHECK:       // %bb.0: // %entry
224; CHECK-NEXT:    and w9, w0, #0xff
225; CHECK-NEXT:    mov w8, #32767 // =0x7fff
226; CHECK-NEXT:    ucvtf s0, w9
227; CHECK-NEXT:    fmov w9, s0
228; CHECK-NEXT:    ubfx w10, w9, #16, #1
229; CHECK-NEXT:    add w8, w9, w8
230; CHECK-NEXT:    add w8, w10, w8
231; CHECK-NEXT:    lsr w8, w8, #16
232; CHECK-NEXT:    fmov s0, w8
233; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
234; CHECK-NEXT:    ret
235entry:
236  %c = uitofp i8 %a to bfloat
237  ret bfloat %c
238}
239
; <2 x i64> -> <2 x bfloat> signed conversion (sitofp). Checked under the
; shared CHECK prefix for all four RUN configurations; the GlobalISel runs are
; expected to report a fallback warning for this function.
; The expected code is scalarized: each lane is moved to a general-purpose
; register (mov x9, v0.d[1] / fmov x10, d0) and gets the same cneg / bit-53
; test / scvtf / fcvtxn / add-0x7fff-and-shift sequence as the scalar i64
; case, interleaved; the two halves are recombined with mov v0.h[1], v1.h[0].
240define <2 x bfloat> @stofp_v2i64_v2bf16(<2 x i64> %a) {
241; CHECK-LABEL: stofp_v2i64_v2bf16:
242; CHECK:       // %bb.0: // %entry
243; CHECK-NEXT:    mov x9, v0.d[1]
244; CHECK-NEXT:    mov w8, #32767 // =0x7fff
245; CHECK-NEXT:    cmp x9, #0
246; CHECK-NEXT:    cneg x10, x9, mi
247; CHECK-NEXT:    and x9, x9, #0x8000000000000000
248; CHECK-NEXT:    lsr x11, x10, #53
249; CHECK-NEXT:    and x12, x10, #0xfffffffffffff000
250; CHECK-NEXT:    cmp x11, #0
251; CHECK-NEXT:    csel x11, x12, x10, ne
252; CHECK-NEXT:    cset w12, ne
253; CHECK-NEXT:    tst x10, #0xfff
254; CHECK-NEXT:    fmov x10, d0
255; CHECK-NEXT:    csel w12, wzr, w12, eq
256; CHECK-NEXT:    scvtf d0, x11
257; CHECK-NEXT:    cmp x10, #0
258; CHECK-NEXT:    cneg x13, x10, mi
259; CHECK-NEXT:    and x10, x10, #0x8000000000000000
260; CHECK-NEXT:    lsr x14, x13, #53
261; CHECK-NEXT:    cmp x14, #0
262; CHECK-NEXT:    and x14, x13, #0xfffffffffffff000
263; CHECK-NEXT:    csel x11, x14, x13, ne
264; CHECK-NEXT:    cset w14, ne
265; CHECK-NEXT:    tst x13, #0xfff
266; CHECK-NEXT:    scvtf d1, x11
267; CHECK-NEXT:    fmov x11, d0
268; CHECK-NEXT:    orr x9, x11, x9
269; CHECK-NEXT:    csel w11, wzr, w14, eq
270; CHECK-NEXT:    fmov x13, d1
271; CHECK-NEXT:    orr x9, x9, x12
272; CHECK-NEXT:    fmov d0, x9
273; CHECK-NEXT:    orr x10, x13, x10
274; CHECK-NEXT:    orr x10, x10, x11
275; CHECK-NEXT:    fcvtxn s0, d0
276; CHECK-NEXT:    fmov d1, x10
277; CHECK-NEXT:    fcvtxn s1, d1
278; CHECK-NEXT:    fmov w9, s0
279; CHECK-NEXT:    ubfx w11, w9, #16, #1
280; CHECK-NEXT:    add w9, w9, w8
281; CHECK-NEXT:    fmov w10, s1
282; CHECK-NEXT:    add w9, w11, w9
283; CHECK-NEXT:    lsr w9, w9, #16
284; CHECK-NEXT:    ubfx w12, w10, #16, #1
285; CHECK-NEXT:    add w8, w10, w8
286; CHECK-NEXT:    fmov s1, w9
287; CHECK-NEXT:    add w8, w12, w8
288; CHECK-NEXT:    lsr w8, w8, #16
289; CHECK-NEXT:    fmov s0, w8
290; CHECK-NEXT:    mov v0.h[1], v1.h[0]
291; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
292; CHECK-NEXT:    ret
293entry:
294  %c = sitofp <2 x i64> %a to <2 x bfloat>
295  ret <2 x bfloat> %c
296}
297
; <2 x i64> -> <2 x bfloat> unsigned conversion (uitofp). Checked under the
; shared CHECK prefix for all four RUN configurations; the GlobalISel runs are
; expected to report a fallback warning for this function.
; The expected code is scalarized: each lane is moved to a general-purpose
; register and gets the bit-53 test / ucvtf / fcvtxn / add-0x7fff-and-shift
; sequence of the scalar unsigned i64 case; the two halves are recombined with
; mov v0.h[1], v1.h[0].
298define <2 x bfloat> @utofp_v2i64_v2bf16(<2 x i64> %a) {
299; CHECK-LABEL: utofp_v2i64_v2bf16:
300; CHECK:       // %bb.0: // %entry
301; CHECK-NEXT:    mov x9, v0.d[1]
302; CHECK-NEXT:    fmov x11, d0
303; CHECK-NEXT:    mov w8, #32767 // =0x7fff
304; CHECK-NEXT:    lsr x10, x9, #53
305; CHECK-NEXT:    and x12, x9, #0xfffffffffffff000
306; CHECK-NEXT:    cmp x10, #0
307; CHECK-NEXT:    lsr x10, x11, #53
308; CHECK-NEXT:    csel x12, x12, x9, ne
309; CHECK-NEXT:    cset w13, ne
310; CHECK-NEXT:    tst x9, #0xfff
311; CHECK-NEXT:    csel w9, wzr, w13, eq
312; CHECK-NEXT:    cmp x10, #0
313; CHECK-NEXT:    and x10, x11, #0xfffffffffffff000
314; CHECK-NEXT:    csel x10, x10, x11, ne
315; CHECK-NEXT:    ucvtf d0, x12
316; CHECK-NEXT:    ucvtf d1, x10
317; CHECK-NEXT:    cset w10, ne
318; CHECK-NEXT:    tst x11, #0xfff
319; CHECK-NEXT:    csel w10, wzr, w10, eq
320; CHECK-NEXT:    fmov x11, d0
321; CHECK-NEXT:    fmov x12, d1
322; CHECK-NEXT:    orr x9, x11, x9
323; CHECK-NEXT:    orr x10, x12, x10
324; CHECK-NEXT:    fmov d0, x9
325; CHECK-NEXT:    fmov d1, x10
326; CHECK-NEXT:    fcvtxn s0, d0
327; CHECK-NEXT:    fcvtxn s1, d1
328; CHECK-NEXT:    fmov w9, s0
329; CHECK-NEXT:    fmov w10, s1
330; CHECK-NEXT:    ubfx w11, w9, #16, #1
331; CHECK-NEXT:    add w9, w9, w8
332; CHECK-NEXT:    ubfx w12, w10, #16, #1
333; CHECK-NEXT:    add w8, w10, w8
334; CHECK-NEXT:    add w9, w11, w9
335; CHECK-NEXT:    add w8, w12, w8
336; CHECK-NEXT:    lsr w9, w9, #16
337; CHECK-NEXT:    lsr w8, w8, #16
338; CHECK-NEXT:    fmov s1, w9
339; CHECK-NEXT:    fmov s0, w8
340; CHECK-NEXT:    mov v0.h[1], v1.h[0]
341; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
342; CHECK-NEXT:    ret
343entry:
344  %c = uitofp <2 x i64> %a to <2 x bfloat>
345  ret <2 x bfloat> %c
346}
347
; <3 x i64> -> <3 x bfloat> signed conversion (sitofp). Checked under the
; shared CHECK prefix for all four RUN configurations; the GlobalISel runs are
; expected to report a fallback warning for this function.
; The three elements arrive in d0/d1/d2; d0 and d1 are packed into one vector
; (mov v0.d[1], v1.d[0]) and the rest is vector code: scvtf on .2d,
; fcvtn/fcvtn2 to .4s, add the movi #127, msl #8 constant plus bit 16 of each
; lane, use fcmeq of the value with itself to choose between that sum and the
; input with #64, lsl #16 OR-ed in, then shrn by 16.
348define <3 x bfloat> @stofp_v3i64_v3bf16(<3 x i64> %a) {
349; CHECK-LABEL: stofp_v3i64_v3bf16:
350; CHECK:       // %bb.0: // %entry
351; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
352; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
353; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
354; CHECK-NEXT:    mov v0.d[1], v1.d[0]
355; CHECK-NEXT:    scvtf v1.2d, v2.2d
356; CHECK-NEXT:    movi v2.4s, #127, msl #8
357; CHECK-NEXT:    scvtf v0.2d, v0.2d
358; CHECK-NEXT:    fcvtn v0.2s, v0.2d
359; CHECK-NEXT:    fcvtn2 v0.4s, v1.2d
360; CHECK-NEXT:    movi v1.4s, #1
361; CHECK-NEXT:    ushr v3.4s, v0.4s, #16
362; CHECK-NEXT:    add v2.4s, v0.4s, v2.4s
363; CHECK-NEXT:    and v1.16b, v3.16b, v1.16b
364; CHECK-NEXT:    fcmeq v3.4s, v0.4s, v0.4s
365; CHECK-NEXT:    orr v0.4s, #64, lsl #16
366; CHECK-NEXT:    add v1.4s, v1.4s, v2.4s
367; CHECK-NEXT:    bit v0.16b, v1.16b, v3.16b
368; CHECK-NEXT:    shrn v0.4h, v0.4s, #16
369; CHECK-NEXT:    ret
370entry:
371  %c = sitofp <3 x i64> %a to <3 x bfloat>
372  ret <3 x bfloat> %c
373}
374
; <3 x i64> -> <3 x bfloat> unsigned conversion (uitofp). Checked under the
; shared CHECK prefix for all four RUN configurations; the GlobalISel runs are
; expected to report a fallback warning for this function.
; The three elements arrive in d0/d1/d2; d0 and d1 are packed into one vector
; (mov v0.d[1], v1.d[0]) and the rest is vector code: ucvtf on .2d,
; fcvtn/fcvtn2 to .4s, add the movi #127, msl #8 constant plus bit 16 of each
; lane, use fcmeq of the value with itself to choose between that sum and the
; input with #64, lsl #16 OR-ed in, then shrn by 16.
375define <3 x bfloat> @utofp_v3i64_v3bf16(<3 x i64> %a) {
376; CHECK-LABEL: utofp_v3i64_v3bf16:
377; CHECK:       // %bb.0: // %entry
378; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
379; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
380; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
381; CHECK-NEXT:    mov v0.d[1], v1.d[0]
382; CHECK-NEXT:    ucvtf v1.2d, v2.2d
383; CHECK-NEXT:    movi v2.4s, #127, msl #8
384; CHECK-NEXT:    ucvtf v0.2d, v0.2d
385; CHECK-NEXT:    fcvtn v0.2s, v0.2d
386; CHECK-NEXT:    fcvtn2 v0.4s, v1.2d
387; CHECK-NEXT:    movi v1.4s, #1
388; CHECK-NEXT:    ushr v3.4s, v0.4s, #16
389; CHECK-NEXT:    add v2.4s, v0.4s, v2.4s
390; CHECK-NEXT:    and v1.16b, v3.16b, v1.16b
391; CHECK-NEXT:    fcmeq v3.4s, v0.4s, v0.4s
392; CHECK-NEXT:    orr v0.4s, #64, lsl #16
393; CHECK-NEXT:    add v1.4s, v1.4s, v2.4s
394; CHECK-NEXT:    bit v0.16b, v1.16b, v3.16b
395; CHECK-NEXT:    shrn v0.4h, v0.4s, #16
396; CHECK-NEXT:    ret
397entry:
398  %c = uitofp <3 x i64> %a to <3 x bfloat>
399  ret <3 x bfloat> %c
400}
401
; <4 x i64> -> <4 x bfloat> signed conversion (sitofp). Checked under the
; shared CHECK prefix for all four RUN configurations; the GlobalISel runs are
; expected to report a fallback warning for this function.
; Expected shape: scvtf on both .2d inputs, fcvtn/fcvtn2 into one .4s vector,
; add the movi #127, msl #8 constant plus bit 16 of each lane, use fcmeq of
; the value with itself to choose between that sum and the input with
; #64, lsl #16 OR-ed in, then shrn by 16 to produce the .4h result.
402define <4 x bfloat> @stofp_v4i64_v4bf16(<4 x i64> %a) {
403; CHECK-LABEL: stofp_v4i64_v4bf16:
404; CHECK:       // %bb.0: // %entry
405; CHECK-NEXT:    scvtf v0.2d, v0.2d
406; CHECK-NEXT:    scvtf v1.2d, v1.2d
407; CHECK-NEXT:    movi v2.4s, #127, msl #8
408; CHECK-NEXT:    fcvtn v0.2s, v0.2d
409; CHECK-NEXT:    fcvtn2 v0.4s, v1.2d
410; CHECK-NEXT:    movi v1.4s, #1
411; CHECK-NEXT:    ushr v3.4s, v0.4s, #16
412; CHECK-NEXT:    add v2.4s, v0.4s, v2.4s
413; CHECK-NEXT:    and v1.16b, v3.16b, v1.16b
414; CHECK-NEXT:    fcmeq v3.4s, v0.4s, v0.4s
415; CHECK-NEXT:    orr v0.4s, #64, lsl #16
416; CHECK-NEXT:    add v1.4s, v1.4s, v2.4s
417; CHECK-NEXT:    bit v0.16b, v1.16b, v3.16b
418; CHECK-NEXT:    shrn v0.4h, v0.4s, #16
419; CHECK-NEXT:    ret
420entry:
421  %c = sitofp <4 x i64> %a to <4 x bfloat>
422  ret <4 x bfloat> %c
423}
424
; <4 x i64> -> <4 x bfloat> unsigned conversion (uitofp). Checked under the
; shared CHECK prefix for all four RUN configurations; the GlobalISel runs are
; expected to report a fallback warning for this function.
; Expected shape: ucvtf on both .2d inputs, fcvtn/fcvtn2 into one .4s vector,
; add the movi #127, msl #8 constant plus bit 16 of each lane, use fcmeq of
; the value with itself to choose between that sum and the input with
; #64, lsl #16 OR-ed in, then shrn by 16 to produce the .4h result.
425define <4 x bfloat> @utofp_v4i64_v4bf16(<4 x i64> %a) {
426; CHECK-LABEL: utofp_v4i64_v4bf16:
427; CHECK:       // %bb.0: // %entry
428; CHECK-NEXT:    ucvtf v0.2d, v0.2d
429; CHECK-NEXT:    ucvtf v1.2d, v1.2d
430; CHECK-NEXT:    movi v2.4s, #127, msl #8
431; CHECK-NEXT:    fcvtn v0.2s, v0.2d
432; CHECK-NEXT:    fcvtn2 v0.4s, v1.2d
433; CHECK-NEXT:    movi v1.4s, #1
434; CHECK-NEXT:    ushr v3.4s, v0.4s, #16
435; CHECK-NEXT:    add v2.4s, v0.4s, v2.4s
436; CHECK-NEXT:    and v1.16b, v3.16b, v1.16b
437; CHECK-NEXT:    fcmeq v3.4s, v0.4s, v0.4s
438; CHECK-NEXT:    orr v0.4s, #64, lsl #16
439; CHECK-NEXT:    add v1.4s, v1.4s, v2.4s
440; CHECK-NEXT:    bit v0.16b, v1.16b, v3.16b
441; CHECK-NEXT:    shrn v0.4h, v0.4s, #16
442; CHECK-NEXT:    ret
443entry:
444  %c = uitofp <4 x i64> %a to <4 x bfloat>
445  ret <4 x bfloat> %c
446}
447
; <8 x i64> -> <8 x bfloat> signed conversion (sitofp). Checked under the
; shared CHECK prefix for all four RUN configurations; the GlobalISel runs are
; expected to report a fallback warning for this function.
; Expected shape: the four .2d inputs are converted with scvtf and narrowed
; pairwise (fcvtn/fcvtn2) into two .4s vectors; each gets the add / fcmeq /
; orr / bit sequence, and the upper 16-bit halves of both are gathered into
; one .8h result with uzp2 (no separate shrn).
448define <8 x bfloat> @stofp_v8i64_v8bf16(<8 x i64> %a) {
449; CHECK-LABEL: stofp_v8i64_v8bf16:
450; CHECK:       // %bb.0: // %entry
451; CHECK-NEXT:    scvtf v2.2d, v2.2d
452; CHECK-NEXT:    scvtf v0.2d, v0.2d
453; CHECK-NEXT:    scvtf v3.2d, v3.2d
454; CHECK-NEXT:    scvtf v1.2d, v1.2d
455; CHECK-NEXT:    fcvtn v2.2s, v2.2d
456; CHECK-NEXT:    fcvtn v0.2s, v0.2d
457; CHECK-NEXT:    fcvtn2 v2.4s, v3.2d
458; CHECK-NEXT:    fcvtn2 v0.4s, v1.2d
459; CHECK-NEXT:    movi v1.4s, #1
460; CHECK-NEXT:    movi v3.4s, #127, msl #8
461; CHECK-NEXT:    ushr v4.4s, v2.4s, #16
462; CHECK-NEXT:    ushr v5.4s, v0.4s, #16
463; CHECK-NEXT:    add v6.4s, v2.4s, v3.4s
464; CHECK-NEXT:    add v3.4s, v0.4s, v3.4s
465; CHECK-NEXT:    and v4.16b, v4.16b, v1.16b
466; CHECK-NEXT:    and v1.16b, v5.16b, v1.16b
467; CHECK-NEXT:    fcmeq v5.4s, v2.4s, v2.4s
468; CHECK-NEXT:    orr v2.4s, #64, lsl #16
469; CHECK-NEXT:    add v4.4s, v4.4s, v6.4s
470; CHECK-NEXT:    fcmeq v6.4s, v0.4s, v0.4s
471; CHECK-NEXT:    add v1.4s, v1.4s, v3.4s
472; CHECK-NEXT:    orr v0.4s, #64, lsl #16
473; CHECK-NEXT:    bit v2.16b, v4.16b, v5.16b
474; CHECK-NEXT:    bit v0.16b, v1.16b, v6.16b
475; CHECK-NEXT:    uzp2 v0.8h, v0.8h, v2.8h
476; CHECK-NEXT:    ret
477entry:
478  %c = sitofp <8 x i64> %a to <8 x bfloat>
479  ret <8 x bfloat> %c
480}
481
; <8 x i64> -> <8 x bfloat> unsigned conversion (uitofp). Checked under the
; shared CHECK prefix for all four RUN configurations; the GlobalISel runs are
; expected to report a fallback warning for this function.
; Expected shape: the four .2d inputs are converted with ucvtf and narrowed
; pairwise (fcvtn/fcvtn2) into two .4s vectors; each gets the add / fcmeq /
; orr / bit sequence, and the upper 16-bit halves of both are gathered into
; one .8h result with uzp2 (no separate shrn).
482define <8 x bfloat> @utofp_v8i64_v8bf16(<8 x i64> %a) {
483; CHECK-LABEL: utofp_v8i64_v8bf16:
484; CHECK:       // %bb.0: // %entry
485; CHECK-NEXT:    ucvtf v2.2d, v2.2d
486; CHECK-NEXT:    ucvtf v0.2d, v0.2d
487; CHECK-NEXT:    ucvtf v3.2d, v3.2d
488; CHECK-NEXT:    ucvtf v1.2d, v1.2d
489; CHECK-NEXT:    fcvtn v2.2s, v2.2d
490; CHECK-NEXT:    fcvtn v0.2s, v0.2d
491; CHECK-NEXT:    fcvtn2 v2.4s, v3.2d
492; CHECK-NEXT:    fcvtn2 v0.4s, v1.2d
493; CHECK-NEXT:    movi v1.4s, #1
494; CHECK-NEXT:    movi v3.4s, #127, msl #8
495; CHECK-NEXT:    ushr v4.4s, v2.4s, #16
496; CHECK-NEXT:    ushr v5.4s, v0.4s, #16
497; CHECK-NEXT:    add v6.4s, v2.4s, v3.4s
498; CHECK-NEXT:    add v3.4s, v0.4s, v3.4s
499; CHECK-NEXT:    and v4.16b, v4.16b, v1.16b
500; CHECK-NEXT:    and v1.16b, v5.16b, v1.16b
501; CHECK-NEXT:    fcmeq v5.4s, v2.4s, v2.4s
502; CHECK-NEXT:    orr v2.4s, #64, lsl #16
503; CHECK-NEXT:    add v4.4s, v4.4s, v6.4s
504; CHECK-NEXT:    fcmeq v6.4s, v0.4s, v0.4s
505; CHECK-NEXT:    add v1.4s, v1.4s, v3.4s
506; CHECK-NEXT:    orr v0.4s, #64, lsl #16
507; CHECK-NEXT:    bit v2.16b, v4.16b, v5.16b
508; CHECK-NEXT:    bit v0.16b, v1.16b, v6.16b
509; CHECK-NEXT:    uzp2 v0.8h, v0.8h, v2.8h
510; CHECK-NEXT:    ret
511entry:
512  %c = uitofp <8 x i64> %a to <8 x bfloat>
513  ret <8 x bfloat> %c
514}
515
; <16 x i64> -> <16 x bfloat> signed conversion (sitofp). Checked under the
; shared CHECK prefix for all four RUN configurations; the GlobalISel runs are
; expected to report a fallback warning for this function.
; Expected shape: all eight .2d inputs (v0-v7) are converted with scvtf and
; narrowed pairwise into four .4s vectors; each gets the add / fcmeq / orr
; sequence followed by a bit, bif or bsl select, and two uzp2 instructions
; assemble the two .8h result registers.
516define <16 x bfloat> @stofp_v16i64_v16bf16(<16 x i64> %a) {
517; CHECK-LABEL: stofp_v16i64_v16bf16:
518; CHECK:       // %bb.0: // %entry
519; CHECK-NEXT:    scvtf v0.2d, v0.2d
520; CHECK-NEXT:    scvtf v2.2d, v2.2d
521; CHECK-NEXT:    scvtf v6.2d, v6.2d
522; CHECK-NEXT:    scvtf v4.2d, v4.2d
523; CHECK-NEXT:    scvtf v1.2d, v1.2d
524; CHECK-NEXT:    scvtf v3.2d, v3.2d
525; CHECK-NEXT:    scvtf v7.2d, v7.2d
526; CHECK-NEXT:    scvtf v5.2d, v5.2d
527; CHECK-NEXT:    fcvtn v0.2s, v0.2d
528; CHECK-NEXT:    fcvtn v2.2s, v2.2d
529; CHECK-NEXT:    fcvtn v6.2s, v6.2d
530; CHECK-NEXT:    fcvtn v4.2s, v4.2d
531; CHECK-NEXT:    fcvtn2 v0.4s, v1.2d
532; CHECK-NEXT:    fcvtn2 v2.4s, v3.2d
533; CHECK-NEXT:    fcvtn2 v6.4s, v7.2d
534; CHECK-NEXT:    fcvtn2 v4.4s, v5.2d
535; CHECK-NEXT:    movi v1.4s, #1
536; CHECK-NEXT:    movi v3.4s, #127, msl #8
537; CHECK-NEXT:    ushr v7.4s, v0.4s, #16
538; CHECK-NEXT:    ushr v5.4s, v2.4s, #16
539; CHECK-NEXT:    ushr v16.4s, v6.4s, #16
540; CHECK-NEXT:    ushr v17.4s, v4.4s, #16
541; CHECK-NEXT:    add v19.4s, v0.4s, v3.4s
542; CHECK-NEXT:    add v18.4s, v2.4s, v3.4s
543; CHECK-NEXT:    add v20.4s, v6.4s, v3.4s
544; CHECK-NEXT:    add v3.4s, v4.4s, v3.4s
545; CHECK-NEXT:    and v7.16b, v7.16b, v1.16b
546; CHECK-NEXT:    and v5.16b, v5.16b, v1.16b
547; CHECK-NEXT:    and v16.16b, v16.16b, v1.16b
548; CHECK-NEXT:    and v1.16b, v17.16b, v1.16b
549; CHECK-NEXT:    fcmeq v17.4s, v2.4s, v2.4s
550; CHECK-NEXT:    orr v2.4s, #64, lsl #16
551; CHECK-NEXT:    add v7.4s, v7.4s, v19.4s
552; CHECK-NEXT:    fcmeq v19.4s, v6.4s, v6.4s
553; CHECK-NEXT:    add v5.4s, v5.4s, v18.4s
554; CHECK-NEXT:    fcmeq v18.4s, v0.4s, v0.4s
555; CHECK-NEXT:    add v1.4s, v1.4s, v3.4s
556; CHECK-NEXT:    fcmeq v3.4s, v4.4s, v4.4s
557; CHECK-NEXT:    add v16.4s, v16.4s, v20.4s
558; CHECK-NEXT:    orr v0.4s, #64, lsl #16
559; CHECK-NEXT:    orr v6.4s, #64, lsl #16
560; CHECK-NEXT:    orr v4.4s, #64, lsl #16
561; CHECK-NEXT:    bit v2.16b, v5.16b, v17.16b
562; CHECK-NEXT:    mov v5.16b, v19.16b
563; CHECK-NEXT:    bit v0.16b, v7.16b, v18.16b
564; CHECK-NEXT:    bif v1.16b, v4.16b, v3.16b
565; CHECK-NEXT:    bsl v5.16b, v16.16b, v6.16b
566; CHECK-NEXT:    uzp2 v0.8h, v0.8h, v2.8h
567; CHECK-NEXT:    uzp2 v1.8h, v1.8h, v5.8h
568; CHECK-NEXT:    ret
569entry:
570  %c = sitofp <16 x i64> %a to <16 x bfloat>
571  ret <16 x bfloat> %c
572}
573
; <16 x i64> -> <16 x bfloat> unsigned conversion (uitofp). Checked under the
; shared CHECK prefix for all four RUN configurations; the GlobalISel runs are
; expected to report a fallback warning for this function.
; Expected shape: all eight .2d inputs (v0-v7) are converted with ucvtf and
; narrowed pairwise into four .4s vectors; each gets the add / fcmeq / orr
; sequence followed by a bit, bif or bsl select, and two uzp2 instructions
; assemble the two .8h result registers.
574define <16 x bfloat> @utofp_v16i64_v16bf16(<16 x i64> %a) {
575; CHECK-LABEL: utofp_v16i64_v16bf16:
576; CHECK:       // %bb.0: // %entry
577; CHECK-NEXT:    ucvtf v0.2d, v0.2d
578; CHECK-NEXT:    ucvtf v2.2d, v2.2d
579; CHECK-NEXT:    ucvtf v6.2d, v6.2d
580; CHECK-NEXT:    ucvtf v4.2d, v4.2d
581; CHECK-NEXT:    ucvtf v1.2d, v1.2d
582; CHECK-NEXT:    ucvtf v3.2d, v3.2d
583; CHECK-NEXT:    ucvtf v7.2d, v7.2d
584; CHECK-NEXT:    ucvtf v5.2d, v5.2d
585; CHECK-NEXT:    fcvtn v0.2s, v0.2d
586; CHECK-NEXT:    fcvtn v2.2s, v2.2d
587; CHECK-NEXT:    fcvtn v6.2s, v6.2d
588; CHECK-NEXT:    fcvtn v4.2s, v4.2d
589; CHECK-NEXT:    fcvtn2 v0.4s, v1.2d
590; CHECK-NEXT:    fcvtn2 v2.4s, v3.2d
591; CHECK-NEXT:    fcvtn2 v6.4s, v7.2d
592; CHECK-NEXT:    fcvtn2 v4.4s, v5.2d
593; CHECK-NEXT:    movi v1.4s, #1
594; CHECK-NEXT:    movi v3.4s, #127, msl #8
595; CHECK-NEXT:    ushr v7.4s, v0.4s, #16
596; CHECK-NEXT:    ushr v5.4s, v2.4s, #16
597; CHECK-NEXT:    ushr v16.4s, v6.4s, #16
598; CHECK-NEXT:    ushr v17.4s, v4.4s, #16
599; CHECK-NEXT:    add v19.4s, v0.4s, v3.4s
600; CHECK-NEXT:    add v18.4s, v2.4s, v3.4s
601; CHECK-NEXT:    add v20.4s, v6.4s, v3.4s
602; CHECK-NEXT:    add v3.4s, v4.4s, v3.4s
603; CHECK-NEXT:    and v7.16b, v7.16b, v1.16b
604; CHECK-NEXT:    and v5.16b, v5.16b, v1.16b
605; CHECK-NEXT:    and v16.16b, v16.16b, v1.16b
606; CHECK-NEXT:    and v1.16b, v17.16b, v1.16b
607; CHECK-NEXT:    fcmeq v17.4s, v2.4s, v2.4s
608; CHECK-NEXT:    orr v2.4s, #64, lsl #16
609; CHECK-NEXT:    add v7.4s, v7.4s, v19.4s
610; CHECK-NEXT:    fcmeq v19.4s, v6.4s, v6.4s
611; CHECK-NEXT:    add v5.4s, v5.4s, v18.4s
612; CHECK-NEXT:    fcmeq v18.4s, v0.4s, v0.4s
613; CHECK-NEXT:    add v1.4s, v1.4s, v3.4s
614; CHECK-NEXT:    fcmeq v3.4s, v4.4s, v4.4s
615; CHECK-NEXT:    add v16.4s, v16.4s, v20.4s
616; CHECK-NEXT:    orr v0.4s, #64, lsl #16
617; CHECK-NEXT:    orr v6.4s, #64, lsl #16
618; CHECK-NEXT:    orr v4.4s, #64, lsl #16
619; CHECK-NEXT:    bit v2.16b, v5.16b, v17.16b
620; CHECK-NEXT:    mov v5.16b, v19.16b
621; CHECK-NEXT:    bit v0.16b, v7.16b, v18.16b
622; CHECK-NEXT:    bif v1.16b, v4.16b, v3.16b
623; CHECK-NEXT:    bsl v5.16b, v16.16b, v6.16b
624; CHECK-NEXT:    uzp2 v0.8h, v0.8h, v2.8h
625; CHECK-NEXT:    uzp2 v1.8h, v1.8h, v5.8h
626; CHECK-NEXT:    ret
627entry:
628  %c = uitofp <16 x i64> %a to <16 x bfloat>
629  ret <16 x bfloat> %c
630}
631
; <32 x i64> -> <32 x bfloat> signed conversion (sitofp). Checked under the
; shared CHECK prefix for all four RUN configurations; the GlobalISel runs are
; expected to report a fallback warning for this function.
; Expected shape: inputs come from v0-v7 plus eight more q-registers loaded
; from the stack (ldp from [sp], [sp, #32], [sp, #64], [sp, #96]). Each pair
; is converted with scvtf and narrowed with fcvtn/fcvtn2 into a .4s vector
; (eight in total), which then gets the add / fcmeq / orr sequence and a bit
; or bsl select; four uzp2 instructions assemble the results in v0-v3.
632define <32 x bfloat> @stofp_v32i64_v32bf16(<32 x i64> %a) {
633; CHECK-LABEL: stofp_v32i64_v32bf16:
634; CHECK:       // %bb.0: // %entry
635; CHECK-NEXT:    scvtf v17.2d, v2.2d
636; CHECK-NEXT:    scvtf v18.2d, v0.2d
637; CHECK-NEXT:    scvtf v19.2d, v3.2d
638; CHECK-NEXT:    scvtf v3.2d, v6.2d
639; CHECK-NEXT:    ldp q21, q20, [sp, #32]
640; CHECK-NEXT:    scvtf v4.2d, v4.2d
641; CHECK-NEXT:    scvtf v6.2d, v7.2d
642; CHECK-NEXT:    scvtf v5.2d, v5.2d
643; CHECK-NEXT:    ldp q24, q23, [sp, #64]
644; CHECK-NEXT:    movi v16.4s, #1
645; CHECK-NEXT:    fcvtn v0.2s, v17.2d
646; CHECK-NEXT:    scvtf v17.2d, v1.2d
647; CHECK-NEXT:    fcvtn v1.2s, v18.2d
648; CHECK-NEXT:    fcvtn v3.2s, v3.2d
649; CHECK-NEXT:    ldp q18, q7, [sp]
650; CHECK-NEXT:    scvtf v21.2d, v21.2d
651; CHECK-NEXT:    fcvtn v4.2s, v4.2d
652; CHECK-NEXT:    movi v2.4s, #127, msl #8
653; CHECK-NEXT:    scvtf v20.2d, v20.2d
654; CHECK-NEXT:    fcvtn2 v0.4s, v19.2d
655; CHECK-NEXT:    ldp q22, q19, [sp, #96]
656; CHECK-NEXT:    fcvtn2 v1.4s, v17.2d
657; CHECK-NEXT:    fcvtn2 v3.4s, v6.2d
658; CHECK-NEXT:    scvtf v18.2d, v18.2d
659; CHECK-NEXT:    scvtf v17.2d, v24.2d
660; CHECK-NEXT:    fcvtn v6.2s, v21.2d
661; CHECK-NEXT:    fcvtn2 v4.4s, v5.2d
662; CHECK-NEXT:    scvtf v22.2d, v22.2d
663; CHECK-NEXT:    scvtf v21.2d, v23.2d
664; CHECK-NEXT:    scvtf v7.2d, v7.2d
665; CHECK-NEXT:    ushr v24.4s, v0.4s, #16
666; CHECK-NEXT:    add v5.4s, v0.4s, v2.4s
667; CHECK-NEXT:    scvtf v19.2d, v19.2d
668; CHECK-NEXT:    ushr v23.4s, v1.4s, #16
669; CHECK-NEXT:    ushr v25.4s, v3.4s, #16
670; CHECK-NEXT:    fcvtn v18.2s, v18.2d
671; CHECK-NEXT:    fcvtn2 v6.4s, v20.2d
672; CHECK-NEXT:    add v26.4s, v1.4s, v2.4s
673; CHECK-NEXT:    fcvtn v17.2s, v17.2d
674; CHECK-NEXT:    and v24.16b, v24.16b, v16.16b
675; CHECK-NEXT:    fcvtn v22.2s, v22.2d
676; CHECK-NEXT:    fcmeq v20.4s, v0.4s, v0.4s
677; CHECK-NEXT:    and v23.16b, v23.16b, v16.16b
678; CHECK-NEXT:    orr v0.4s, #64, lsl #16
679; CHECK-NEXT:    fcmeq v27.4s, v3.4s, v3.4s
680; CHECK-NEXT:    fcvtn2 v18.4s, v7.2d
681; CHECK-NEXT:    add v7.4s, v3.4s, v2.4s
682; CHECK-NEXT:    orr v3.4s, #64, lsl #16
683; CHECK-NEXT:    add v5.4s, v24.4s, v5.4s
684; CHECK-NEXT:    and v24.16b, v25.16b, v16.16b
685; CHECK-NEXT:    ushr v25.4s, v4.4s, #16
686; CHECK-NEXT:    fcvtn2 v22.4s, v19.2d
687; CHECK-NEXT:    add v19.4s, v23.4s, v26.4s
688; CHECK-NEXT:    ushr v26.4s, v6.4s, #16
689; CHECK-NEXT:    fcvtn2 v17.4s, v21.2d
690; CHECK-NEXT:    fcmeq v21.4s, v1.4s, v1.4s
691; CHECK-NEXT:    orr v1.4s, #64, lsl #16
692; CHECK-NEXT:    and v23.16b, v25.16b, v16.16b
693; CHECK-NEXT:    add v25.4s, v4.4s, v2.4s
694; CHECK-NEXT:    add v7.4s, v24.4s, v7.4s
695; CHECK-NEXT:    ushr v24.4s, v18.4s, #16
696; CHECK-NEXT:    add v30.4s, v18.4s, v2.4s
697; CHECK-NEXT:    bit v0.16b, v5.16b, v20.16b
698; CHECK-NEXT:    ushr v28.4s, v22.4s, #16
699; CHECK-NEXT:    add v31.4s, v22.4s, v2.4s
700; CHECK-NEXT:    add v23.4s, v23.4s, v25.4s
701; CHECK-NEXT:    and v25.16b, v26.16b, v16.16b
702; CHECK-NEXT:    add v26.4s, v6.4s, v2.4s
703; CHECK-NEXT:    ushr v29.4s, v17.4s, #16
704; CHECK-NEXT:    and v24.16b, v24.16b, v16.16b
705; CHECK-NEXT:    add v2.4s, v17.4s, v2.4s
706; CHECK-NEXT:    and v28.16b, v28.16b, v16.16b
707; CHECK-NEXT:    bit v3.16b, v7.16b, v27.16b
708; CHECK-NEXT:    bit v1.16b, v19.16b, v21.16b
709; CHECK-NEXT:    add v25.4s, v25.4s, v26.4s
710; CHECK-NEXT:    fcmeq v26.4s, v6.4s, v6.4s
711; CHECK-NEXT:    orr v6.4s, #64, lsl #16
712; CHECK-NEXT:    and v16.16b, v29.16b, v16.16b
713; CHECK-NEXT:    add v24.4s, v24.4s, v30.4s
714; CHECK-NEXT:    fcmeq v30.4s, v18.4s, v18.4s
715; CHECK-NEXT:    add v28.4s, v28.4s, v31.4s
716; CHECK-NEXT:    fcmeq v31.4s, v22.4s, v22.4s
717; CHECK-NEXT:    fcmeq v29.4s, v4.4s, v4.4s
718; CHECK-NEXT:    orr v4.4s, #64, lsl #16
719; CHECK-NEXT:    orr v18.4s, #64, lsl #16
720; CHECK-NEXT:    orr v22.4s, #64, lsl #16
721; CHECK-NEXT:    mov v5.16b, v26.16b
722; CHECK-NEXT:    add v2.4s, v16.4s, v2.4s
723; CHECK-NEXT:    fcmeq v16.4s, v17.4s, v17.4s
724; CHECK-NEXT:    orr v17.4s, #64, lsl #16
725; CHECK-NEXT:    uzp2 v0.8h, v1.8h, v0.8h
726; CHECK-NEXT:    mov v7.16b, v31.16b
727; CHECK-NEXT:    bit v4.16b, v23.16b, v29.16b
728; CHECK-NEXT:    bsl v5.16b, v25.16b, v6.16b
729; CHECK-NEXT:    mov v6.16b, v30.16b
730; CHECK-NEXT:    bsl v16.16b, v2.16b, v17.16b
731; CHECK-NEXT:    bsl v7.16b, v28.16b, v22.16b
732; CHECK-NEXT:    bsl v6.16b, v24.16b, v18.16b
733; CHECK-NEXT:    uzp2 v1.8h, v4.8h, v3.8h
734; CHECK-NEXT:    uzp2 v3.8h, v16.8h, v7.8h
735; CHECK-NEXT:    uzp2 v2.8h, v6.8h, v5.8h
736; CHECK-NEXT:    ret
737entry:
738  %c = sitofp <32 x i64> %a to <32 x bfloat>
739  ret <32 x bfloat> %c
740}
741
; uitofp <32 x i64> -> <32 x bfloat>.
; Expected code: eight of the argument vectors are read from v0-v7 and the
; other eight are loaded from the stack (ldp from [sp] up to [sp, #96]). Each
; .2d vector is converted with ucvtf and pairs are narrowed into one .4s vector
; with fcvtn/fcvtn2. For every .4s vector, bit 16 of each lane (ushr #16, and
; with #1) and the "#127, msl #8" constant are added to the lane; an fcmeq
; self-compare then selects (bit/bsl) between that sum and the unrounded value
; with "orr #64, lsl #16" applied (presumably NaN quieting - TODO confirm).
; uzp2 on .8h finally packs the high 16-bit halves into v0-v3.
742define <32 x bfloat> @utofp_v32i64_v32bf16(<32 x i64> %a) {
743; CHECK-LABEL: utofp_v32i64_v32bf16:
744; CHECK:       // %bb.0: // %entry
745; CHECK-NEXT:    ucvtf v17.2d, v2.2d
746; CHECK-NEXT:    ucvtf v18.2d, v0.2d
747; CHECK-NEXT:    ucvtf v19.2d, v3.2d
748; CHECK-NEXT:    ucvtf v3.2d, v6.2d
749; CHECK-NEXT:    ldp q21, q20, [sp, #32]
750; CHECK-NEXT:    ucvtf v4.2d, v4.2d
751; CHECK-NEXT:    ucvtf v6.2d, v7.2d
752; CHECK-NEXT:    ucvtf v5.2d, v5.2d
753; CHECK-NEXT:    ldp q24, q23, [sp, #64]
754; CHECK-NEXT:    movi v16.4s, #1
755; CHECK-NEXT:    fcvtn v0.2s, v17.2d
756; CHECK-NEXT:    ucvtf v17.2d, v1.2d
757; CHECK-NEXT:    fcvtn v1.2s, v18.2d
758; CHECK-NEXT:    fcvtn v3.2s, v3.2d
759; CHECK-NEXT:    ldp q18, q7, [sp]
760; CHECK-NEXT:    ucvtf v21.2d, v21.2d
761; CHECK-NEXT:    fcvtn v4.2s, v4.2d
762; CHECK-NEXT:    movi v2.4s, #127, msl #8
763; CHECK-NEXT:    ucvtf v20.2d, v20.2d
764; CHECK-NEXT:    fcvtn2 v0.4s, v19.2d
765; CHECK-NEXT:    ldp q22, q19, [sp, #96]
766; CHECK-NEXT:    fcvtn2 v1.4s, v17.2d
767; CHECK-NEXT:    fcvtn2 v3.4s, v6.2d
768; CHECK-NEXT:    ucvtf v18.2d, v18.2d
769; CHECK-NEXT:    ucvtf v17.2d, v24.2d
770; CHECK-NEXT:    fcvtn v6.2s, v21.2d
771; CHECK-NEXT:    fcvtn2 v4.4s, v5.2d
772; CHECK-NEXT:    ucvtf v22.2d, v22.2d
773; CHECK-NEXT:    ucvtf v21.2d, v23.2d
774; CHECK-NEXT:    ucvtf v7.2d, v7.2d
775; CHECK-NEXT:    ushr v24.4s, v0.4s, #16
776; CHECK-NEXT:    add v5.4s, v0.4s, v2.4s
777; CHECK-NEXT:    ucvtf v19.2d, v19.2d
778; CHECK-NEXT:    ushr v23.4s, v1.4s, #16
779; CHECK-NEXT:    ushr v25.4s, v3.4s, #16
780; CHECK-NEXT:    fcvtn v18.2s, v18.2d
781; CHECK-NEXT:    fcvtn2 v6.4s, v20.2d
782; CHECK-NEXT:    add v26.4s, v1.4s, v2.4s
783; CHECK-NEXT:    fcvtn v17.2s, v17.2d
784; CHECK-NEXT:    and v24.16b, v24.16b, v16.16b
785; CHECK-NEXT:    fcvtn v22.2s, v22.2d
786; CHECK-NEXT:    fcmeq v20.4s, v0.4s, v0.4s
787; CHECK-NEXT:    and v23.16b, v23.16b, v16.16b
788; CHECK-NEXT:    orr v0.4s, #64, lsl #16
789; CHECK-NEXT:    fcmeq v27.4s, v3.4s, v3.4s
790; CHECK-NEXT:    fcvtn2 v18.4s, v7.2d
791; CHECK-NEXT:    add v7.4s, v3.4s, v2.4s
792; CHECK-NEXT:    orr v3.4s, #64, lsl #16
793; CHECK-NEXT:    add v5.4s, v24.4s, v5.4s
794; CHECK-NEXT:    and v24.16b, v25.16b, v16.16b
795; CHECK-NEXT:    ushr v25.4s, v4.4s, #16
796; CHECK-NEXT:    fcvtn2 v22.4s, v19.2d
797; CHECK-NEXT:    add v19.4s, v23.4s, v26.4s
798; CHECK-NEXT:    ushr v26.4s, v6.4s, #16
799; CHECK-NEXT:    fcvtn2 v17.4s, v21.2d
800; CHECK-NEXT:    fcmeq v21.4s, v1.4s, v1.4s
801; CHECK-NEXT:    orr v1.4s, #64, lsl #16
802; CHECK-NEXT:    and v23.16b, v25.16b, v16.16b
803; CHECK-NEXT:    add v25.4s, v4.4s, v2.4s
804; CHECK-NEXT:    add v7.4s, v24.4s, v7.4s
805; CHECK-NEXT:    ushr v24.4s, v18.4s, #16
806; CHECK-NEXT:    add v30.4s, v18.4s, v2.4s
807; CHECK-NEXT:    bit v0.16b, v5.16b, v20.16b
808; CHECK-NEXT:    ushr v28.4s, v22.4s, #16
809; CHECK-NEXT:    add v31.4s, v22.4s, v2.4s
810; CHECK-NEXT:    add v23.4s, v23.4s, v25.4s
811; CHECK-NEXT:    and v25.16b, v26.16b, v16.16b
812; CHECK-NEXT:    add v26.4s, v6.4s, v2.4s
813; CHECK-NEXT:    ushr v29.4s, v17.4s, #16
814; CHECK-NEXT:    and v24.16b, v24.16b, v16.16b
815; CHECK-NEXT:    add v2.4s, v17.4s, v2.4s
816; CHECK-NEXT:    and v28.16b, v28.16b, v16.16b
817; CHECK-NEXT:    bit v3.16b, v7.16b, v27.16b
818; CHECK-NEXT:    bit v1.16b, v19.16b, v21.16b
819; CHECK-NEXT:    add v25.4s, v25.4s, v26.4s
820; CHECK-NEXT:    fcmeq v26.4s, v6.4s, v6.4s
821; CHECK-NEXT:    orr v6.4s, #64, lsl #16
822; CHECK-NEXT:    and v16.16b, v29.16b, v16.16b
823; CHECK-NEXT:    add v24.4s, v24.4s, v30.4s
824; CHECK-NEXT:    fcmeq v30.4s, v18.4s, v18.4s
825; CHECK-NEXT:    add v28.4s, v28.4s, v31.4s
826; CHECK-NEXT:    fcmeq v31.4s, v22.4s, v22.4s
827; CHECK-NEXT:    fcmeq v29.4s, v4.4s, v4.4s
828; CHECK-NEXT:    orr v4.4s, #64, lsl #16
829; CHECK-NEXT:    orr v18.4s, #64, lsl #16
830; CHECK-NEXT:    orr v22.4s, #64, lsl #16
831; CHECK-NEXT:    mov v5.16b, v26.16b
832; CHECK-NEXT:    add v2.4s, v16.4s, v2.4s
833; CHECK-NEXT:    fcmeq v16.4s, v17.4s, v17.4s
834; CHECK-NEXT:    orr v17.4s, #64, lsl #16
835; CHECK-NEXT:    uzp2 v0.8h, v1.8h, v0.8h
836; CHECK-NEXT:    mov v7.16b, v31.16b
837; CHECK-NEXT:    bit v4.16b, v23.16b, v29.16b
838; CHECK-NEXT:    bsl v5.16b, v25.16b, v6.16b
839; CHECK-NEXT:    mov v6.16b, v30.16b
840; CHECK-NEXT:    bsl v16.16b, v2.16b, v17.16b
841; CHECK-NEXT:    bsl v7.16b, v28.16b, v22.16b
842; CHECK-NEXT:    bsl v6.16b, v24.16b, v18.16b
843; CHECK-NEXT:    uzp2 v1.8h, v4.8h, v3.8h
844; CHECK-NEXT:    uzp2 v3.8h, v16.8h, v7.8h
845; CHECK-NEXT:    uzp2 v2.8h, v6.8h, v5.8h
846; CHECK-NEXT:    ret
847entry:
848  %c = uitofp <32 x i64> %a to <32 x bfloat>
849  ret <32 x bfloat> %c
850}
851
; sitofp <2 x i32> -> <2 x bfloat>.
; Expected code: the d0 argument is treated as q0 (see the kill comment) and
; converted as a 4-lane vector with scvtf. Rounding uses integer ops only:
; bit 16 of each f32 lane (ushr #16, and with #1) is added to the lane, then
; addhn adds the "#127, msl #8" constant and keeps the high 16 bits per lane.
; No fcmeq/select sequence is expected for this case.
852define <2 x bfloat> @stofp_v2i32_v2bf16(<2 x i32> %a) {
853; CHECK-LABEL: stofp_v2i32_v2bf16:
854; CHECK:       // %bb.0: // %entry
855; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
856; CHECK-NEXT:    movi v1.4s, #1
857; CHECK-NEXT:    scvtf v0.4s, v0.4s
858; CHECK-NEXT:    ushr v2.4s, v0.4s, #16
859; CHECK-NEXT:    and v1.16b, v2.16b, v1.16b
860; CHECK-NEXT:    movi v2.4s, #127, msl #8
861; CHECK-NEXT:    add v0.4s, v1.4s, v0.4s
862; CHECK-NEXT:    addhn v0.4h, v0.4s, v2.4s
863; CHECK-NEXT:    ret
864entry:
865  %c = sitofp <2 x i32> %a to <2 x bfloat>
866  ret <2 x bfloat> %c
867}
868
; uitofp <2 x i32> -> <2 x bfloat>.
; Expected code: the d0 argument is treated as q0 (see the kill comment) and
; converted as a 4-lane vector with ucvtf. Rounding uses integer ops only:
; bit 16 of each f32 lane (ushr #16, and with #1) is added to the lane, then
; addhn adds the "#127, msl #8" constant and keeps the high 16 bits per lane.
869define <2 x bfloat> @utofp_v2i32_v2bf16(<2 x i32> %a) {
870; CHECK-LABEL: utofp_v2i32_v2bf16:
871; CHECK:       // %bb.0: // %entry
872; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
873; CHECK-NEXT:    movi v1.4s, #1
874; CHECK-NEXT:    ucvtf v0.4s, v0.4s
875; CHECK-NEXT:    ushr v2.4s, v0.4s, #16
876; CHECK-NEXT:    and v1.16b, v2.16b, v1.16b
877; CHECK-NEXT:    movi v2.4s, #127, msl #8
878; CHECK-NEXT:    add v0.4s, v1.4s, v0.4s
879; CHECK-NEXT:    addhn v0.4h, v0.4s, v2.4s
880; CHECK-NEXT:    ret
881entry:
882  %c = uitofp <2 x i32> %a to <2 x bfloat>
883  ret <2 x bfloat> %c
884}
885
; sitofp <3 x i32> -> <3 x bfloat>.
; Expected code: the three-element input is converted as a full 4-lane vector
; (scvtf .4s). Bit 16 of each f32 lane (ushr #16, and with #1) is added to the
; lane, then addhn adds the "#127, msl #8" constant and keeps the high 16 bits
; per lane.
886define <3 x bfloat> @stofp_v3i32_v3bf16(<3 x i32> %a) {
887; CHECK-LABEL: stofp_v3i32_v3bf16:
888; CHECK:       // %bb.0: // %entry
889; CHECK-NEXT:    scvtf v0.4s, v0.4s
890; CHECK-NEXT:    movi v1.4s, #1
891; CHECK-NEXT:    ushr v2.4s, v0.4s, #16
892; CHECK-NEXT:    and v1.16b, v2.16b, v1.16b
893; CHECK-NEXT:    movi v2.4s, #127, msl #8
894; CHECK-NEXT:    add v0.4s, v1.4s, v0.4s
895; CHECK-NEXT:    addhn v0.4h, v0.4s, v2.4s
896; CHECK-NEXT:    ret
897entry:
898  %c = sitofp <3 x i32> %a to <3 x bfloat>
899  ret <3 x bfloat> %c
900}
901
; uitofp <3 x i32> -> <3 x bfloat>.
; Expected code: the three-element input is converted as a full 4-lane vector
; (ucvtf .4s). Bit 16 of each f32 lane (ushr #16, and with #1) is added to the
; lane, then addhn adds the "#127, msl #8" constant and keeps the high 16 bits
; per lane.
902define <3 x bfloat> @utofp_v3i32_v3bf16(<3 x i32> %a) {
903; CHECK-LABEL: utofp_v3i32_v3bf16:
904; CHECK:       // %bb.0: // %entry
905; CHECK-NEXT:    ucvtf v0.4s, v0.4s
906; CHECK-NEXT:    movi v1.4s, #1
907; CHECK-NEXT:    ushr v2.4s, v0.4s, #16
908; CHECK-NEXT:    and v1.16b, v2.16b, v1.16b
909; CHECK-NEXT:    movi v2.4s, #127, msl #8
910; CHECK-NEXT:    add v0.4s, v1.4s, v0.4s
911; CHECK-NEXT:    addhn v0.4h, v0.4s, v2.4s
912; CHECK-NEXT:    ret
913entry:
914  %c = uitofp <3 x i32> %a to <3 x bfloat>
915  ret <3 x bfloat> %c
916}
917
; sitofp <4 x i32> -> <4 x bfloat>.
; Expected code: one scvtf on the .4s input, then integer rounding: bit 16 of
; each f32 lane (ushr #16, and with #1) is added to the lane, and addhn adds the
; "#127, msl #8" constant while keeping the high 16 bits per lane.
918define <4 x bfloat> @stofp_v4i32_v4bf16(<4 x i32> %a) {
919; CHECK-LABEL: stofp_v4i32_v4bf16:
920; CHECK:       // %bb.0: // %entry
921; CHECK-NEXT:    scvtf v0.4s, v0.4s
922; CHECK-NEXT:    movi v1.4s, #1
923; CHECK-NEXT:    ushr v2.4s, v0.4s, #16
924; CHECK-NEXT:    and v1.16b, v2.16b, v1.16b
925; CHECK-NEXT:    movi v2.4s, #127, msl #8
926; CHECK-NEXT:    add v0.4s, v1.4s, v0.4s
927; CHECK-NEXT:    addhn v0.4h, v0.4s, v2.4s
928; CHECK-NEXT:    ret
929entry:
930  %c = sitofp <4 x i32> %a to <4 x bfloat>
931  ret <4 x bfloat> %c
932}
933
; uitofp <4 x i32> -> <4 x bfloat>.
; Expected code: one ucvtf on the .4s input, then integer rounding: bit 16 of
; each f32 lane (ushr #16, and with #1) is added to the lane, and addhn adds the
; "#127, msl #8" constant while keeping the high 16 bits per lane.
934define <4 x bfloat> @utofp_v4i32_v4bf16(<4 x i32> %a) {
935; CHECK-LABEL: utofp_v4i32_v4bf16:
936; CHECK:       // %bb.0: // %entry
937; CHECK-NEXT:    ucvtf v0.4s, v0.4s
938; CHECK-NEXT:    movi v1.4s, #1
939; CHECK-NEXT:    ushr v2.4s, v0.4s, #16
940; CHECK-NEXT:    and v1.16b, v2.16b, v1.16b
941; CHECK-NEXT:    movi v2.4s, #127, msl #8
942; CHECK-NEXT:    add v0.4s, v1.4s, v0.4s
943; CHECK-NEXT:    addhn v0.4h, v0.4s, v2.4s
944; CHECK-NEXT:    ret
945entry:
946  %c = uitofp <4 x i32> %a to <4 x bfloat>
947  ret <4 x bfloat> %c
948}
949
; sitofp <8 x i32> -> <8 x bfloat>.
; Expected code: the two input vectors (v0, v1) are converted with scvtf, each
; gets bit 16 of its lanes added back (ushr #16, and with #1, add), and the
; addhn/addhn2 pair adds the "#127, msl #8" constant and packs the high 16 bits
; of both vectors into a single .8h result in v0.
950define <8 x bfloat> @stofp_v8i32_v8bf16(<8 x i32> %a) {
951; CHECK-LABEL: stofp_v8i32_v8bf16:
952; CHECK:       // %bb.0: // %entry
953; CHECK-NEXT:    scvtf v0.4s, v0.4s
954; CHECK-NEXT:    movi v2.4s, #1
955; CHECK-NEXT:    scvtf v1.4s, v1.4s
956; CHECK-NEXT:    movi v5.4s, #127, msl #8
957; CHECK-NEXT:    ushr v3.4s, v0.4s, #16
958; CHECK-NEXT:    ushr v4.4s, v1.4s, #16
959; CHECK-NEXT:    and v3.16b, v3.16b, v2.16b
960; CHECK-NEXT:    and v2.16b, v4.16b, v2.16b
961; CHECK-NEXT:    add v0.4s, v3.4s, v0.4s
962; CHECK-NEXT:    add v1.4s, v2.4s, v1.4s
963; CHECK-NEXT:    addhn v0.4h, v0.4s, v5.4s
964; CHECK-NEXT:    addhn2 v0.8h, v1.4s, v5.4s
965; CHECK-NEXT:    ret
966entry:
967  %c = sitofp <8 x i32> %a to <8 x bfloat>
968  ret <8 x bfloat> %c
969}
970
; uitofp <8 x i32> -> <8 x bfloat>.
; Expected code: the two input vectors (v0, v1) are converted with ucvtf, each
; gets bit 16 of its lanes added back (ushr #16, and with #1, add), and the
; addhn/addhn2 pair adds the "#127, msl #8" constant and packs the high 16 bits
; of both vectors into a single .8h result in v0.
971define <8 x bfloat> @utofp_v8i32_v8bf16(<8 x i32> %a) {
972; CHECK-LABEL: utofp_v8i32_v8bf16:
973; CHECK:       // %bb.0: // %entry
974; CHECK-NEXT:    ucvtf v0.4s, v0.4s
975; CHECK-NEXT:    movi v2.4s, #1
976; CHECK-NEXT:    ucvtf v1.4s, v1.4s
977; CHECK-NEXT:    movi v5.4s, #127, msl #8
978; CHECK-NEXT:    ushr v3.4s, v0.4s, #16
979; CHECK-NEXT:    ushr v4.4s, v1.4s, #16
980; CHECK-NEXT:    and v3.16b, v3.16b, v2.16b
981; CHECK-NEXT:    and v2.16b, v4.16b, v2.16b
982; CHECK-NEXT:    add v0.4s, v3.4s, v0.4s
983; CHECK-NEXT:    add v1.4s, v2.4s, v1.4s
984; CHECK-NEXT:    addhn v0.4h, v0.4s, v5.4s
985; CHECK-NEXT:    addhn2 v0.8h, v1.4s, v5.4s
986; CHECK-NEXT:    ret
987entry:
988  %c = uitofp <8 x i32> %a to <8 x bfloat>
989  ret <8 x bfloat> %c
990}
991
; sitofp <16 x i32> -> <16 x bfloat>.
; Expected code: the four input vectors (v0-v3) are converted with scvtf, each
; gets bit 16 of its lanes added back (ushr #16, and with #1, add), and two
; addhn/addhn2 pairs add the "#127, msl #8" constant and pack the high 16 bits
; into the two .8h results in v0 and v1.
992define <16 x bfloat> @stofp_v16i32_v16bf16(<16 x i32> %a) {
993; CHECK-LABEL: stofp_v16i32_v16bf16:
994; CHECK:       // %bb.0: // %entry
995; CHECK-NEXT:    scvtf v2.4s, v2.4s
996; CHECK-NEXT:    scvtf v0.4s, v0.4s
997; CHECK-NEXT:    scvtf v4.4s, v1.4s
998; CHECK-NEXT:    movi v1.4s, #1
999; CHECK-NEXT:    scvtf v3.4s, v3.4s
1000; CHECK-NEXT:    movi v17.4s, #127, msl #8
1001; CHECK-NEXT:    ushr v5.4s, v0.4s, #16
1002; CHECK-NEXT:    ushr v6.4s, v2.4s, #16
1003; CHECK-NEXT:    ushr v7.4s, v4.4s, #16
1004; CHECK-NEXT:    ushr v16.4s, v3.4s, #16
1005; CHECK-NEXT:    and v5.16b, v5.16b, v1.16b
1006; CHECK-NEXT:    and v6.16b, v6.16b, v1.16b
1007; CHECK-NEXT:    add v0.4s, v5.4s, v0.4s
1008; CHECK-NEXT:    add v2.4s, v6.4s, v2.4s
1009; CHECK-NEXT:    and v5.16b, v7.16b, v1.16b
1010; CHECK-NEXT:    and v6.16b, v16.16b, v1.16b
1011; CHECK-NEXT:    addhn v0.4h, v0.4s, v17.4s
1012; CHECK-NEXT:    addhn v1.4h, v2.4s, v17.4s
1013; CHECK-NEXT:    add v2.4s, v5.4s, v4.4s
1014; CHECK-NEXT:    add v3.4s, v6.4s, v3.4s
1015; CHECK-NEXT:    addhn2 v0.8h, v2.4s, v17.4s
1016; CHECK-NEXT:    addhn2 v1.8h, v3.4s, v17.4s
1017; CHECK-NEXT:    ret
1018entry:
1019  %c = sitofp <16 x i32> %a to <16 x bfloat>
1020  ret <16 x bfloat> %c
1021}
1022
; uitofp <16 x i32> -> <16 x bfloat>.
; Expected code: the four input vectors (v0-v3) are converted with ucvtf, each
; gets bit 16 of its lanes added back (ushr #16, and with #1, add), and two
; addhn/addhn2 pairs add the "#127, msl #8" constant and pack the high 16 bits
; into the two .8h results in v0 and v1.
1023define <16 x bfloat> @utofp_v16i32_v16bf16(<16 x i32> %a) {
1024; CHECK-LABEL: utofp_v16i32_v16bf16:
1025; CHECK:       // %bb.0: // %entry
1026; CHECK-NEXT:    ucvtf v2.4s, v2.4s
1027; CHECK-NEXT:    ucvtf v0.4s, v0.4s
1028; CHECK-NEXT:    ucvtf v4.4s, v1.4s
1029; CHECK-NEXT:    movi v1.4s, #1
1030; CHECK-NEXT:    ucvtf v3.4s, v3.4s
1031; CHECK-NEXT:    movi v17.4s, #127, msl #8
1032; CHECK-NEXT:    ushr v5.4s, v0.4s, #16
1033; CHECK-NEXT:    ushr v6.4s, v2.4s, #16
1034; CHECK-NEXT:    ushr v7.4s, v4.4s, #16
1035; CHECK-NEXT:    ushr v16.4s, v3.4s, #16
1036; CHECK-NEXT:    and v5.16b, v5.16b, v1.16b
1037; CHECK-NEXT:    and v6.16b, v6.16b, v1.16b
1038; CHECK-NEXT:    add v0.4s, v5.4s, v0.4s
1039; CHECK-NEXT:    add v2.4s, v6.4s, v2.4s
1040; CHECK-NEXT:    and v5.16b, v7.16b, v1.16b
1041; CHECK-NEXT:    and v6.16b, v16.16b, v1.16b
1042; CHECK-NEXT:    addhn v0.4h, v0.4s, v17.4s
1043; CHECK-NEXT:    addhn v1.4h, v2.4s, v17.4s
1044; CHECK-NEXT:    add v2.4s, v5.4s, v4.4s
1045; CHECK-NEXT:    add v3.4s, v6.4s, v3.4s
1046; CHECK-NEXT:    addhn2 v0.8h, v2.4s, v17.4s
1047; CHECK-NEXT:    addhn2 v1.8h, v3.4s, v17.4s
1048; CHECK-NEXT:    ret
1049entry:
1050  %c = uitofp <16 x i32> %a to <16 x bfloat>
1051  ret <16 x bfloat> %c
1052}
1053
; sitofp <32 x i32> -> <32 x bfloat>.
; Expected code: all eight input vectors (v0-v7) are converted with scvtf, each
; gets bit 16 of its lanes added back (ushr #16, and with #1, add), and four
; addhn/addhn2 pairs add the "#127, msl #8" constant and pack the high 16 bits
; into the four .8h results in v0-v3. No stack accesses are expected.
1054define <32 x bfloat> @stofp_v32i32_v32bf16(<32 x i32> %a) {
1055; CHECK-LABEL: stofp_v32i32_v32bf16:
1056; CHECK:       // %bb.0: // %entry
1057; CHECK-NEXT:    scvtf v0.4s, v0.4s
1058; CHECK-NEXT:    scvtf v2.4s, v2.4s
1059; CHECK-NEXT:    scvtf v4.4s, v4.4s
1060; CHECK-NEXT:    scvtf v6.4s, v6.4s
1061; CHECK-NEXT:    movi v16.4s, #1
1062; CHECK-NEXT:    scvtf v1.4s, v1.4s
1063; CHECK-NEXT:    scvtf v17.4s, v3.4s
1064; CHECK-NEXT:    scvtf v5.4s, v5.4s
1065; CHECK-NEXT:    scvtf v7.4s, v7.4s
1066; CHECK-NEXT:    movi v21.4s, #127, msl #8
1067; CHECK-NEXT:    ushr v3.4s, v0.4s, #16
1068; CHECK-NEXT:    ushr v18.4s, v2.4s, #16
1069; CHECK-NEXT:    ushr v19.4s, v4.4s, #16
1070; CHECK-NEXT:    ushr v20.4s, v6.4s, #16
1071; CHECK-NEXT:    ushr v22.4s, v1.4s, #16
1072; CHECK-NEXT:    ushr v23.4s, v17.4s, #16
1073; CHECK-NEXT:    ushr v24.4s, v5.4s, #16
1074; CHECK-NEXT:    ushr v25.4s, v7.4s, #16
1075; CHECK-NEXT:    and v3.16b, v3.16b, v16.16b
1076; CHECK-NEXT:    and v18.16b, v18.16b, v16.16b
1077; CHECK-NEXT:    and v19.16b, v19.16b, v16.16b
1078; CHECK-NEXT:    and v20.16b, v20.16b, v16.16b
1079; CHECK-NEXT:    add v0.4s, v3.4s, v0.4s
1080; CHECK-NEXT:    and v3.16b, v22.16b, v16.16b
1081; CHECK-NEXT:    add v2.4s, v18.4s, v2.4s
1082; CHECK-NEXT:    add v4.4s, v19.4s, v4.4s
1083; CHECK-NEXT:    add v6.4s, v20.4s, v6.4s
1084; CHECK-NEXT:    and v18.16b, v23.16b, v16.16b
1085; CHECK-NEXT:    and v19.16b, v24.16b, v16.16b
1086; CHECK-NEXT:    and v16.16b, v25.16b, v16.16b
1087; CHECK-NEXT:    add v20.4s, v3.4s, v1.4s
1088; CHECK-NEXT:    addhn v0.4h, v0.4s, v21.4s
1089; CHECK-NEXT:    addhn v1.4h, v2.4s, v21.4s
1090; CHECK-NEXT:    addhn v2.4h, v4.4s, v21.4s
1091; CHECK-NEXT:    addhn v3.4h, v6.4s, v21.4s
1092; CHECK-NEXT:    add v4.4s, v18.4s, v17.4s
1093; CHECK-NEXT:    add v5.4s, v19.4s, v5.4s
1094; CHECK-NEXT:    add v6.4s, v16.4s, v7.4s
1095; CHECK-NEXT:    addhn2 v0.8h, v20.4s, v21.4s
1096; CHECK-NEXT:    addhn2 v1.8h, v4.4s, v21.4s
1097; CHECK-NEXT:    addhn2 v2.8h, v5.4s, v21.4s
1098; CHECK-NEXT:    addhn2 v3.8h, v6.4s, v21.4s
1099; CHECK-NEXT:    ret
1100entry:
1101  %c = sitofp <32 x i32> %a to <32 x bfloat>
1102  ret <32 x bfloat> %c
1103}
1104
; uitofp <32 x i32> -> <32 x bfloat>.
; Expected code: all eight input vectors (v0-v7) are converted with ucvtf, each
; gets bit 16 of its lanes added back (ushr #16, and with #1, add), and four
; addhn/addhn2 pairs add the "#127, msl #8" constant and pack the high 16 bits
; into the four .8h results in v0-v3. No stack accesses are expected.
1105define <32 x bfloat> @utofp_v32i32_v32bf16(<32 x i32> %a) {
1106; CHECK-LABEL: utofp_v32i32_v32bf16:
1107; CHECK:       // %bb.0: // %entry
1108; CHECK-NEXT:    ucvtf v0.4s, v0.4s
1109; CHECK-NEXT:    ucvtf v2.4s, v2.4s
1110; CHECK-NEXT:    ucvtf v4.4s, v4.4s
1111; CHECK-NEXT:    ucvtf v6.4s, v6.4s
1112; CHECK-NEXT:    movi v16.4s, #1
1113; CHECK-NEXT:    ucvtf v1.4s, v1.4s
1114; CHECK-NEXT:    ucvtf v17.4s, v3.4s
1115; CHECK-NEXT:    ucvtf v5.4s, v5.4s
1116; CHECK-NEXT:    ucvtf v7.4s, v7.4s
1117; CHECK-NEXT:    movi v21.4s, #127, msl #8
1118; CHECK-NEXT:    ushr v3.4s, v0.4s, #16
1119; CHECK-NEXT:    ushr v18.4s, v2.4s, #16
1120; CHECK-NEXT:    ushr v19.4s, v4.4s, #16
1121; CHECK-NEXT:    ushr v20.4s, v6.4s, #16
1122; CHECK-NEXT:    ushr v22.4s, v1.4s, #16
1123; CHECK-NEXT:    ushr v23.4s, v17.4s, #16
1124; CHECK-NEXT:    ushr v24.4s, v5.4s, #16
1125; CHECK-NEXT:    ushr v25.4s, v7.4s, #16
1126; CHECK-NEXT:    and v3.16b, v3.16b, v16.16b
1127; CHECK-NEXT:    and v18.16b, v18.16b, v16.16b
1128; CHECK-NEXT:    and v19.16b, v19.16b, v16.16b
1129; CHECK-NEXT:    and v20.16b, v20.16b, v16.16b
1130; CHECK-NEXT:    add v0.4s, v3.4s, v0.4s
1131; CHECK-NEXT:    and v3.16b, v22.16b, v16.16b
1132; CHECK-NEXT:    add v2.4s, v18.4s, v2.4s
1133; CHECK-NEXT:    add v4.4s, v19.4s, v4.4s
1134; CHECK-NEXT:    add v6.4s, v20.4s, v6.4s
1135; CHECK-NEXT:    and v18.16b, v23.16b, v16.16b
1136; CHECK-NEXT:    and v19.16b, v24.16b, v16.16b
1137; CHECK-NEXT:    and v16.16b, v25.16b, v16.16b
1138; CHECK-NEXT:    add v20.4s, v3.4s, v1.4s
1139; CHECK-NEXT:    addhn v0.4h, v0.4s, v21.4s
1140; CHECK-NEXT:    addhn v1.4h, v2.4s, v21.4s
1141; CHECK-NEXT:    addhn v2.4h, v4.4s, v21.4s
1142; CHECK-NEXT:    addhn v3.4h, v6.4s, v21.4s
1143; CHECK-NEXT:    add v4.4s, v18.4s, v17.4s
1144; CHECK-NEXT:    add v5.4s, v19.4s, v5.4s
1145; CHECK-NEXT:    add v6.4s, v16.4s, v7.4s
1146; CHECK-NEXT:    addhn2 v0.8h, v20.4s, v21.4s
1147; CHECK-NEXT:    addhn2 v1.8h, v4.4s, v21.4s
1148; CHECK-NEXT:    addhn2 v2.8h, v5.4s, v21.4s
1149; CHECK-NEXT:    addhn2 v3.8h, v6.4s, v21.4s
1150; CHECK-NEXT:    ret
1151entry:
1152  %c = uitofp <32 x i32> %a to <32 x bfloat>
1153  ret <32 x bfloat> %c
1154}
1155
; sitofp <2 x i16> -> <2 x bfloat>.
; Expected code: uzp1 on .4h first gathers the even-numbered 16-bit elements of
; v0 (presumably because the <2 x i16> argument arrives in 32-bit lanes - TODO
; confirm), sshll sign-extends them to .4s, and scvtf converts. Rounding then
; adds bit 16 of each f32 lane to the lane, and addhn adds the "#127, msl #8"
; constant while keeping the high 16 bits per lane.
1156define <2 x bfloat> @stofp_v2i16_v2bf16(<2 x i16> %a) {
1157; CHECK-LABEL: stofp_v2i16_v2bf16:
1158; CHECK:       // %bb.0: // %entry
1159; CHECK-NEXT:    uzp1 v0.4h, v0.4h, v0.4h
1160; CHECK-NEXT:    movi v1.4s, #1
1161; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
1162; CHECK-NEXT:    scvtf v0.4s, v0.4s
1163; CHECK-NEXT:    ushr v2.4s, v0.4s, #16
1164; CHECK-NEXT:    and v1.16b, v2.16b, v1.16b
1165; CHECK-NEXT:    movi v2.4s, #127, msl #8
1166; CHECK-NEXT:    add v0.4s, v1.4s, v0.4s
1167; CHECK-NEXT:    addhn v0.4h, v0.4s, v2.4s
1168; CHECK-NEXT:    ret
1169entry:
1170  %c = sitofp <2 x i16> %a to <2 x bfloat>
1171  ret <2 x bfloat> %c
1172}
1173
; uitofp <2 x i16> -> <2 x bfloat>.
; Expected code: uzp1 on .4h first gathers the even-numbered 16-bit elements of
; v0 (presumably because the <2 x i16> argument arrives in 32-bit lanes - TODO
; confirm), ushll zero-extends them to .4s, and ucvtf converts. Rounding then
; adds bit 16 of each f32 lane to the lane, and addhn adds the "#127, msl #8"
; constant while keeping the high 16 bits per lane.
1174define <2 x bfloat> @utofp_v2i16_v2bf16(<2 x i16> %a) {
1175; CHECK-LABEL: utofp_v2i16_v2bf16:
1176; CHECK:       // %bb.0: // %entry
1177; CHECK-NEXT:    uzp1 v0.4h, v0.4h, v0.4h
1178; CHECK-NEXT:    movi v1.4s, #1
1179; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
1180; CHECK-NEXT:    ucvtf v0.4s, v0.4s
1181; CHECK-NEXT:    ushr v2.4s, v0.4s, #16
1182; CHECK-NEXT:    and v1.16b, v2.16b, v1.16b
1183; CHECK-NEXT:    movi v2.4s, #127, msl #8
1184; CHECK-NEXT:    add v0.4s, v1.4s, v0.4s
1185; CHECK-NEXT:    addhn v0.4h, v0.4s, v2.4s
1186; CHECK-NEXT:    ret
1187entry:
1188  %c = uitofp <2 x i16> %a to <2 x bfloat>
1189  ret <2 x bfloat> %c
1190}
1191
; sitofp <3 x i16> -> <3 x bfloat>.
; Expected code: sshll sign-extends the .4h input to .4s and scvtf converts all
; four lanes. Rounding adds bit 16 of each f32 lane (ushr #16, and with #1) to
; the lane, and addhn adds the "#127, msl #8" constant while keeping the high
; 16 bits per lane.
1192define <3 x bfloat> @stofp_v3i16_v3bf16(<3 x i16> %a) {
1193; CHECK-LABEL: stofp_v3i16_v3bf16:
1194; CHECK:       // %bb.0: // %entry
1195; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
1196; CHECK-NEXT:    movi v1.4s, #1
1197; CHECK-NEXT:    scvtf v0.4s, v0.4s
1198; CHECK-NEXT:    ushr v2.4s, v0.4s, #16
1199; CHECK-NEXT:    and v1.16b, v2.16b, v1.16b
1200; CHECK-NEXT:    movi v2.4s, #127, msl #8
1201; CHECK-NEXT:    add v0.4s, v1.4s, v0.4s
1202; CHECK-NEXT:    addhn v0.4h, v0.4s, v2.4s
1203; CHECK-NEXT:    ret
1204entry:
1205  %c = sitofp <3 x i16> %a to <3 x bfloat>
1206  ret <3 x bfloat> %c
1207}
1208
; uitofp <3 x i16> -> <3 x bfloat>.
; Expected code: ushll zero-extends the .4h input to .4s and ucvtf converts all
; four lanes. Rounding adds bit 16 of each f32 lane (ushr #16, and with #1) to
; the lane, and addhn adds the "#127, msl #8" constant while keeping the high
; 16 bits per lane.
1209define <3 x bfloat> @utofp_v3i16_v3bf16(<3 x i16> %a) {
1210; CHECK-LABEL: utofp_v3i16_v3bf16:
1211; CHECK:       // %bb.0: // %entry
1212; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
1213; CHECK-NEXT:    movi v1.4s, #1
1214; CHECK-NEXT:    ucvtf v0.4s, v0.4s
1215; CHECK-NEXT:    ushr v2.4s, v0.4s, #16
1216; CHECK-NEXT:    and v1.16b, v2.16b, v1.16b
1217; CHECK-NEXT:    movi v2.4s, #127, msl #8
1218; CHECK-NEXT:    add v0.4s, v1.4s, v0.4s
1219; CHECK-NEXT:    addhn v0.4h, v0.4s, v2.4s
1220; CHECK-NEXT:    ret
1221entry:
1222  %c = uitofp <3 x i16> %a to <3 x bfloat>
1223  ret <3 x bfloat> %c
1224}
1225
; sitofp <4 x i16> -> <4 x bfloat>.
; Expected code: sshll sign-extends the .4h input to .4s and scvtf converts it.
; Rounding adds bit 16 of each f32 lane (ushr #16, and with #1) to the lane,
; and addhn adds the "#127, msl #8" constant while keeping the high 16 bits per
; lane.
1226define <4 x bfloat> @stofp_v4i16_v4bf16(<4 x i16> %a) {
1227; CHECK-LABEL: stofp_v4i16_v4bf16:
1228; CHECK:       // %bb.0: // %entry
1229; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
1230; CHECK-NEXT:    movi v1.4s, #1
1231; CHECK-NEXT:    scvtf v0.4s, v0.4s
1232; CHECK-NEXT:    ushr v2.4s, v0.4s, #16
1233; CHECK-NEXT:    and v1.16b, v2.16b, v1.16b
1234; CHECK-NEXT:    movi v2.4s, #127, msl #8
1235; CHECK-NEXT:    add v0.4s, v1.4s, v0.4s
1236; CHECK-NEXT:    addhn v0.4h, v0.4s, v2.4s
1237; CHECK-NEXT:    ret
1238entry:
1239  %c = sitofp <4 x i16> %a to <4 x bfloat>
1240  ret <4 x bfloat> %c
1241}
1242
; uitofp <4 x i16> -> <4 x bfloat>.
; Expected code: ushll zero-extends the .4h input to .4s and ucvtf converts it.
; Rounding adds bit 16 of each f32 lane (ushr #16, and with #1) to the lane,
; and addhn adds the "#127, msl #8" constant while keeping the high 16 bits per
; lane.
1243define <4 x bfloat> @utofp_v4i16_v4bf16(<4 x i16> %a) {
1244; CHECK-LABEL: utofp_v4i16_v4bf16:
1245; CHECK:       // %bb.0: // %entry
1246; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
1247; CHECK-NEXT:    movi v1.4s, #1
1248; CHECK-NEXT:    ucvtf v0.4s, v0.4s
1249; CHECK-NEXT:    ushr v2.4s, v0.4s, #16
1250; CHECK-NEXT:    and v1.16b, v2.16b, v1.16b
1251; CHECK-NEXT:    movi v2.4s, #127, msl #8
1252; CHECK-NEXT:    add v0.4s, v1.4s, v0.4s
1253; CHECK-NEXT:    addhn v0.4h, v0.4s, v2.4s
1254; CHECK-NEXT:    ret
1255entry:
1256  %c = uitofp <4 x i16> %a to <4 x bfloat>
1257  ret <4 x bfloat> %c
1258}
1259
; sitofp <8 x i16> -> <8 x bfloat>.
; Expected code: sshll/sshll2 sign-extend the low and high halves of the .8h
; input to two .4s vectors, which are converted with scvtf. Unlike the i32
; cases, the "#127, msl #8" constant is added to the extracted bit 16 first
; (ushr #16, and with #1, add), and addhn/addhn2 then add that to the converted
; value and pack the high 16 bits of both vectors into v0.
1260define <8 x bfloat> @stofp_v8i16_v8bf16(<8 x i16> %a) {
1261; CHECK-LABEL: stofp_v8i16_v8bf16:
1262; CHECK:       // %bb.0: // %entry
1263; CHECK-NEXT:    sshll v2.4s, v0.4h, #0
1264; CHECK-NEXT:    sshll2 v0.4s, v0.8h, #0
1265; CHECK-NEXT:    movi v1.4s, #1
1266; CHECK-NEXT:    movi v4.4s, #127, msl #8
1267; CHECK-NEXT:    scvtf v2.4s, v2.4s
1268; CHECK-NEXT:    scvtf v3.4s, v0.4s
1269; CHECK-NEXT:    ushr v0.4s, v2.4s, #16
1270; CHECK-NEXT:    ushr v5.4s, v3.4s, #16
1271; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
1272; CHECK-NEXT:    and v1.16b, v5.16b, v1.16b
1273; CHECK-NEXT:    add v0.4s, v0.4s, v4.4s
1274; CHECK-NEXT:    add v1.4s, v1.4s, v4.4s
1275; CHECK-NEXT:    addhn v0.4h, v2.4s, v0.4s
1276; CHECK-NEXT:    addhn2 v0.8h, v3.4s, v1.4s
1277; CHECK-NEXT:    ret
1278entry:
1279  %c = sitofp <8 x i16> %a to <8 x bfloat>
1280  ret <8 x bfloat> %c
1281}
1282
; uitofp <8 x i16> -> <8 x bfloat>.
; Expected code: ushll/ushll2 zero-extend the low and high halves of the .8h
; input to two .4s vectors, which are converted with ucvtf. Unlike the i32
; cases, the "#127, msl #8" constant is added to the extracted bit 16 first
; (ushr #16, and with #1, add), and addhn/addhn2 then add that to the converted
; value and pack the high 16 bits of both vectors into v0.
1283define <8 x bfloat> @utofp_v8i16_v8bf16(<8 x i16> %a) {
1284; CHECK-LABEL: utofp_v8i16_v8bf16:
1285; CHECK:       // %bb.0: // %entry
1286; CHECK-NEXT:    ushll v2.4s, v0.4h, #0
1287; CHECK-NEXT:    ushll2 v0.4s, v0.8h, #0
1288; CHECK-NEXT:    movi v1.4s, #1
1289; CHECK-NEXT:    movi v4.4s, #127, msl #8
1290; CHECK-NEXT:    ucvtf v2.4s, v2.4s
1291; CHECK-NEXT:    ucvtf v3.4s, v0.4s
1292; CHECK-NEXT:    ushr v0.4s, v2.4s, #16
1293; CHECK-NEXT:    ushr v5.4s, v3.4s, #16
1294; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
1295; CHECK-NEXT:    and v1.16b, v5.16b, v1.16b
1296; CHECK-NEXT:    add v0.4s, v0.4s, v4.4s
1297; CHECK-NEXT:    add v1.4s, v1.4s, v4.4s
1298; CHECK-NEXT:    addhn v0.4h, v2.4s, v0.4s
1299; CHECK-NEXT:    addhn2 v0.8h, v3.4s, v1.4s
1300; CHECK-NEXT:    ret
1301entry:
1302  %c = uitofp <8 x i16> %a to <8 x bfloat>
1303  ret <8 x bfloat> %c
1304}
1305
; sitofp <16 x i16> -> <16 x bfloat>.
; Expected code: sshll/sshll2 sign-extend both .8h inputs (v0, v1) into four
; .4s vectors, which are converted with scvtf. For each, bit 16 of the f32 lane
; (ushr #16, and with #1) plus the "#127, msl #8" constant is formed first, and
; addhn/addhn2 add that to the converted value and pack the high 16 bits into
; the two .8h results in v0 and v1.
1306define <16 x bfloat> @stofp_v16i16_v16bf16(<16 x i16> %a) {
1307; CHECK-LABEL: stofp_v16i16_v16bf16:
1308; CHECK:       // %bb.0: // %entry
1309; CHECK-NEXT:    sshll v3.4s, v0.4h, #0
1310; CHECK-NEXT:    sshll v4.4s, v1.4h, #0
1311; CHECK-NEXT:    sshll2 v0.4s, v0.8h, #0
1312; CHECK-NEXT:    sshll2 v1.4s, v1.8h, #0
1313; CHECK-NEXT:    movi v2.4s, #1
1314; CHECK-NEXT:    movi v7.4s, #127, msl #8
1315; CHECK-NEXT:    scvtf v3.4s, v3.4s
1316; CHECK-NEXT:    scvtf v4.4s, v4.4s
1317; CHECK-NEXT:    scvtf v5.4s, v0.4s
1318; CHECK-NEXT:    scvtf v6.4s, v1.4s
1319; CHECK-NEXT:    ushr v0.4s, v3.4s, #16
1320; CHECK-NEXT:    ushr v1.4s, v4.4s, #16
1321; CHECK-NEXT:    ushr v16.4s, v5.4s, #16
1322; CHECK-NEXT:    ushr v17.4s, v6.4s, #16
1323; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
1324; CHECK-NEXT:    and v1.16b, v1.16b, v2.16b
1325; CHECK-NEXT:    and v16.16b, v16.16b, v2.16b
1326; CHECK-NEXT:    and v2.16b, v17.16b, v2.16b
1327; CHECK-NEXT:    add v0.4s, v0.4s, v7.4s
1328; CHECK-NEXT:    add v1.4s, v1.4s, v7.4s
1329; CHECK-NEXT:    add v2.4s, v2.4s, v7.4s
1330; CHECK-NEXT:    addhn v0.4h, v3.4s, v0.4s
1331; CHECK-NEXT:    addhn v1.4h, v4.4s, v1.4s
1332; CHECK-NEXT:    add v3.4s, v16.4s, v7.4s
1333; CHECK-NEXT:    addhn2 v0.8h, v5.4s, v3.4s
1334; CHECK-NEXT:    addhn2 v1.8h, v6.4s, v2.4s
1335; CHECK-NEXT:    ret
1336entry:
1337  %c = sitofp <16 x i16> %a to <16 x bfloat>
1338  ret <16 x bfloat> %c
1339}
1340
; uitofp <16 x i16> -> <16 x bfloat>.
; Expected code: ushll/ushll2 zero-extend both .8h inputs (v0, v1) into four
; .4s vectors, which are converted with ucvtf. For each, bit 16 of the f32 lane
; (ushr #16, and with #1) plus the "#127, msl #8" constant is formed first, and
; addhn/addhn2 add that to the converted value and pack the high 16 bits into
; the two .8h results in v0 and v1.
1341define <16 x bfloat> @utofp_v16i16_v16bf16(<16 x i16> %a) {
1342; CHECK-LABEL: utofp_v16i16_v16bf16:
1343; CHECK:       // %bb.0: // %entry
1344; CHECK-NEXT:    ushll v3.4s, v0.4h, #0
1345; CHECK-NEXT:    ushll v4.4s, v1.4h, #0
1346; CHECK-NEXT:    ushll2 v0.4s, v0.8h, #0
1347; CHECK-NEXT:    ushll2 v1.4s, v1.8h, #0
1348; CHECK-NEXT:    movi v2.4s, #1
1349; CHECK-NEXT:    movi v7.4s, #127, msl #8
1350; CHECK-NEXT:    ucvtf v3.4s, v3.4s
1351; CHECK-NEXT:    ucvtf v4.4s, v4.4s
1352; CHECK-NEXT:    ucvtf v5.4s, v0.4s
1353; CHECK-NEXT:    ucvtf v6.4s, v1.4s
1354; CHECK-NEXT:    ushr v0.4s, v3.4s, #16
1355; CHECK-NEXT:    ushr v1.4s, v4.4s, #16
1356; CHECK-NEXT:    ushr v16.4s, v5.4s, #16
1357; CHECK-NEXT:    ushr v17.4s, v6.4s, #16
1358; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
1359; CHECK-NEXT:    and v1.16b, v1.16b, v2.16b
1360; CHECK-NEXT:    and v16.16b, v16.16b, v2.16b
1361; CHECK-NEXT:    and v2.16b, v17.16b, v2.16b
1362; CHECK-NEXT:    add v0.4s, v0.4s, v7.4s
1363; CHECK-NEXT:    add v1.4s, v1.4s, v7.4s
1364; CHECK-NEXT:    add v2.4s, v2.4s, v7.4s
1365; CHECK-NEXT:    addhn v0.4h, v3.4s, v0.4s
1366; CHECK-NEXT:    addhn v1.4h, v4.4s, v1.4s
1367; CHECK-NEXT:    add v3.4s, v16.4s, v7.4s
1368; CHECK-NEXT:    addhn2 v0.8h, v5.4s, v3.4s
1369; CHECK-NEXT:    addhn2 v1.8h, v6.4s, v2.4s
1370; CHECK-NEXT:    ret
1371entry:
1372  %c = uitofp <16 x i16> %a to <16 x bfloat>
1373  ret <16 x bfloat> %c
1374}
1375
; sitofp <32 x i16> -> <32 x bfloat>.
; Expected code: sshll/sshll2 sign-extend the four .8h inputs (v0-v3) into
; eight .4s vectors, which are converted with scvtf. For each, bit 16 of the
; f32 lane (ushr #16, and with #1) plus the "#127, msl #8" constant is formed
; first, and addhn/addhn2 add that to the converted value and pack the high 16
; bits into the four .8h results in v0-v3.
1376define <32 x bfloat> @stofp_v32i16_v32bf16(<32 x i16> %a) {
1377; CHECK-LABEL: stofp_v32i16_v32bf16:
1378; CHECK:       // %bb.0: // %entry
1379; CHECK-NEXT:    sshll v4.4s, v1.4h, #0
1380; CHECK-NEXT:    sshll v5.4s, v0.4h, #0
1381; CHECK-NEXT:    sshll v6.4s, v2.4h, #0
1382; CHECK-NEXT:    sshll v7.4s, v3.4h, #0
1383; CHECK-NEXT:    sshll2 v0.4s, v0.8h, #0
1384; CHECK-NEXT:    sshll2 v1.4s, v1.8h, #0
1385; CHECK-NEXT:    sshll2 v2.4s, v2.8h, #0
1386; CHECK-NEXT:    sshll2 v3.4s, v3.8h, #0
1387; CHECK-NEXT:    movi v16.4s, #1
1388; CHECK-NEXT:    scvtf v5.4s, v5.4s
1389; CHECK-NEXT:    scvtf v4.4s, v4.4s
1390; CHECK-NEXT:    scvtf v6.4s, v6.4s
1391; CHECK-NEXT:    scvtf v7.4s, v7.4s
1392; CHECK-NEXT:    scvtf v17.4s, v0.4s
1393; CHECK-NEXT:    scvtf v18.4s, v1.4s
1394; CHECK-NEXT:    scvtf v19.4s, v2.4s
1395; CHECK-NEXT:    scvtf v20.4s, v3.4s
1396; CHECK-NEXT:    movi v21.4s, #127, msl #8
1397; CHECK-NEXT:    ushr v0.4s, v5.4s, #16
1398; CHECK-NEXT:    ushr v1.4s, v4.4s, #16
1399; CHECK-NEXT:    ushr v2.4s, v6.4s, #16
1400; CHECK-NEXT:    ushr v3.4s, v7.4s, #16
1401; CHECK-NEXT:    ushr v22.4s, v17.4s, #16
1402; CHECK-NEXT:    ushr v23.4s, v18.4s, #16
1403; CHECK-NEXT:    ushr v24.4s, v19.4s, #16
1404; CHECK-NEXT:    ushr v25.4s, v20.4s, #16
1405; CHECK-NEXT:    and v0.16b, v0.16b, v16.16b
1406; CHECK-NEXT:    and v1.16b, v1.16b, v16.16b
1407; CHECK-NEXT:    and v2.16b, v2.16b, v16.16b
1408; CHECK-NEXT:    and v3.16b, v3.16b, v16.16b
1409; CHECK-NEXT:    and v22.16b, v22.16b, v16.16b
1410; CHECK-NEXT:    and v23.16b, v23.16b, v16.16b
1411; CHECK-NEXT:    and v24.16b, v24.16b, v16.16b
1412; CHECK-NEXT:    and v16.16b, v25.16b, v16.16b
1413; CHECK-NEXT:    add v0.4s, v0.4s, v21.4s
1414; CHECK-NEXT:    add v1.4s, v1.4s, v21.4s
1415; CHECK-NEXT:    add v2.4s, v2.4s, v21.4s
1416; CHECK-NEXT:    add v3.4s, v3.4s, v21.4s
1417; CHECK-NEXT:    addhn v0.4h, v5.4s, v0.4s
1418; CHECK-NEXT:    addhn v1.4h, v4.4s, v1.4s
1419; CHECK-NEXT:    addhn v2.4h, v6.4s, v2.4s
1420; CHECK-NEXT:    addhn v3.4h, v7.4s, v3.4s
1421; CHECK-NEXT:    add v4.4s, v22.4s, v21.4s
1422; CHECK-NEXT:    add v5.4s, v23.4s, v21.4s
1423; CHECK-NEXT:    add v6.4s, v24.4s, v21.4s
1424; CHECK-NEXT:    add v7.4s, v16.4s, v21.4s
1425; CHECK-NEXT:    addhn2 v0.8h, v17.4s, v4.4s
1426; CHECK-NEXT:    addhn2 v1.8h, v18.4s, v5.4s
1427; CHECK-NEXT:    addhn2 v2.8h, v19.4s, v6.4s
1428; CHECK-NEXT:    addhn2 v3.8h, v20.4s, v7.4s
1429; CHECK-NEXT:    ret
1430entry:
1431  %c = sitofp <32 x i16> %a to <32 x bfloat>
1432  ret <32 x bfloat> %c
1433}
1434
; uitofp <32 x i16> -> <32 x bfloat>.
; Expected code: ushll/ushll2 zero-extend the four .8h inputs (v0-v3) into
; eight .4s vectors, which are converted with ucvtf. For each, bit 16 of the
; f32 lane (ushr #16, and with #1) plus the "#127, msl #8" constant is formed
; first, and addhn/addhn2 add that to the converted value and pack the high 16
; bits into the four .8h results in v0-v3.
1435define <32 x bfloat> @utofp_v32i16_v32bf16(<32 x i16> %a) {
1436; CHECK-LABEL: utofp_v32i16_v32bf16:
1437; CHECK:       // %bb.0: // %entry
1438; CHECK-NEXT:    ushll v4.4s, v1.4h, #0
1439; CHECK-NEXT:    ushll v5.4s, v0.4h, #0
1440; CHECK-NEXT:    ushll v6.4s, v2.4h, #0
1441; CHECK-NEXT:    ushll v7.4s, v3.4h, #0
1442; CHECK-NEXT:    ushll2 v0.4s, v0.8h, #0
1443; CHECK-NEXT:    ushll2 v1.4s, v1.8h, #0
1444; CHECK-NEXT:    ushll2 v2.4s, v2.8h, #0
1445; CHECK-NEXT:    ushll2 v3.4s, v3.8h, #0
1446; CHECK-NEXT:    movi v16.4s, #1
1447; CHECK-NEXT:    ucvtf v5.4s, v5.4s
1448; CHECK-NEXT:    ucvtf v4.4s, v4.4s
1449; CHECK-NEXT:    ucvtf v6.4s, v6.4s
1450; CHECK-NEXT:    ucvtf v7.4s, v7.4s
1451; CHECK-NEXT:    ucvtf v17.4s, v0.4s
1452; CHECK-NEXT:    ucvtf v18.4s, v1.4s
1453; CHECK-NEXT:    ucvtf v19.4s, v2.4s
1454; CHECK-NEXT:    ucvtf v20.4s, v3.4s
1455; CHECK-NEXT:    movi v21.4s, #127, msl #8
1456; CHECK-NEXT:    ushr v0.4s, v5.4s, #16
1457; CHECK-NEXT:    ushr v1.4s, v4.4s, #16
1458; CHECK-NEXT:    ushr v2.4s, v6.4s, #16
1459; CHECK-NEXT:    ushr v3.4s, v7.4s, #16
1460; CHECK-NEXT:    ushr v22.4s, v17.4s, #16
1461; CHECK-NEXT:    ushr v23.4s, v18.4s, #16
1462; CHECK-NEXT:    ushr v24.4s, v19.4s, #16
1463; CHECK-NEXT:    ushr v25.4s, v20.4s, #16
1464; CHECK-NEXT:    and v0.16b, v0.16b, v16.16b
1465; CHECK-NEXT:    and v1.16b, v1.16b, v16.16b
1466; CHECK-NEXT:    and v2.16b, v2.16b, v16.16b
1467; CHECK-NEXT:    and v3.16b, v3.16b, v16.16b
1468; CHECK-NEXT:    and v22.16b, v22.16b, v16.16b
1469; CHECK-NEXT:    and v23.16b, v23.16b, v16.16b
1470; CHECK-NEXT:    and v24.16b, v24.16b, v16.16b
1471; CHECK-NEXT:    and v16.16b, v25.16b, v16.16b
1472; CHECK-NEXT:    add v0.4s, v0.4s, v21.4s
1473; CHECK-NEXT:    add v1.4s, v1.4s, v21.4s
1474; CHECK-NEXT:    add v2.4s, v2.4s, v21.4s
1475; CHECK-NEXT:    add v3.4s, v3.4s, v21.4s
1476; CHECK-NEXT:    addhn v0.4h, v5.4s, v0.4s
1477; CHECK-NEXT:    addhn v1.4h, v4.4s, v1.4s
1478; CHECK-NEXT:    addhn v2.4h, v6.4s, v2.4s
1479; CHECK-NEXT:    addhn v3.4h, v7.4s, v3.4s
1480; CHECK-NEXT:    add v4.4s, v22.4s, v21.4s
1481; CHECK-NEXT:    add v5.4s, v23.4s, v21.4s
1482; CHECK-NEXT:    add v6.4s, v24.4s, v21.4s
1483; CHECK-NEXT:    add v7.4s, v16.4s, v21.4s
1484; CHECK-NEXT:    addhn2 v0.8h, v17.4s, v4.4s
1485; CHECK-NEXT:    addhn2 v1.8h, v18.4s, v5.4s
1486; CHECK-NEXT:    addhn2 v2.8h, v19.4s, v6.4s
1487; CHECK-NEXT:    addhn2 v3.8h, v20.4s, v7.4s
1488; CHECK-NEXT:    ret
1489entry:
1490  %c = uitofp <32 x i16> %a to <32 x bfloat>
1491  ret <32 x bfloat> %c
1492}
1493
; sitofp <2 x i8> -> <2 x bfloat>.
; Expected code: unlike the wider cases, this one is scalarized. Both elements
; are moved from the 32-bit lanes of v0 into w registers, sign-extended from 8
; bits (sxtb), and converted one at a time with scalar scvtf. Rounding is done
; in general-purpose registers: bit 16 of the f32 bit pattern (ubfx #16, #1)
; and 0x7fff are added, and lsr #16 keeps the top half. The two results are
; then reassembled into h lanes 0 and 1 of v0.
1494define <2 x bfloat> @stofp_v2i8_v2bf16(<2 x i8> %a) {
1495; CHECK-LABEL: stofp_v2i8_v2bf16:
1496; CHECK:       // %bb.0: // %entry
1497; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
1498; CHECK-NEXT:    mov w9, v0.s[1]
1499; CHECK-NEXT:    fmov w10, s0
1500; CHECK-NEXT:    mov w8, #32767 // =0x7fff
1501; CHECK-NEXT:    sxtb w10, w10
1502; CHECK-NEXT:    sxtb w9, w9
1503; CHECK-NEXT:    scvtf s1, w10
1504; CHECK-NEXT:    scvtf s0, w9
1505; CHECK-NEXT:    fmov w10, s1
1506; CHECK-NEXT:    fmov w9, s0
1507; CHECK-NEXT:    ubfx w12, w10, #16, #1
1508; CHECK-NEXT:    ubfx w11, w9, #16, #1
1509; CHECK-NEXT:    add w9, w9, w8
1510; CHECK-NEXT:    add w8, w10, w8
1511; CHECK-NEXT:    add w8, w12, w8
1512; CHECK-NEXT:    add w9, w11, w9
1513; CHECK-NEXT:    lsr w8, w8, #16
1514; CHECK-NEXT:    lsr w9, w9, #16
1515; CHECK-NEXT:    fmov s0, w8
1516; CHECK-NEXT:    fmov s1, w9
1517; CHECK-NEXT:    mov v0.h[1], v1.h[0]
1518; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
1519; CHECK-NEXT:    ret
1520entry:
1521  %c = sitofp <2 x i8> %a to <2 x bfloat>
1522  ret <2 x bfloat> %c
1523}
1524
; uitofp <2 x i8> -> <2 x bfloat>.
; Expected code: unlike the wider cases, this one is scalarized. Both elements
; are moved from the 32-bit lanes of v0 into w registers, masked to 8 bits
; (and #0xff), and converted one at a time with scalar ucvtf. Rounding is done
; in general-purpose registers: bit 16 of the f32 bit pattern (ubfx #16, #1)
; and 0x7fff are added, and lsr #16 keeps the top half. The two results are
; then reassembled into h lanes 0 and 1 of v0.
1525define <2 x bfloat> @utofp_v2i8_v2bf16(<2 x i8> %a) {
1526; CHECK-LABEL: utofp_v2i8_v2bf16:
1527; CHECK:       // %bb.0: // %entry
1528; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
1529; CHECK-NEXT:    mov w9, v0.s[1]
1530; CHECK-NEXT:    fmov w10, s0
1531; CHECK-NEXT:    mov w8, #32767 // =0x7fff
1532; CHECK-NEXT:    and w10, w10, #0xff
1533; CHECK-NEXT:    and w9, w9, #0xff
1534; CHECK-NEXT:    ucvtf s1, w10
1535; CHECK-NEXT:    ucvtf s0, w9
1536; CHECK-NEXT:    fmov w10, s1
1537; CHECK-NEXT:    fmov w9, s0
1538; CHECK-NEXT:    ubfx w12, w10, #16, #1
1539; CHECK-NEXT:    ubfx w11, w9, #16, #1
1540; CHECK-NEXT:    add w9, w9, w8
1541; CHECK-NEXT:    add w8, w10, w8
1542; CHECK-NEXT:    add w8, w12, w8
1543; CHECK-NEXT:    add w9, w11, w9
1544; CHECK-NEXT:    lsr w8, w8, #16
1545; CHECK-NEXT:    lsr w9, w9, #16
1546; CHECK-NEXT:    fmov s0, w8
1547; CHECK-NEXT:    fmov s1, w9
1548; CHECK-NEXT:    mov v0.h[1], v1.h[0]
1549; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
1550; CHECK-NEXT:    ret
1551entry:
1552  %c = uitofp <2 x i8> %a to <2 x bfloat>
1553  ret <2 x bfloat> %c
1554}
1555
; sitofp <3 x i8> -> <3 x bfloat>.
; Expected code: the three elements are read from w0, w1 and w2 and inserted
; into h lanes 0-2 of v0. shl #8 followed by sshr #8 on .4h sign-extends each
; lane from 8 bits, sshll widens to .4s, and scvtf converts. Rounding adds bit
; 16 of each f32 lane (ushr #16, and with #1) to the lane, and addhn adds the
; "#127, msl #8" constant while keeping the high 16 bits per lane.
1556define <3 x bfloat> @stofp_v3i8_v3bf16(<3 x i8> %a) {
1557; CHECK-LABEL: stofp_v3i8_v3bf16:
1558; CHECK:       // %bb.0: // %entry
1559; CHECK-NEXT:    fmov s0, w0
1560; CHECK-NEXT:    movi v1.4s, #1
1561; CHECK-NEXT:    mov v0.h[1], w1
1562; CHECK-NEXT:    mov v0.h[2], w2
1563; CHECK-NEXT:    shl v0.4h, v0.4h, #8
1564; CHECK-NEXT:    sshr v0.4h, v0.4h, #8
1565; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
1566; CHECK-NEXT:    scvtf v0.4s, v0.4s
1567; CHECK-NEXT:    ushr v2.4s, v0.4s, #16
1568; CHECK-NEXT:    and v1.16b, v2.16b, v1.16b
1569; CHECK-NEXT:    movi v2.4s, #127, msl #8
1570; CHECK-NEXT:    add v0.4s, v1.4s, v0.4s
1571; CHECK-NEXT:    addhn v0.4h, v0.4s, v2.4s
1572; CHECK-NEXT:    ret
1573entry:
1574  %c = sitofp <3 x i8> %a to <3 x bfloat>
1575  ret <3 x bfloat> %c
1576}
1577
; Unsigned <3 x i8> -> <3 x bfloat> conversion. The expected assembly is listed
; under the common FileCheck prefix that every RUN line of this file enables.
; Per the assertions below, the three elements arrive in w0-w2 and are inserted
; into 16-bit lanes of v0, zero-extended from 8 bits by clearing the high byte
; of each lane (bic #255, lsl #8), widened to 32-bit lanes and converted with a
; vector ucvtf. The narrowing step adds bit 16 of each f32 lane and the 0x7fff
; constant (movi #127, msl #8), with addhn keeping the upper 16 bits.
1578define <3 x bfloat> @utofp_v3i8_v3bf16(<3 x i8> %a) {
1579; CHECK-LABEL: utofp_v3i8_v3bf16:
1580; CHECK:       // %bb.0: // %entry
1581; CHECK-NEXT:    fmov s0, w0
1582; CHECK-NEXT:    movi v1.4s, #1
1583; CHECK-NEXT:    mov v0.h[1], w1
1584; CHECK-NEXT:    mov v0.h[2], w2
1585; CHECK-NEXT:    bic v0.4h, #255, lsl #8
1586; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
1587; CHECK-NEXT:    ucvtf v0.4s, v0.4s
1588; CHECK-NEXT:    ushr v2.4s, v0.4s, #16
1589; CHECK-NEXT:    and v1.16b, v2.16b, v1.16b
1590; CHECK-NEXT:    movi v2.4s, #127, msl #8
1591; CHECK-NEXT:    add v0.4s, v1.4s, v0.4s
1592; CHECK-NEXT:    addhn v0.4h, v0.4s, v2.4s
1593; CHECK-NEXT:    ret
1594entry:
1595  %c = uitofp <3 x i8> %a to <3 x bfloat>
1596  ret <3 x bfloat> %c
1597}
1598
; Signed <4 x i8> -> <4 x bfloat> conversion. The expected assembly is listed
; under the common FileCheck prefix that every RUN line of this file enables.
; Per the assertions below, the input is already in 16-bit lanes of v0; it is
; sign-extended from 8 bits with a shl/sshr #8 pair, widened to 32-bit lanes
; and converted with a vector scvtf. The narrowing step adds bit 16 of each f32
; lane and the 0x7fff constant (movi #127, msl #8), with addhn keeping the
; upper 16 bits of every lane.
1599define <4 x bfloat> @stofp_v4i8_v4bf16(<4 x i8> %a) {
1600; CHECK-LABEL: stofp_v4i8_v4bf16:
1601; CHECK:       // %bb.0: // %entry
1602; CHECK-NEXT:    shl v0.4h, v0.4h, #8
1603; CHECK-NEXT:    movi v1.4s, #1
1604; CHECK-NEXT:    sshr v0.4h, v0.4h, #8
1605; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
1606; CHECK-NEXT:    scvtf v0.4s, v0.4s
1607; CHECK-NEXT:    ushr v2.4s, v0.4s, #16
1608; CHECK-NEXT:    and v1.16b, v2.16b, v1.16b
1609; CHECK-NEXT:    movi v2.4s, #127, msl #8
1610; CHECK-NEXT:    add v0.4s, v1.4s, v0.4s
1611; CHECK-NEXT:    addhn v0.4h, v0.4s, v2.4s
1612; CHECK-NEXT:    ret
1613entry:
1614  %c = sitofp <4 x i8> %a to <4 x bfloat>
1615  ret <4 x bfloat> %c
1616}
1617
; Unsigned <4 x i8> -> <4 x bfloat> conversion. The expected assembly is listed
; under the common FileCheck prefix that every RUN line of this file enables.
; Per the assertions below, the input is already in 16-bit lanes of v0; the
; high byte of each lane is cleared (bic #255, lsl #8), the lanes are widened
; to 32 bits and converted with a vector ucvtf. The narrowing step adds bit 16
; of each f32 lane and the 0x7fff constant (movi #127, msl #8), with addhn
; keeping the upper 16 bits of every lane.
1618define <4 x bfloat> @utofp_v4i8_v4bf16(<4 x i8> %a) {
1619; CHECK-LABEL: utofp_v4i8_v4bf16:
1620; CHECK:       // %bb.0: // %entry
1621; CHECK-NEXT:    bic v0.4h, #255, lsl #8
1622; CHECK-NEXT:    movi v1.4s, #1
1623; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
1624; CHECK-NEXT:    ucvtf v0.4s, v0.4s
1625; CHECK-NEXT:    ushr v2.4s, v0.4s, #16
1626; CHECK-NEXT:    and v1.16b, v2.16b, v1.16b
1627; CHECK-NEXT:    movi v2.4s, #127, msl #8
1628; CHECK-NEXT:    add v0.4s, v1.4s, v0.4s
1629; CHECK-NEXT:    addhn v0.4h, v0.4s, v2.4s
1630; CHECK-NEXT:    ret
1631entry:
1632  %c = uitofp <4 x i8> %a to <4 x bfloat>
1633  ret <4 x bfloat> %c
1634}
1635
; Signed <8 x i8> -> <8 x bfloat> conversion. The expected assembly is listed
; under the common FileCheck prefix that every RUN line of this file enables.
; Per the assertions below, the bytes are sign-extended in two steps
; (sshll to 16-bit lanes, then sshll/sshll2 to two 4 x 32-bit halves), and each
; half is converted with scvtf. For every half, bit 16 of the f32 lanes is
; added to the 0x7fff constant (movi #127, msl #8); addhn and addhn2 then add
; that bias to the f32 bits and write the upper 16 bits into the low and high
; halves of v0.
1636define <8 x bfloat> @stofp_v8i8_v8bf16(<8 x i8> %a) {
1637; CHECK-LABEL: stofp_v8i8_v8bf16:
1638; CHECK:       // %bb.0: // %entry
1639; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
1640; CHECK-NEXT:    movi v1.4s, #1
1641; CHECK-NEXT:    movi v4.4s, #127, msl #8
1642; CHECK-NEXT:    sshll v2.4s, v0.4h, #0
1643; CHECK-NEXT:    sshll2 v0.4s, v0.8h, #0
1644; CHECK-NEXT:    scvtf v2.4s, v2.4s
1645; CHECK-NEXT:    scvtf v3.4s, v0.4s
1646; CHECK-NEXT:    ushr v0.4s, v2.4s, #16
1647; CHECK-NEXT:    ushr v5.4s, v3.4s, #16
1648; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
1649; CHECK-NEXT:    and v1.16b, v5.16b, v1.16b
1650; CHECK-NEXT:    add v0.4s, v0.4s, v4.4s
1651; CHECK-NEXT:    add v1.4s, v1.4s, v4.4s
1652; CHECK-NEXT:    addhn v0.4h, v2.4s, v0.4s
1653; CHECK-NEXT:    addhn2 v0.8h, v3.4s, v1.4s
1654; CHECK-NEXT:    ret
1655entry:
1656  %c = sitofp <8 x i8> %a to <8 x bfloat>
1657  ret <8 x bfloat> %c
1658}
1659
; Unsigned <8 x i8> -> <8 x bfloat> conversion. The expected assembly is listed
; under the common FileCheck prefix that every RUN line of this file enables.
; Per the assertions below, the bytes are zero-extended in two steps
; (ushll to 16-bit lanes, then ushll/ushll2 to two 4 x 32-bit halves), and each
; half is converted with ucvtf. For every half, bit 16 of the f32 lanes is
; added to the 0x7fff constant (movi #127, msl #8); addhn and addhn2 then add
; that bias to the f32 bits and write the upper 16 bits into the low and high
; halves of v0.
1660define <8 x bfloat> @utofp_v8i8_v8bf16(<8 x i8> %a) {
1661; CHECK-LABEL: utofp_v8i8_v8bf16:
1662; CHECK:       // %bb.0: // %entry
1663; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
1664; CHECK-NEXT:    movi v1.4s, #1
1665; CHECK-NEXT:    movi v4.4s, #127, msl #8
1666; CHECK-NEXT:    ushll v2.4s, v0.4h, #0
1667; CHECK-NEXT:    ushll2 v0.4s, v0.8h, #0
1668; CHECK-NEXT:    ucvtf v2.4s, v2.4s
1669; CHECK-NEXT:    ucvtf v3.4s, v0.4s
1670; CHECK-NEXT:    ushr v0.4s, v2.4s, #16
1671; CHECK-NEXT:    ushr v5.4s, v3.4s, #16
1672; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
1673; CHECK-NEXT:    and v1.16b, v5.16b, v1.16b
1674; CHECK-NEXT:    add v0.4s, v0.4s, v4.4s
1675; CHECK-NEXT:    add v1.4s, v1.4s, v4.4s
1676; CHECK-NEXT:    addhn v0.4h, v2.4s, v0.4s
1677; CHECK-NEXT:    addhn2 v0.8h, v3.4s, v1.4s
1678; CHECK-NEXT:    ret
1679entry:
1680  %c = uitofp <8 x i8> %a to <8 x bfloat>
1681  ret <8 x bfloat> %c
1682}
1683
; Signed <16 x i8> -> <16 x bfloat> conversion. The expected assembly is listed
; under the common FileCheck prefix that every RUN line of this file enables.
; Per the assertions below, the 16 bytes are sign-extended via sshll/sshll2
; into four 4 x 32-bit vectors, each converted with scvtf. For every vector,
; bit 16 of the f32 lanes is added to the 0x7fff constant (movi #127, msl #8);
; addhn/addhn2 then add that bias to the f32 bits and pack the upper 16 bits,
; leaving the result in v0 and v1.
1684define <16 x bfloat> @stofp_v16i8_v16bf16(<16 x i8> %a) {
1685; CHECK-LABEL: stofp_v16i8_v16bf16:
1686; CHECK:       // %bb.0: // %entry
1687; CHECK-NEXT:    sshll2 v2.8h, v0.16b, #0
1688; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
1689; CHECK-NEXT:    movi v1.4s, #1
1690; CHECK-NEXT:    movi v7.4s, #127, msl #8
1691; CHECK-NEXT:    sshll v3.4s, v2.4h, #0
1692; CHECK-NEXT:    sshll v4.4s, v0.4h, #0
1693; CHECK-NEXT:    sshll2 v2.4s, v2.8h, #0
1694; CHECK-NEXT:    sshll2 v0.4s, v0.8h, #0
1695; CHECK-NEXT:    scvtf v3.4s, v3.4s
1696; CHECK-NEXT:    scvtf v4.4s, v4.4s
1697; CHECK-NEXT:    scvtf v2.4s, v2.4s
1698; CHECK-NEXT:    scvtf v6.4s, v0.4s
1699; CHECK-NEXT:    ushr v5.4s, v3.4s, #16
1700; CHECK-NEXT:    ushr v0.4s, v4.4s, #16
1701; CHECK-NEXT:    ushr v16.4s, v2.4s, #16
1702; CHECK-NEXT:    ushr v17.4s, v6.4s, #16
1703; CHECK-NEXT:    and v5.16b, v5.16b, v1.16b
1704; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
1705; CHECK-NEXT:    and v16.16b, v16.16b, v1.16b
1706; CHECK-NEXT:    and v17.16b, v17.16b, v1.16b
1707; CHECK-NEXT:    add v5.4s, v5.4s, v7.4s
1708; CHECK-NEXT:    add v0.4s, v0.4s, v7.4s
1709; CHECK-NEXT:    addhn v1.4h, v3.4s, v5.4s
1710; CHECK-NEXT:    addhn v0.4h, v4.4s, v0.4s
1711; CHECK-NEXT:    add v3.4s, v16.4s, v7.4s
1712; CHECK-NEXT:    add v4.4s, v17.4s, v7.4s
1713; CHECK-NEXT:    addhn2 v1.8h, v2.4s, v3.4s
1714; CHECK-NEXT:    addhn2 v0.8h, v6.4s, v4.4s
1715; CHECK-NEXT:    ret
1716entry:
1717  %c = sitofp <16 x i8> %a to <16 x bfloat>
1718  ret <16 x bfloat> %c
1719}
1720
; Unsigned <16 x i8> -> <16 x bfloat> conversion. The expected assembly is
; listed under the common FileCheck prefix that every RUN line of this file
; enables. Per the assertions below, the 16 bytes are zero-extended via
; ushll/ushll2 into four 4 x 32-bit vectors, each converted with ucvtf. For
; every vector, bit 16 of the f32 lanes is added to the 0x7fff constant
; (movi #127, msl #8); addhn/addhn2 then add that bias to the f32 bits and pack
; the upper 16 bits, leaving the result in v0 and v1.
1721define <16 x bfloat> @utofp_v16i8_v16bf16(<16 x i8> %a) {
1722; CHECK-LABEL: utofp_v16i8_v16bf16:
1723; CHECK:       // %bb.0: // %entry
1724; CHECK-NEXT:    ushll2 v2.8h, v0.16b, #0
1725; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
1726; CHECK-NEXT:    movi v1.4s, #1
1727; CHECK-NEXT:    movi v7.4s, #127, msl #8
1728; CHECK-NEXT:    ushll v3.4s, v2.4h, #0
1729; CHECK-NEXT:    ushll v4.4s, v0.4h, #0
1730; CHECK-NEXT:    ushll2 v2.4s, v2.8h, #0
1731; CHECK-NEXT:    ushll2 v0.4s, v0.8h, #0
1732; CHECK-NEXT:    ucvtf v3.4s, v3.4s
1733; CHECK-NEXT:    ucvtf v4.4s, v4.4s
1734; CHECK-NEXT:    ucvtf v2.4s, v2.4s
1735; CHECK-NEXT:    ucvtf v6.4s, v0.4s
1736; CHECK-NEXT:    ushr v5.4s, v3.4s, #16
1737; CHECK-NEXT:    ushr v0.4s, v4.4s, #16
1738; CHECK-NEXT:    ushr v16.4s, v2.4s, #16
1739; CHECK-NEXT:    ushr v17.4s, v6.4s, #16
1740; CHECK-NEXT:    and v5.16b, v5.16b, v1.16b
1741; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
1742; CHECK-NEXT:    and v16.16b, v16.16b, v1.16b
1743; CHECK-NEXT:    and v17.16b, v17.16b, v1.16b
1744; CHECK-NEXT:    add v5.4s, v5.4s, v7.4s
1745; CHECK-NEXT:    add v0.4s, v0.4s, v7.4s
1746; CHECK-NEXT:    addhn v1.4h, v3.4s, v5.4s
1747; CHECK-NEXT:    addhn v0.4h, v4.4s, v0.4s
1748; CHECK-NEXT:    add v3.4s, v16.4s, v7.4s
1749; CHECK-NEXT:    add v4.4s, v17.4s, v7.4s
1750; CHECK-NEXT:    addhn2 v1.8h, v2.4s, v3.4s
1751; CHECK-NEXT:    addhn2 v0.8h, v6.4s, v4.4s
1752; CHECK-NEXT:    ret
1753entry:
1754  %c = uitofp <16 x i8> %a to <16 x bfloat>
1755  ret <16 x bfloat> %c
1756}
1757
; Signed <32 x i8> -> <32 x bfloat> conversion. The expected assembly is listed
; under the common FileCheck prefix that every RUN line of this file enables.
; Per the assertions below, the input in v0 and v1 is sign-extended via
; sshll/sshll2 into eight 4 x 32-bit vectors, each converted with scvtf. For
; every vector, bit 16 of the f32 lanes is added to the 0x7fff constant
; (movi #127, msl #8); addhn/addhn2 then add that bias to the f32 bits and pack
; the upper 16 bits, leaving the result in v0-v3.
1758define <32 x bfloat> @stofp_v32i8_v32bf16(<32 x i8> %a) {
1759; CHECK-LABEL: stofp_v32i8_v32bf16:
1760; CHECK:       // %bb.0: // %entry
1761; CHECK-NEXT:    sshll2 v3.8h, v0.16b, #0
1762; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
1763; CHECK-NEXT:    sshll2 v4.8h, v1.16b, #0
1764; CHECK-NEXT:    sshll v1.8h, v1.8b, #0
1765; CHECK-NEXT:    movi v2.4s, #1
1766; CHECK-NEXT:    movi v21.4s, #127, msl #8
1767; CHECK-NEXT:    sshll v5.4s, v3.4h, #0
1768; CHECK-NEXT:    sshll v6.4s, v0.4h, #0
1769; CHECK-NEXT:    sshll v7.4s, v4.4h, #0
1770; CHECK-NEXT:    sshll v16.4s, v1.4h, #0
1771; CHECK-NEXT:    sshll2 v3.4s, v3.8h, #0
1772; CHECK-NEXT:    sshll2 v4.4s, v4.8h, #0
1773; CHECK-NEXT:    sshll2 v0.4s, v0.8h, #0
1774; CHECK-NEXT:    sshll2 v1.4s, v1.8h, #0
1775; CHECK-NEXT:    scvtf v5.4s, v5.4s
1776; CHECK-NEXT:    scvtf v6.4s, v6.4s
1777; CHECK-NEXT:    scvtf v7.4s, v7.4s
1778; CHECK-NEXT:    scvtf v16.4s, v16.4s
1779; CHECK-NEXT:    scvtf v17.4s, v3.4s
1780; CHECK-NEXT:    scvtf v4.4s, v4.4s
1781; CHECK-NEXT:    scvtf v18.4s, v0.4s
1782; CHECK-NEXT:    scvtf v19.4s, v1.4s
1783; CHECK-NEXT:    ushr v0.4s, v5.4s, #16
1784; CHECK-NEXT:    ushr v3.4s, v6.4s, #16
1785; CHECK-NEXT:    ushr v1.4s, v7.4s, #16
1786; CHECK-NEXT:    ushr v20.4s, v16.4s, #16
1787; CHECK-NEXT:    ushr v23.4s, v17.4s, #16
1788; CHECK-NEXT:    ushr v24.4s, v4.4s, #16
1789; CHECK-NEXT:    ushr v22.4s, v18.4s, #16
1790; CHECK-NEXT:    ushr v25.4s, v19.4s, #16
1791; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
1792; CHECK-NEXT:    and v3.16b, v3.16b, v2.16b
1793; CHECK-NEXT:    and v1.16b, v1.16b, v2.16b
1794; CHECK-NEXT:    and v20.16b, v20.16b, v2.16b
1795; CHECK-NEXT:    and v23.16b, v23.16b, v2.16b
1796; CHECK-NEXT:    and v24.16b, v24.16b, v2.16b
1797; CHECK-NEXT:    and v22.16b, v22.16b, v2.16b
1798; CHECK-NEXT:    and v25.16b, v25.16b, v2.16b
1799; CHECK-NEXT:    add v0.4s, v0.4s, v21.4s
1800; CHECK-NEXT:    add v3.4s, v3.4s, v21.4s
1801; CHECK-NEXT:    add v26.4s, v1.4s, v21.4s
1802; CHECK-NEXT:    add v20.4s, v20.4s, v21.4s
1803; CHECK-NEXT:    addhn v1.4h, v5.4s, v0.4s
1804; CHECK-NEXT:    addhn v0.4h, v6.4s, v3.4s
1805; CHECK-NEXT:    addhn v3.4h, v7.4s, v26.4s
1806; CHECK-NEXT:    addhn v2.4h, v16.4s, v20.4s
1807; CHECK-NEXT:    add v5.4s, v22.4s, v21.4s
1808; CHECK-NEXT:    add v6.4s, v23.4s, v21.4s
1809; CHECK-NEXT:    add v7.4s, v24.4s, v21.4s
1810; CHECK-NEXT:    add v16.4s, v25.4s, v21.4s
1811; CHECK-NEXT:    addhn2 v0.8h, v18.4s, v5.4s
1812; CHECK-NEXT:    addhn2 v1.8h, v17.4s, v6.4s
1813; CHECK-NEXT:    addhn2 v3.8h, v4.4s, v7.4s
1814; CHECK-NEXT:    addhn2 v2.8h, v19.4s, v16.4s
1815; CHECK-NEXT:    ret
1816entry:
1817  %c = sitofp <32 x i8> %a to <32 x bfloat>
1818  ret <32 x bfloat> %c
1819}
1820
; Unsigned <32 x i8> -> <32 x bfloat> conversion. The expected assembly is
; listed under the common FileCheck prefix that every RUN line of this file
; enables. Per the assertions below, the input in v0 and v1 is zero-extended
; via ushll/ushll2 into eight 4 x 32-bit vectors, each converted with ucvtf.
; For every vector, bit 16 of the f32 lanes is added to the 0x7fff constant
; (movi #127, msl #8); addhn/addhn2 then add that bias to the f32 bits and pack
; the upper 16 bits, leaving the result in v0-v3.
1821define <32 x bfloat> @utofp_v32i8_v32bf16(<32 x i8> %a) {
1822; CHECK-LABEL: utofp_v32i8_v32bf16:
1823; CHECK:       // %bb.0: // %entry
1824; CHECK-NEXT:    ushll2 v3.8h, v0.16b, #0
1825; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
1826; CHECK-NEXT:    ushll2 v4.8h, v1.16b, #0
1827; CHECK-NEXT:    ushll v1.8h, v1.8b, #0
1828; CHECK-NEXT:    movi v2.4s, #1
1829; CHECK-NEXT:    movi v21.4s, #127, msl #8
1830; CHECK-NEXT:    ushll v5.4s, v3.4h, #0
1831; CHECK-NEXT:    ushll v6.4s, v0.4h, #0
1832; CHECK-NEXT:    ushll v7.4s, v4.4h, #0
1833; CHECK-NEXT:    ushll v16.4s, v1.4h, #0
1834; CHECK-NEXT:    ushll2 v3.4s, v3.8h, #0
1835; CHECK-NEXT:    ushll2 v4.4s, v4.8h, #0
1836; CHECK-NEXT:    ushll2 v0.4s, v0.8h, #0
1837; CHECK-NEXT:    ushll2 v1.4s, v1.8h, #0
1838; CHECK-NEXT:    ucvtf v5.4s, v5.4s
1839; CHECK-NEXT:    ucvtf v6.4s, v6.4s
1840; CHECK-NEXT:    ucvtf v7.4s, v7.4s
1841; CHECK-NEXT:    ucvtf v16.4s, v16.4s
1842; CHECK-NEXT:    ucvtf v17.4s, v3.4s
1843; CHECK-NEXT:    ucvtf v4.4s, v4.4s
1844; CHECK-NEXT:    ucvtf v18.4s, v0.4s
1845; CHECK-NEXT:    ucvtf v19.4s, v1.4s
1846; CHECK-NEXT:    ushr v0.4s, v5.4s, #16
1847; CHECK-NEXT:    ushr v3.4s, v6.4s, #16
1848; CHECK-NEXT:    ushr v1.4s, v7.4s, #16
1849; CHECK-NEXT:    ushr v20.4s, v16.4s, #16
1850; CHECK-NEXT:    ushr v23.4s, v17.4s, #16
1851; CHECK-NEXT:    ushr v24.4s, v4.4s, #16
1852; CHECK-NEXT:    ushr v22.4s, v18.4s, #16
1853; CHECK-NEXT:    ushr v25.4s, v19.4s, #16
1854; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
1855; CHECK-NEXT:    and v3.16b, v3.16b, v2.16b
1856; CHECK-NEXT:    and v1.16b, v1.16b, v2.16b
1857; CHECK-NEXT:    and v20.16b, v20.16b, v2.16b
1858; CHECK-NEXT:    and v23.16b, v23.16b, v2.16b
1859; CHECK-NEXT:    and v24.16b, v24.16b, v2.16b
1860; CHECK-NEXT:    and v22.16b, v22.16b, v2.16b
1861; CHECK-NEXT:    and v25.16b, v25.16b, v2.16b
1862; CHECK-NEXT:    add v0.4s, v0.4s, v21.4s
1863; CHECK-NEXT:    add v3.4s, v3.4s, v21.4s
1864; CHECK-NEXT:    add v26.4s, v1.4s, v21.4s
1865; CHECK-NEXT:    add v20.4s, v20.4s, v21.4s
1866; CHECK-NEXT:    addhn v1.4h, v5.4s, v0.4s
1867; CHECK-NEXT:    addhn v0.4h, v6.4s, v3.4s
1868; CHECK-NEXT:    addhn v3.4h, v7.4s, v26.4s
1869; CHECK-NEXT:    addhn v2.4h, v16.4s, v20.4s
1870; CHECK-NEXT:    add v5.4s, v22.4s, v21.4s
1871; CHECK-NEXT:    add v6.4s, v23.4s, v21.4s
1872; CHECK-NEXT:    add v7.4s, v24.4s, v21.4s
1873; CHECK-NEXT:    add v16.4s, v25.4s, v21.4s
1874; CHECK-NEXT:    addhn2 v0.8h, v18.4s, v5.4s
1875; CHECK-NEXT:    addhn2 v1.8h, v17.4s, v6.4s
1876; CHECK-NEXT:    addhn2 v3.8h, v4.4s, v7.4s
1877; CHECK-NEXT:    addhn2 v2.8h, v19.4s, v16.4s
1878; CHECK-NEXT:    ret
1879entry:
1880  %c = uitofp <32 x i8> %a to <32 x bfloat>
1881  ret <32 x bfloat> %c
1882}
1883;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
1884; CHECK-GI: {{.*}}
1885; CHECK-GI-FP16: {{.*}}
1886; CHECK-GI-NOFP16: {{.*}}
1887; CHECK-SD: {{.*}}
1888; CHECK-SD-FP16: {{.*}}
1889; CHECK-SD-NOFP16: {{.*}}
1890