xref: /llvm-project/llvm/test/CodeGen/AMDGPU/fp_trunc_store_fp32_to_bf16.ll (revision d47c4984e9ea80ffd01efb084df9485d314d1d14)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a %s -o - | FileCheck %s
3
; scalar: fptrunc of a single float to bfloat, stored through the global
; (addrspace(1)) pointer %p.
; The expected code does the conversion in integer arithmetic: it adds 0x7fff
; plus bit 16 of the input (v_bfe_u32 + v_add3_u32), and when the input compares
; unordered with itself (NaN) it selects the input OR'ed with 0x400000 instead.
; The upper 16 bits of the selected value are written with
; global_store_short_d16_hi.
; The two leading v_mov instructions copy the pointer from v[1:2] to v[2:3]
; before it is used as the store address (presumably to satisfy an aligned
; VGPR-pair requirement on this target -- not established by this file).
4define void @scalar(float %num, ptr addrspace(1) %p) {
5; CHECK-LABEL: scalar:
6; CHECK:       ; %bb.0: ; %entry
7; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8; CHECK-NEXT:    v_mov_b32_e32 v3, v2
9; CHECK-NEXT:    v_mov_b32_e32 v2, v1
10; CHECK-NEXT:    v_bfe_u32 v1, v0, 16, 1
11; CHECK-NEXT:    s_movk_i32 s4, 0x7fff
12; CHECK-NEXT:    v_add3_u32 v1, v1, v0, s4
13; CHECK-NEXT:    v_or_b32_e32 v4, 0x400000, v0
14; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
15; CHECK-NEXT:    v_cndmask_b32_e32 v0, v1, v4, vcc
16; CHECK-NEXT:    global_store_short_d16_hi v[2:3], v0, off
17; CHECK-NEXT:    s_waitcnt vmcnt(0)
18; CHECK-NEXT:    s_setpc_b64 s[30:31]
19entry:
20  %conv = fptrunc float %num to bfloat
21  store bfloat %conv, ptr addrspace(1) %p, align 8
22  ret void
23}
24
; v2: fptrunc of <2 x float> to <2 x bfloat>, stored through the global
; (addrspace(1)) pointer %p.
; Each element goes through the same five-instruction sequence
; (v_bfe_u32, v_add3_u32 with 0x7fff, v_or_b32 with 0x400000, v_cmp_u_f32,
; v_cndmask_b32). The two results are then combined by v_perm_b32 with
; selector 0x7060302 into one dword, which is written with a single
; global_store_dword.
; s4 first holds the 0x7fff addend and is reloaded with the permute selector
; once both elements have been processed.
25define void @v2(<2 x float> %num, ptr addrspace(1) %p) {
26; CHECK-LABEL: v2:
27; CHECK:       ; %bb.0: ; %entry
28; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
29; CHECK-NEXT:    v_bfe_u32 v4, v0, 16, 1
30; CHECK-NEXT:    s_movk_i32 s4, 0x7fff
31; CHECK-NEXT:    v_add3_u32 v4, v4, v0, s4
32; CHECK-NEXT:    v_or_b32_e32 v5, 0x400000, v0
33; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
34; CHECK-NEXT:    v_cndmask_b32_e32 v0, v4, v5, vcc
35; CHECK-NEXT:    v_bfe_u32 v4, v1, 16, 1
36; CHECK-NEXT:    v_add3_u32 v4, v4, v1, s4
37; CHECK-NEXT:    v_or_b32_e32 v5, 0x400000, v1
38; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
39; CHECK-NEXT:    v_cndmask_b32_e32 v1, v4, v5, vcc
40; CHECK-NEXT:    s_mov_b32 s4, 0x7060302
41; CHECK-NEXT:    v_perm_b32 v0, v1, v0, s4
42; CHECK-NEXT:    global_store_dword v[2:3], v0, off
43; CHECK-NEXT:    s_waitcnt vmcnt(0)
44; CHECK-NEXT:    s_setpc_b64 s[30:31]
45entry:
46  %conv = fptrunc <2 x float> %num to <2 x bfloat>
47  store <2 x bfloat> %conv, ptr addrspace(1) %p, align 8
48  ret void
49}
50
; v3: fptrunc of <3 x float> to <3 x bfloat>, stored through the global
; (addrspace(1)) pointer %p.
; The odd element count splits the store in two: elements 0 and 1 are combined
; by v_perm_b32 (selector 0x7060302) and written with global_store_dword, while
; element 2 is written on its own with global_store_short_d16_hi at offset:4.
; The expected output issues the offset:4 store before the dword store.
; The two leading v_mov instructions copy the pointer from v[3:4] to v[4:5]
; before it is used as the store address (presumably to satisfy an aligned
; VGPR-pair requirement on this target -- not established by this file).
51define void @v3(<3 x float> %num, ptr addrspace(1) %p) {
52; CHECK-LABEL: v3:
53; CHECK:       ; %bb.0: ; %entry
54; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
55; CHECK-NEXT:    v_mov_b32_e32 v5, v4
56; CHECK-NEXT:    v_mov_b32_e32 v4, v3
57; CHECK-NEXT:    v_bfe_u32 v3, v0, 16, 1
58; CHECK-NEXT:    s_movk_i32 s4, 0x7fff
59; CHECK-NEXT:    v_add3_u32 v3, v3, v0, s4
60; CHECK-NEXT:    v_or_b32_e32 v6, 0x400000, v0
61; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
62; CHECK-NEXT:    v_cndmask_b32_e32 v0, v3, v6, vcc
63; CHECK-NEXT:    v_bfe_u32 v3, v1, 16, 1
64; CHECK-NEXT:    v_add3_u32 v3, v3, v1, s4
65; CHECK-NEXT:    v_or_b32_e32 v6, 0x400000, v1
66; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
67; CHECK-NEXT:    v_cndmask_b32_e32 v1, v3, v6, vcc
68; CHECK-NEXT:    s_mov_b32 s5, 0x7060302
69; CHECK-NEXT:    v_perm_b32 v0, v1, v0, s5
70; CHECK-NEXT:    v_bfe_u32 v1, v2, 16, 1
71; CHECK-NEXT:    v_add3_u32 v1, v1, v2, s4
72; CHECK-NEXT:    v_or_b32_e32 v3, 0x400000, v2
73; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
74; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
75; CHECK-NEXT:    global_store_short_d16_hi v[4:5], v1, off offset:4
76; CHECK-NEXT:    global_store_dword v[4:5], v0, off
77; CHECK-NEXT:    s_waitcnt vmcnt(0)
78; CHECK-NEXT:    s_setpc_b64 s[30:31]
79entry:
80  %conv = fptrunc <3 x float> %num to <3 x bfloat>
81  store <3 x bfloat> %conv, ptr addrspace(1) %p, align 8
82  ret void
83}
84
; v4: fptrunc of <4 x float> to <4 x bfloat>, stored through the global
; (addrspace(1)) pointer %p.
; Elements are converted pairwise with the same per-element sequence as the
; smaller cases (0x7fff addend in s4, 0x400000 OR on the unordered path).
; The pair (2,3) is processed first and combined into v3, then the pair (0,1)
; into v2, both via v_perm_b32 with selector 0x7060302 held in s5.
; The resulting v[2:3] is written with one global_store_dwordx2.
85define void @v4(<4 x float> %num, ptr addrspace(1) %p) {
86; CHECK-LABEL: v4:
87; CHECK:       ; %bb.0: ; %entry
88; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
89; CHECK-NEXT:    v_bfe_u32 v6, v2, 16, 1
90; CHECK-NEXT:    s_movk_i32 s4, 0x7fff
91; CHECK-NEXT:    v_add3_u32 v6, v6, v2, s4
92; CHECK-NEXT:    v_or_b32_e32 v7, 0x400000, v2
93; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
94; CHECK-NEXT:    v_cndmask_b32_e32 v2, v6, v7, vcc
95; CHECK-NEXT:    v_bfe_u32 v6, v3, 16, 1
96; CHECK-NEXT:    v_add3_u32 v6, v6, v3, s4
97; CHECK-NEXT:    v_or_b32_e32 v7, 0x400000, v3
98; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
99; CHECK-NEXT:    v_cndmask_b32_e32 v3, v6, v7, vcc
100; CHECK-NEXT:    s_mov_b32 s5, 0x7060302
101; CHECK-NEXT:    v_perm_b32 v3, v3, v2, s5
102; CHECK-NEXT:    v_bfe_u32 v2, v0, 16, 1
103; CHECK-NEXT:    v_add3_u32 v2, v2, v0, s4
104; CHECK-NEXT:    v_or_b32_e32 v6, 0x400000, v0
105; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
106; CHECK-NEXT:    v_cndmask_b32_e32 v0, v2, v6, vcc
107; CHECK-NEXT:    v_bfe_u32 v2, v1, 16, 1
108; CHECK-NEXT:    v_add3_u32 v2, v2, v1, s4
109; CHECK-NEXT:    v_or_b32_e32 v6, 0x400000, v1
110; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
111; CHECK-NEXT:    v_cndmask_b32_e32 v1, v2, v6, vcc
112; CHECK-NEXT:    v_perm_b32 v2, v1, v0, s5
113; CHECK-NEXT:    global_store_dwordx2 v[4:5], v[2:3], off
114; CHECK-NEXT:    s_waitcnt vmcnt(0)
115; CHECK-NEXT:    s_setpc_b64 s[30:31]
116entry:
117  %conv = fptrunc <4 x float> %num to <4 x bfloat>
118  store <4 x bfloat> %conv, ptr addrspace(1) %p, align 8
119  ret void
120}
121
; v8: fptrunc of <8 x float> to <8 x bfloat>, stored through the global
; (addrspace(1)) pointer %p.
; The eight inputs in v0-v7 are converted pairwise, highest pair first, using
; the same per-element sequence as the smaller cases. Each pair is combined by
; v_perm_b32 (selector 0x7060302 in s5), filling v7, v6, v5 and v4 in that
; order. The resulting v[4:7] is written with one global_store_dwordx4 to the
; address in v[8:9].
122define void @v8(<8 x float> %num, ptr addrspace(1) %p) {
123; CHECK-LABEL: v8:
124; CHECK:       ; %bb.0: ; %entry
125; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
126; CHECK-NEXT:    v_bfe_u32 v10, v6, 16, 1
127; CHECK-NEXT:    s_movk_i32 s4, 0x7fff
128; CHECK-NEXT:    v_add3_u32 v10, v10, v6, s4
129; CHECK-NEXT:    v_or_b32_e32 v11, 0x400000, v6
130; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
131; CHECK-NEXT:    v_cndmask_b32_e32 v6, v10, v11, vcc
132; CHECK-NEXT:    v_bfe_u32 v10, v7, 16, 1
133; CHECK-NEXT:    v_add3_u32 v10, v10, v7, s4
134; CHECK-NEXT:    v_or_b32_e32 v11, 0x400000, v7
135; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
136; CHECK-NEXT:    v_cndmask_b32_e32 v7, v10, v11, vcc
137; CHECK-NEXT:    s_mov_b32 s5, 0x7060302
138; CHECK-NEXT:    v_perm_b32 v7, v7, v6, s5
139; CHECK-NEXT:    v_bfe_u32 v6, v4, 16, 1
140; CHECK-NEXT:    v_add3_u32 v6, v6, v4, s4
141; CHECK-NEXT:    v_or_b32_e32 v10, 0x400000, v4
142; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
143; CHECK-NEXT:    v_cndmask_b32_e32 v4, v6, v10, vcc
144; CHECK-NEXT:    v_bfe_u32 v6, v5, 16, 1
145; CHECK-NEXT:    v_add3_u32 v6, v6, v5, s4
146; CHECK-NEXT:    v_or_b32_e32 v10, 0x400000, v5
147; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
148; CHECK-NEXT:    v_cndmask_b32_e32 v5, v6, v10, vcc
149; CHECK-NEXT:    v_perm_b32 v6, v5, v4, s5
150; CHECK-NEXT:    v_bfe_u32 v4, v2, 16, 1
151; CHECK-NEXT:    v_add3_u32 v4, v4, v2, s4
152; CHECK-NEXT:    v_or_b32_e32 v5, 0x400000, v2
153; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
154; CHECK-NEXT:    v_cndmask_b32_e32 v2, v4, v5, vcc
155; CHECK-NEXT:    v_bfe_u32 v4, v3, 16, 1
156; CHECK-NEXT:    v_add3_u32 v4, v4, v3, s4
157; CHECK-NEXT:    v_or_b32_e32 v5, 0x400000, v3
158; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
159; CHECK-NEXT:    v_cndmask_b32_e32 v3, v4, v5, vcc
160; CHECK-NEXT:    v_perm_b32 v5, v3, v2, s5
161; CHECK-NEXT:    v_bfe_u32 v2, v0, 16, 1
162; CHECK-NEXT:    v_add3_u32 v2, v2, v0, s4
163; CHECK-NEXT:    v_or_b32_e32 v3, 0x400000, v0
164; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
165; CHECK-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
166; CHECK-NEXT:    v_bfe_u32 v2, v1, 16, 1
167; CHECK-NEXT:    v_add3_u32 v2, v2, v1, s4
168; CHECK-NEXT:    v_or_b32_e32 v3, 0x400000, v1
169; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
170; CHECK-NEXT:    v_cndmask_b32_e32 v1, v2, v3, vcc
171; CHECK-NEXT:    v_perm_b32 v4, v1, v0, s5
172; CHECK-NEXT:    global_store_dwordx4 v[8:9], v[4:7], off
173; CHECK-NEXT:    s_waitcnt vmcnt(0)
174; CHECK-NEXT:    s_setpc_b64 s[30:31]
175entry:
176  %conv = fptrunc <8 x float> %num to <8 x bfloat>
177  store <8 x bfloat> %conv, ptr addrspace(1) %p, align 8
178  ret void
179}
180
; v16: fptrunc of <16 x float> to <16 x bfloat>, stored through the global
; (addrspace(1)) pointer %p.
; The sixteen inputs in v0-v15 are converted pairwise with the same
; per-element sequence as the smaller cases, and each pair is combined by
; v_perm_b32 (selector 0x7060302 in s5).
; Elements 0-7 end up in v[4:7] and elements 8-15 in v[0:3]; the result is
; written with two global_store_dwordx4 instructions to the address in
; v[16:17], the upper half (offset:16) first.
; In the last pair (v8, v9) the expected output interleaves the v_bfe_u32 for
; v9 ahead of the v_cndmask_b32 for v8, unlike the other pairs.
181define void @v16(<16 x float> %num, ptr addrspace(1) %p) {
182; CHECK-LABEL: v16:
183; CHECK:       ; %bb.0: ; %entry
184; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
185; CHECK-NEXT:    v_bfe_u32 v18, v6, 16, 1
186; CHECK-NEXT:    s_movk_i32 s4, 0x7fff
187; CHECK-NEXT:    v_add3_u32 v18, v18, v6, s4
188; CHECK-NEXT:    v_or_b32_e32 v19, 0x400000, v6
189; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
190; CHECK-NEXT:    v_cndmask_b32_e32 v6, v18, v19, vcc
191; CHECK-NEXT:    v_bfe_u32 v18, v7, 16, 1
192; CHECK-NEXT:    v_add3_u32 v18, v18, v7, s4
193; CHECK-NEXT:    v_or_b32_e32 v19, 0x400000, v7
194; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
195; CHECK-NEXT:    v_cndmask_b32_e32 v7, v18, v19, vcc
196; CHECK-NEXT:    s_mov_b32 s5, 0x7060302
197; CHECK-NEXT:    v_perm_b32 v7, v7, v6, s5
198; CHECK-NEXT:    v_bfe_u32 v6, v4, 16, 1
199; CHECK-NEXT:    v_add3_u32 v6, v6, v4, s4
200; CHECK-NEXT:    v_or_b32_e32 v18, 0x400000, v4
201; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
202; CHECK-NEXT:    v_cndmask_b32_e32 v4, v6, v18, vcc
203; CHECK-NEXT:    v_bfe_u32 v6, v5, 16, 1
204; CHECK-NEXT:    v_add3_u32 v6, v6, v5, s4
205; CHECK-NEXT:    v_or_b32_e32 v18, 0x400000, v5
206; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
207; CHECK-NEXT:    v_cndmask_b32_e32 v5, v6, v18, vcc
208; CHECK-NEXT:    v_perm_b32 v6, v5, v4, s5
209; CHECK-NEXT:    v_bfe_u32 v4, v2, 16, 1
210; CHECK-NEXT:    v_add3_u32 v4, v4, v2, s4
211; CHECK-NEXT:    v_or_b32_e32 v5, 0x400000, v2
212; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
213; CHECK-NEXT:    v_cndmask_b32_e32 v2, v4, v5, vcc
214; CHECK-NEXT:    v_bfe_u32 v4, v3, 16, 1
215; CHECK-NEXT:    v_add3_u32 v4, v4, v3, s4
216; CHECK-NEXT:    v_or_b32_e32 v5, 0x400000, v3
217; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
218; CHECK-NEXT:    v_cndmask_b32_e32 v3, v4, v5, vcc
219; CHECK-NEXT:    v_perm_b32 v5, v3, v2, s5
220; CHECK-NEXT:    v_bfe_u32 v2, v0, 16, 1
221; CHECK-NEXT:    v_add3_u32 v2, v2, v0, s4
222; CHECK-NEXT:    v_or_b32_e32 v3, 0x400000, v0
223; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
224; CHECK-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
225; CHECK-NEXT:    v_bfe_u32 v2, v1, 16, 1
226; CHECK-NEXT:    v_add3_u32 v2, v2, v1, s4
227; CHECK-NEXT:    v_or_b32_e32 v3, 0x400000, v1
228; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
229; CHECK-NEXT:    v_cndmask_b32_e32 v1, v2, v3, vcc
230; CHECK-NEXT:    v_perm_b32 v4, v1, v0, s5
231; CHECK-NEXT:    v_bfe_u32 v0, v14, 16, 1
232; CHECK-NEXT:    v_add3_u32 v0, v0, v14, s4
233; CHECK-NEXT:    v_or_b32_e32 v1, 0x400000, v14
234; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v14, v14
235; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
236; CHECK-NEXT:    v_bfe_u32 v1, v15, 16, 1
237; CHECK-NEXT:    v_add3_u32 v1, v1, v15, s4
238; CHECK-NEXT:    v_or_b32_e32 v2, 0x400000, v15
239; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v15, v15
240; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
241; CHECK-NEXT:    v_perm_b32 v3, v1, v0, s5
242; CHECK-NEXT:    v_bfe_u32 v0, v12, 16, 1
243; CHECK-NEXT:    v_add3_u32 v0, v0, v12, s4
244; CHECK-NEXT:    v_or_b32_e32 v1, 0x400000, v12
245; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v12, v12
246; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
247; CHECK-NEXT:    v_bfe_u32 v1, v13, 16, 1
248; CHECK-NEXT:    v_add3_u32 v1, v1, v13, s4
249; CHECK-NEXT:    v_or_b32_e32 v2, 0x400000, v13
250; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v13, v13
251; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
252; CHECK-NEXT:    v_perm_b32 v2, v1, v0, s5
253; CHECK-NEXT:    v_bfe_u32 v0, v10, 16, 1
254; CHECK-NEXT:    v_add3_u32 v0, v0, v10, s4
255; CHECK-NEXT:    v_or_b32_e32 v1, 0x400000, v10
256; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v10, v10
257; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
258; CHECK-NEXT:    v_bfe_u32 v1, v11, 16, 1
259; CHECK-NEXT:    v_add3_u32 v1, v1, v11, s4
260; CHECK-NEXT:    v_or_b32_e32 v10, 0x400000, v11
261; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v11, v11
262; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v10, vcc
263; CHECK-NEXT:    v_perm_b32 v1, v1, v0, s5
264; CHECK-NEXT:    v_bfe_u32 v0, v8, 16, 1
265; CHECK-NEXT:    v_add3_u32 v0, v0, v8, s4
266; CHECK-NEXT:    v_or_b32_e32 v10, 0x400000, v8
267; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v8, v8
268; CHECK-NEXT:    v_bfe_u32 v8, v9, 16, 1
269; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
270; CHECK-NEXT:    v_add3_u32 v8, v8, v9, s4
271; CHECK-NEXT:    v_or_b32_e32 v10, 0x400000, v9
272; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v9, v9
273; CHECK-NEXT:    v_cndmask_b32_e32 v8, v8, v10, vcc
274; CHECK-NEXT:    v_perm_b32 v0, v8, v0, s5
275; CHECK-NEXT:    global_store_dwordx4 v[16:17], v[0:3], off offset:16
276; CHECK-NEXT:    global_store_dwordx4 v[16:17], v[4:7], off
277; CHECK-NEXT:    s_waitcnt vmcnt(0)
278; CHECK-NEXT:    s_setpc_b64 s[30:31]
279entry:
280  %conv = fptrunc <16 x float> %num to <16 x bfloat>
281  store <16 x bfloat> %conv, ptr addrspace(1) %p, align 8
282  ret void
283}
284
; v32: fptrunc of <32 x float> to <32 x bfloat>, stored through the global
; (addrspace(1)) pointer %p.
; Unlike the smaller cases, three dwords are loaded from the stack at entry
; with buffer_load_dword (s32, s32 offset:4, s32 offset:8) into v31, v32 and
; v33. v31 is then converted like the other float inputs and paired with v30,
; and v[32:33] is used as the store address. The s_waitcnt vmcnt(0) placed
; before the first use of v31 waits for those loads.
; Conversion is the same per-element sequence as the smaller cases, with pairs
; combined by v_perm_b32 (selector 0x7060302 in s5). The packed results land
; in v[4:7] (elements 0-7), v[0:3] (8-15), v[8:11] (16-23) and v[12:15]
; (24-31), and are written with four global_store_dwordx4 instructions at
; offsets 48, 32, 16 and 0, in that order.
285define void @v32(<32 x float> %num, ptr addrspace(1) %p) {
286; CHECK-LABEL: v32:
287; CHECK:       ; %bb.0: ; %entry
288; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
289; CHECK-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:8
290; CHECK-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:4
291; CHECK-NEXT:    buffer_load_dword v31, off, s[0:3], s32
292; CHECK-NEXT:    v_bfe_u32 v34, v6, 16, 1
293; CHECK-NEXT:    s_movk_i32 s4, 0x7fff
294; CHECK-NEXT:    v_add3_u32 v34, v34, v6, s4
295; CHECK-NEXT:    v_or_b32_e32 v35, 0x400000, v6
296; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
297; CHECK-NEXT:    v_cndmask_b32_e32 v6, v34, v35, vcc
298; CHECK-NEXT:    v_bfe_u32 v34, v7, 16, 1
299; CHECK-NEXT:    v_add3_u32 v34, v34, v7, s4
300; CHECK-NEXT:    v_or_b32_e32 v35, 0x400000, v7
301; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
302; CHECK-NEXT:    v_cndmask_b32_e32 v7, v34, v35, vcc
303; CHECK-NEXT:    s_mov_b32 s5, 0x7060302
304; CHECK-NEXT:    v_perm_b32 v7, v7, v6, s5
305; CHECK-NEXT:    v_bfe_u32 v6, v4, 16, 1
306; CHECK-NEXT:    v_add3_u32 v6, v6, v4, s4
307; CHECK-NEXT:    v_or_b32_e32 v34, 0x400000, v4
308; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
309; CHECK-NEXT:    v_cndmask_b32_e32 v4, v6, v34, vcc
310; CHECK-NEXT:    v_bfe_u32 v6, v5, 16, 1
311; CHECK-NEXT:    v_add3_u32 v6, v6, v5, s4
312; CHECK-NEXT:    v_or_b32_e32 v34, 0x400000, v5
313; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
314; CHECK-NEXT:    v_cndmask_b32_e32 v5, v6, v34, vcc
315; CHECK-NEXT:    v_perm_b32 v6, v5, v4, s5
316; CHECK-NEXT:    v_bfe_u32 v4, v2, 16, 1
317; CHECK-NEXT:    v_add3_u32 v4, v4, v2, s4
318; CHECK-NEXT:    v_or_b32_e32 v5, 0x400000, v2
319; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
320; CHECK-NEXT:    v_cndmask_b32_e32 v2, v4, v5, vcc
321; CHECK-NEXT:    v_bfe_u32 v4, v3, 16, 1
322; CHECK-NEXT:    v_add3_u32 v4, v4, v3, s4
323; CHECK-NEXT:    v_or_b32_e32 v5, 0x400000, v3
324; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
325; CHECK-NEXT:    v_cndmask_b32_e32 v3, v4, v5, vcc
326; CHECK-NEXT:    v_perm_b32 v5, v3, v2, s5
327; CHECK-NEXT:    v_bfe_u32 v2, v0, 16, 1
328; CHECK-NEXT:    v_add3_u32 v2, v2, v0, s4
329; CHECK-NEXT:    v_or_b32_e32 v3, 0x400000, v0
330; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
331; CHECK-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
332; CHECK-NEXT:    v_bfe_u32 v2, v1, 16, 1
333; CHECK-NEXT:    v_add3_u32 v2, v2, v1, s4
334; CHECK-NEXT:    v_or_b32_e32 v3, 0x400000, v1
335; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
336; CHECK-NEXT:    v_cndmask_b32_e32 v1, v2, v3, vcc
337; CHECK-NEXT:    v_perm_b32 v4, v1, v0, s5
338; CHECK-NEXT:    v_bfe_u32 v0, v14, 16, 1
339; CHECK-NEXT:    v_add3_u32 v0, v0, v14, s4
340; CHECK-NEXT:    v_or_b32_e32 v1, 0x400000, v14
341; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v14, v14
342; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
343; CHECK-NEXT:    v_bfe_u32 v1, v15, 16, 1
344; CHECK-NEXT:    v_add3_u32 v1, v1, v15, s4
345; CHECK-NEXT:    v_or_b32_e32 v2, 0x400000, v15
346; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v15, v15
347; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
348; CHECK-NEXT:    v_perm_b32 v3, v1, v0, s5
349; CHECK-NEXT:    v_bfe_u32 v0, v12, 16, 1
350; CHECK-NEXT:    v_add3_u32 v0, v0, v12, s4
351; CHECK-NEXT:    v_or_b32_e32 v1, 0x400000, v12
352; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v12, v12
353; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
354; CHECK-NEXT:    v_bfe_u32 v1, v13, 16, 1
355; CHECK-NEXT:    v_add3_u32 v1, v1, v13, s4
356; CHECK-NEXT:    v_or_b32_e32 v2, 0x400000, v13
357; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v13, v13
358; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
359; CHECK-NEXT:    v_perm_b32 v2, v1, v0, s5
360; CHECK-NEXT:    v_bfe_u32 v0, v10, 16, 1
361; CHECK-NEXT:    v_add3_u32 v0, v0, v10, s4
362; CHECK-NEXT:    v_or_b32_e32 v1, 0x400000, v10
363; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v10, v10
364; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
365; CHECK-NEXT:    v_bfe_u32 v1, v11, 16, 1
366; CHECK-NEXT:    v_add3_u32 v1, v1, v11, s4
367; CHECK-NEXT:    v_or_b32_e32 v10, 0x400000, v11
368; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v11, v11
369; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v10, vcc
370; CHECK-NEXT:    v_perm_b32 v1, v1, v0, s5
371; CHECK-NEXT:    v_bfe_u32 v0, v8, 16, 1
372; CHECK-NEXT:    v_add3_u32 v0, v0, v8, s4
373; CHECK-NEXT:    v_or_b32_e32 v10, 0x400000, v8
374; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v8, v8
375; CHECK-NEXT:    v_bfe_u32 v8, v9, 16, 1
376; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
377; CHECK-NEXT:    v_add3_u32 v8, v8, v9, s4
378; CHECK-NEXT:    v_or_b32_e32 v10, 0x400000, v9
379; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v9, v9
380; CHECK-NEXT:    v_cndmask_b32_e32 v8, v8, v10, vcc
381; CHECK-NEXT:    v_perm_b32 v0, v8, v0, s5
382; CHECK-NEXT:    v_bfe_u32 v8, v22, 16, 1
383; CHECK-NEXT:    v_add3_u32 v8, v8, v22, s4
384; CHECK-NEXT:    v_or_b32_e32 v9, 0x400000, v22
385; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v22, v22
386; CHECK-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc
387; CHECK-NEXT:    v_bfe_u32 v9, v23, 16, 1
388; CHECK-NEXT:    v_add3_u32 v9, v9, v23, s4
389; CHECK-NEXT:    v_or_b32_e32 v10, 0x400000, v23
390; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v23, v23
391; CHECK-NEXT:    v_cndmask_b32_e32 v9, v9, v10, vcc
392; CHECK-NEXT:    v_perm_b32 v11, v9, v8, s5
393; CHECK-NEXT:    v_bfe_u32 v8, v20, 16, 1
394; CHECK-NEXT:    v_add3_u32 v8, v8, v20, s4
395; CHECK-NEXT:    v_or_b32_e32 v9, 0x400000, v20
396; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v20, v20
397; CHECK-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc
398; CHECK-NEXT:    v_bfe_u32 v9, v21, 16, 1
399; CHECK-NEXT:    v_add3_u32 v9, v9, v21, s4
400; CHECK-NEXT:    v_or_b32_e32 v10, 0x400000, v21
401; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v21, v21
402; CHECK-NEXT:    v_cndmask_b32_e32 v9, v9, v10, vcc
403; CHECK-NEXT:    v_perm_b32 v10, v9, v8, s5
404; CHECK-NEXT:    v_bfe_u32 v8, v18, 16, 1
405; CHECK-NEXT:    v_add3_u32 v8, v8, v18, s4
406; CHECK-NEXT:    v_or_b32_e32 v9, 0x400000, v18
407; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v18, v18
408; CHECK-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc
409; CHECK-NEXT:    v_bfe_u32 v9, v19, 16, 1
410; CHECK-NEXT:    v_add3_u32 v9, v9, v19, s4
411; CHECK-NEXT:    v_or_b32_e32 v12, 0x400000, v19
412; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v19, v19
413; CHECK-NEXT:    v_cndmask_b32_e32 v9, v9, v12, vcc
414; CHECK-NEXT:    v_perm_b32 v9, v9, v8, s5
415; CHECK-NEXT:    v_bfe_u32 v8, v16, 16, 1
416; CHECK-NEXT:    v_add3_u32 v8, v8, v16, s4
417; CHECK-NEXT:    v_or_b32_e32 v12, 0x400000, v16
418; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v16, v16
419; CHECK-NEXT:    v_cndmask_b32_e32 v8, v8, v12, vcc
420; CHECK-NEXT:    v_bfe_u32 v12, v17, 16, 1
421; CHECK-NEXT:    v_add3_u32 v12, v12, v17, s4
422; CHECK-NEXT:    v_or_b32_e32 v13, 0x400000, v17
423; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v17, v17
424; CHECK-NEXT:    v_cndmask_b32_e32 v12, v12, v13, vcc
425; CHECK-NEXT:    v_perm_b32 v8, v12, v8, s5
426; CHECK-NEXT:    v_bfe_u32 v12, v30, 16, 1
427; CHECK-NEXT:    v_add3_u32 v12, v12, v30, s4
428; CHECK-NEXT:    v_or_b32_e32 v13, 0x400000, v30
429; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v30, v30
430; CHECK-NEXT:    v_cndmask_b32_e32 v12, v12, v13, vcc
431; CHECK-NEXT:    s_waitcnt vmcnt(0)
432; CHECK-NEXT:    v_bfe_u32 v13, v31, 16, 1
433; CHECK-NEXT:    v_add3_u32 v13, v13, v31, s4
434; CHECK-NEXT:    v_or_b32_e32 v14, 0x400000, v31
435; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v31, v31
436; CHECK-NEXT:    v_cndmask_b32_e32 v13, v13, v14, vcc
437; CHECK-NEXT:    v_perm_b32 v15, v13, v12, s5
438; CHECK-NEXT:    v_bfe_u32 v12, v28, 16, 1
439; CHECK-NEXT:    v_add3_u32 v12, v12, v28, s4
440; CHECK-NEXT:    v_or_b32_e32 v13, 0x400000, v28
441; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v28, v28
442; CHECK-NEXT:    v_cndmask_b32_e32 v12, v12, v13, vcc
443; CHECK-NEXT:    v_bfe_u32 v13, v29, 16, 1
444; CHECK-NEXT:    v_add3_u32 v13, v13, v29, s4
445; CHECK-NEXT:    v_or_b32_e32 v14, 0x400000, v29
446; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v29, v29
447; CHECK-NEXT:    v_cndmask_b32_e32 v13, v13, v14, vcc
448; CHECK-NEXT:    v_perm_b32 v14, v13, v12, s5
449; CHECK-NEXT:    v_bfe_u32 v12, v26, 16, 1
450; CHECK-NEXT:    v_add3_u32 v12, v12, v26, s4
451; CHECK-NEXT:    v_or_b32_e32 v13, 0x400000, v26
452; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v26, v26
453; CHECK-NEXT:    v_cndmask_b32_e32 v12, v12, v13, vcc
454; CHECK-NEXT:    v_bfe_u32 v13, v27, 16, 1
455; CHECK-NEXT:    v_add3_u32 v13, v13, v27, s4
456; CHECK-NEXT:    v_or_b32_e32 v16, 0x400000, v27
457; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v27, v27
458; CHECK-NEXT:    v_cndmask_b32_e32 v13, v13, v16, vcc
459; CHECK-NEXT:    v_perm_b32 v13, v13, v12, s5
460; CHECK-NEXT:    v_bfe_u32 v12, v24, 16, 1
461; CHECK-NEXT:    v_add3_u32 v12, v12, v24, s4
462; CHECK-NEXT:    v_or_b32_e32 v16, 0x400000, v24
463; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v24, v24
464; CHECK-NEXT:    v_cndmask_b32_e32 v12, v12, v16, vcc
465; CHECK-NEXT:    v_bfe_u32 v16, v25, 16, 1
466; CHECK-NEXT:    v_add3_u32 v16, v16, v25, s4
467; CHECK-NEXT:    v_or_b32_e32 v17, 0x400000, v25
468; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v25, v25
469; CHECK-NEXT:    v_cndmask_b32_e32 v16, v16, v17, vcc
470; CHECK-NEXT:    v_perm_b32 v12, v16, v12, s5
471; CHECK-NEXT:    global_store_dwordx4 v[32:33], v[12:15], off offset:48
472; CHECK-NEXT:    global_store_dwordx4 v[32:33], v[8:11], off offset:32
473; CHECK-NEXT:    global_store_dwordx4 v[32:33], v[0:3], off offset:16
474; CHECK-NEXT:    global_store_dwordx4 v[32:33], v[4:7], off
475; CHECK-NEXT:    s_waitcnt vmcnt(0)
476; CHECK-NEXT:    s_setpc_b64 s[30:31]
477entry:
478  %conv = fptrunc <32 x float> %num to <32 x bfloat>
479  store <32 x bfloat> %conv, ptr addrspace(1) %p, align 8
480  ret void
481}
482