xref: /llvm-project/llvm/test/CodeGen/AMDGPU/v_pack.ll (revision 26e13091ea5ac3a53d11b50265a506f88129d6ff)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
3; RUN: llc -global-isel -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GISEL %s
4; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,-real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-GCN-FAKE16 %s
5; RUN: llc -global-isel -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,-real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-GISEL-FAKE16 %s
6; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,+real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-GCN-REAL16 %s
7; RUN: llc -global-isel -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,+real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-GISEL-REAL16 %s
8
; AMDGPU intrinsic returning an i32; the pack kernels below sign-extend its
; result and use it as the element index into %in0 / %in1.
9declare i32 @llvm.amdgcn.workitem.id.x() #1
10
; Baseline pack test: two halves are loaded, each has 2.0 added, and the results
; are inserted into lanes 0 and 1 of a <2 x half> that is bitcast to i32.
; Every RUN configuration is expected to emit one v_pack_b32_f16 for the
; insertelement pair (see the autogenerated assertions below).
11define amdgpu_kernel void @v_pack_b32_v2f16(ptr addrspace(1) %in0, ptr addrspace(1) %in1) #0 {
12; GCN-LABEL: v_pack_b32_v2f16:
13; GCN:       ; %bb.0:
14; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
15; GCN-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
16; GCN-NEXT:    s_waitcnt lgkmcnt(0)
17; GCN-NEXT:    global_load_ushort v1, v0, s[0:1] glc dlc
18; GCN-NEXT:    s_waitcnt vmcnt(0)
19; GCN-NEXT:    global_load_ushort v2, v0, s[2:3] glc dlc
20; GCN-NEXT:    s_waitcnt vmcnt(0)
21; GCN-NEXT:    v_add_f16_e32 v0, 2.0, v1
22; GCN-NEXT:    v_add_f16_e32 v1, 2.0, v2
23; GCN-NEXT:    v_pack_b32_f16 v0, v0, v1
24; GCN-NEXT:    ;;#ASMSTART
25; GCN-NEXT:    ; use v0
26; GCN-NEXT:    ;;#ASMEND
27; GCN-NEXT:    s_endpgm
28;
29; GISEL-LABEL: v_pack_b32_v2f16:
30; GISEL:       ; %bb.0:
31; GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
32; GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
33; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
34; GISEL-NEXT:    global_load_ushort v1, v0, s[0:1] glc dlc
35; GISEL-NEXT:    s_waitcnt vmcnt(0)
36; GISEL-NEXT:    global_load_ushort v2, v0, s[2:3] glc dlc
37; GISEL-NEXT:    s_waitcnt vmcnt(0)
38; GISEL-NEXT:    v_add_f16_e32 v0, 2.0, v1
39; GISEL-NEXT:    v_add_f16_e32 v1, 2.0, v2
40; GISEL-NEXT:    v_pack_b32_f16 v0, v0, v1
41; GISEL-NEXT:    ;;#ASMSTART
42; GISEL-NEXT:    ; use v0
43; GISEL-NEXT:    ;;#ASMEND
44; GISEL-NEXT:    s_endpgm
45;
46; GFX11-GCN-FAKE16-LABEL: v_pack_b32_v2f16:
47; GFX11-GCN-FAKE16:       ; %bb.0:
48; GFX11-GCN-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
49; GFX11-GCN-FAKE16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
50; GFX11-GCN-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
51; GFX11-GCN-FAKE16-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
52; GFX11-GCN-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
53; GFX11-GCN-FAKE16-NEXT:    global_load_u16 v1, v0, s[0:1] glc dlc
54; GFX11-GCN-FAKE16-NEXT:    s_waitcnt vmcnt(0)
55; GFX11-GCN-FAKE16-NEXT:    global_load_u16 v0, v0, s[2:3] glc dlc
56; GFX11-GCN-FAKE16-NEXT:    s_waitcnt vmcnt(0)
57; GFX11-GCN-FAKE16-NEXT:    v_add_f16_e32 v1, 2.0, v1
58; GFX11-GCN-FAKE16-NEXT:    v_add_f16_e32 v0, 2.0, v0
59; GFX11-GCN-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
60; GFX11-GCN-FAKE16-NEXT:    v_pack_b32_f16 v0, v1, v0
61; GFX11-GCN-FAKE16-NEXT:    ;;#ASMSTART
62; GFX11-GCN-FAKE16-NEXT:    ; use v0
63; GFX11-GCN-FAKE16-NEXT:    ;;#ASMEND
64; GFX11-GCN-FAKE16-NEXT:    s_endpgm
65;
66; GFX11-GISEL-FAKE16-LABEL: v_pack_b32_v2f16:
67; GFX11-GISEL-FAKE16:       ; %bb.0:
68; GFX11-GISEL-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
69; GFX11-GISEL-FAKE16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
70; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
71; GFX11-GISEL-FAKE16-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
72; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
73; GFX11-GISEL-FAKE16-NEXT:    global_load_u16 v1, v0, s[0:1] glc dlc
74; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0)
75; GFX11-GISEL-FAKE16-NEXT:    global_load_u16 v0, v0, s[2:3] glc dlc
76; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0)
77; GFX11-GISEL-FAKE16-NEXT:    v_add_f16_e32 v1, 2.0, v1
78; GFX11-GISEL-FAKE16-NEXT:    v_add_f16_e32 v0, 2.0, v0
79; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
80; GFX11-GISEL-FAKE16-NEXT:    v_pack_b32_f16 v0, v1, v0
81; GFX11-GISEL-FAKE16-NEXT:    ;;#ASMSTART
82; GFX11-GISEL-FAKE16-NEXT:    ; use v0
83; GFX11-GISEL-FAKE16-NEXT:    ;;#ASMEND
84; GFX11-GISEL-FAKE16-NEXT:    s_endpgm
85;
86; GFX11-GCN-REAL16-LABEL: v_pack_b32_v2f16:
87; GFX11-GCN-REAL16:       ; %bb.0:
88; GFX11-GCN-REAL16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
89; GFX11-GCN-REAL16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
90; GFX11-GCN-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
91; GFX11-GCN-REAL16-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
92; GFX11-GCN-REAL16-NEXT:    s_waitcnt lgkmcnt(0)
93; GFX11-GCN-REAL16-NEXT:    global_load_u16 v1, v0, s[0:1] glc dlc
94; GFX11-GCN-REAL16-NEXT:    s_waitcnt vmcnt(0)
95; GFX11-GCN-REAL16-NEXT:    global_load_u16 v2, v0, s[2:3] glc dlc
96; GFX11-GCN-REAL16-NEXT:    s_waitcnt vmcnt(0)
97; GFX11-GCN-REAL16-NEXT:    v_mov_b16_e32 v0.l, v1.l
98; GFX11-GCN-REAL16-NEXT:    v_mov_b16_e32 v0.h, v2.l
99; GFX11-GCN-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
100; GFX11-GCN-REAL16-NEXT:    v_add_f16_e32 v0.l, 2.0, v0.l
101; GFX11-GCN-REAL16-NEXT:    v_add_f16_e32 v0.h, 2.0, v0.h
102; GFX11-GCN-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
103; GFX11-GCN-REAL16-NEXT:    v_pack_b32_f16 v0, v0.l, v0.h
104; GFX11-GCN-REAL16-NEXT:    ;;#ASMSTART
105; GFX11-GCN-REAL16-NEXT:    ; use v0
106; GFX11-GCN-REAL16-NEXT:    ;;#ASMEND
107; GFX11-GCN-REAL16-NEXT:    s_endpgm
108;
109; GFX11-GISEL-REAL16-LABEL: v_pack_b32_v2f16:
110; GFX11-GISEL-REAL16:       ; %bb.0:
111; GFX11-GISEL-REAL16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
112; GFX11-GISEL-REAL16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
113; GFX11-GISEL-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
114; GFX11-GISEL-REAL16-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
115; GFX11-GISEL-REAL16-NEXT:    s_waitcnt lgkmcnt(0)
116; GFX11-GISEL-REAL16-NEXT:    global_load_u16 v1, v0, s[0:1] glc dlc
117; GFX11-GISEL-REAL16-NEXT:    s_waitcnt vmcnt(0)
118; GFX11-GISEL-REAL16-NEXT:    global_load_u16 v2, v0, s[2:3] glc dlc
119; GFX11-GISEL-REAL16-NEXT:    s_waitcnt vmcnt(0)
120; GFX11-GISEL-REAL16-NEXT:    v_add_f16_e32 v0.l, 2.0, v1.l
121; GFX11-GISEL-REAL16-NEXT:    v_add_f16_e32 v0.h, 2.0, v2.l
122; GFX11-GISEL-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
123; GFX11-GISEL-REAL16-NEXT:    v_pack_b32_f16 v0, v0.l, v0.h
124; GFX11-GISEL-REAL16-NEXT:    ;;#ASMSTART
125; GFX11-GISEL-REAL16-NEXT:    ; use v0
126; GFX11-GISEL-REAL16-NEXT:    ;;#ASMEND
127; GFX11-GISEL-REAL16-NEXT:    s_endpgm
  ; The workitem id, sign-extended, indexes one half element in each input.
128  %tid = call i32 @llvm.amdgcn.workitem.id.x()
129  %tid.ext = sext i32 %tid to i64
130  %in0.gep = getelementptr inbounds half, ptr addrspace(1) %in0, i64 %tid.ext
131  %in1.gep = getelementptr inbounds half, ptr addrspace(1) %in1, i64 %tid.ext
  ; Volatile loads: the assertions expect two separate 16-bit global loads.
132  %v0 = load volatile half, ptr addrspace(1) %in0.gep
133  %v1 = load volatile half, ptr addrspace(1) %in1.gep
134  %v0.add = fadd half %v0, 2.0
135  %v1.add = fadd half %v1, 2.0
  ; Lane 0 <- %v0.add, lane 1 <- %v1.add; this pair is the pattern under test.
  ; NOTE(review): the seed vector is undef and both lanes are overwritten; if
  ; this is ever switched to poison, regenerate the assertions.
136  %vec.0 = insertelement <2 x half> undef, half %v0.add, i32 0
137  %vec.1 = insertelement <2 x half> %vec.0, half %v1.add, i32 1
138  %vec.i32 = bitcast <2 x half> %vec.1 to i32
  ; Side-effecting inline asm consumes the packed i32 through a v-constrained
  ; operand; it shows up between the ASMSTART/ASMEND markers in the assertions.
139  call void asm sideeffect "; use $0", "v"(i32 %vec.i32) #0
140  ret void
141}
142
; Variant of the baseline pack test where lane 0 is produced by an fsub
; (%v0 - 2.0) instead of an fadd. The assertions expect v_subrev_f16 for that
; lane and still a single v_pack_b32_f16 combining both lanes.
143define amdgpu_kernel void @v_pack_b32_v2f16_sub(ptr addrspace(1) %in0, ptr addrspace(1) %in1) #0 {
144; GCN-LABEL: v_pack_b32_v2f16_sub:
145; GCN:       ; %bb.0:
146; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
147; GCN-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
148; GCN-NEXT:    s_waitcnt lgkmcnt(0)
149; GCN-NEXT:    global_load_ushort v1, v0, s[0:1] glc dlc
150; GCN-NEXT:    s_waitcnt vmcnt(0)
151; GCN-NEXT:    global_load_ushort v2, v0, s[2:3] glc dlc
152; GCN-NEXT:    s_waitcnt vmcnt(0)
153; GCN-NEXT:    v_subrev_f16_e32 v0, 2.0, v1
154; GCN-NEXT:    v_add_f16_e32 v1, 2.0, v2
155; GCN-NEXT:    v_pack_b32_f16 v0, v0, v1
156; GCN-NEXT:    ;;#ASMSTART
157; GCN-NEXT:    ; use v0
158; GCN-NEXT:    ;;#ASMEND
159; GCN-NEXT:    s_endpgm
160;
161; GISEL-LABEL: v_pack_b32_v2f16_sub:
162; GISEL:       ; %bb.0:
163; GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
164; GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
165; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
166; GISEL-NEXT:    global_load_ushort v1, v0, s[0:1] glc dlc
167; GISEL-NEXT:    s_waitcnt vmcnt(0)
168; GISEL-NEXT:    global_load_ushort v2, v0, s[2:3] glc dlc
169; GISEL-NEXT:    s_waitcnt vmcnt(0)
170; GISEL-NEXT:    v_subrev_f16_e32 v0, 2.0, v1
171; GISEL-NEXT:    v_add_f16_e32 v1, 2.0, v2
172; GISEL-NEXT:    v_pack_b32_f16 v0, v0, v1
173; GISEL-NEXT:    ;;#ASMSTART
174; GISEL-NEXT:    ; use v0
175; GISEL-NEXT:    ;;#ASMEND
176; GISEL-NEXT:    s_endpgm
177;
178; GFX11-GCN-FAKE16-LABEL: v_pack_b32_v2f16_sub:
179; GFX11-GCN-FAKE16:       ; %bb.0:
180; GFX11-GCN-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
181; GFX11-GCN-FAKE16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
182; GFX11-GCN-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
183; GFX11-GCN-FAKE16-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
184; GFX11-GCN-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
185; GFX11-GCN-FAKE16-NEXT:    global_load_u16 v1, v0, s[0:1] glc dlc
186; GFX11-GCN-FAKE16-NEXT:    s_waitcnt vmcnt(0)
187; GFX11-GCN-FAKE16-NEXT:    global_load_u16 v0, v0, s[2:3] glc dlc
188; GFX11-GCN-FAKE16-NEXT:    s_waitcnt vmcnt(0)
189; GFX11-GCN-FAKE16-NEXT:    v_subrev_f16_e32 v1, 2.0, v1
190; GFX11-GCN-FAKE16-NEXT:    v_add_f16_e32 v0, 2.0, v0
191; GFX11-GCN-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
192; GFX11-GCN-FAKE16-NEXT:    v_pack_b32_f16 v0, v1, v0
193; GFX11-GCN-FAKE16-NEXT:    ;;#ASMSTART
194; GFX11-GCN-FAKE16-NEXT:    ; use v0
195; GFX11-GCN-FAKE16-NEXT:    ;;#ASMEND
196; GFX11-GCN-FAKE16-NEXT:    s_endpgm
197;
198; GFX11-GISEL-FAKE16-LABEL: v_pack_b32_v2f16_sub:
199; GFX11-GISEL-FAKE16:       ; %bb.0:
200; GFX11-GISEL-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
201; GFX11-GISEL-FAKE16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
202; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
203; GFX11-GISEL-FAKE16-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
204; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
205; GFX11-GISEL-FAKE16-NEXT:    global_load_u16 v1, v0, s[0:1] glc dlc
206; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0)
207; GFX11-GISEL-FAKE16-NEXT:    global_load_u16 v0, v0, s[2:3] glc dlc
208; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0)
209; GFX11-GISEL-FAKE16-NEXT:    v_subrev_f16_e32 v1, 2.0, v1
210; GFX11-GISEL-FAKE16-NEXT:    v_add_f16_e32 v0, 2.0, v0
211; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
212; GFX11-GISEL-FAKE16-NEXT:    v_pack_b32_f16 v0, v1, v0
213; GFX11-GISEL-FAKE16-NEXT:    ;;#ASMSTART
214; GFX11-GISEL-FAKE16-NEXT:    ; use v0
215; GFX11-GISEL-FAKE16-NEXT:    ;;#ASMEND
216; GFX11-GISEL-FAKE16-NEXT:    s_endpgm
217;
218; GFX11-GCN-REAL16-LABEL: v_pack_b32_v2f16_sub:
219; GFX11-GCN-REAL16:       ; %bb.0:
220; GFX11-GCN-REAL16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
221; GFX11-GCN-REAL16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
222; GFX11-GCN-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
223; GFX11-GCN-REAL16-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
224; GFX11-GCN-REAL16-NEXT:    s_waitcnt lgkmcnt(0)
225; GFX11-GCN-REAL16-NEXT:    global_load_u16 v1, v0, s[0:1] glc dlc
226; GFX11-GCN-REAL16-NEXT:    s_waitcnt vmcnt(0)
227; GFX11-GCN-REAL16-NEXT:    global_load_u16 v2, v0, s[2:3] glc dlc
228; GFX11-GCN-REAL16-NEXT:    s_waitcnt vmcnt(0)
229; GFX11-GCN-REAL16-NEXT:    v_mov_b16_e32 v0.l, v1.l
230; GFX11-GCN-REAL16-NEXT:    v_mov_b16_e32 v0.h, v2.l
231; GFX11-GCN-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
232; GFX11-GCN-REAL16-NEXT:    v_subrev_f16_e32 v0.l, 2.0, v0.l
233; GFX11-GCN-REAL16-NEXT:    v_add_f16_e32 v0.h, 2.0, v0.h
234; GFX11-GCN-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
235; GFX11-GCN-REAL16-NEXT:    v_pack_b32_f16 v0, v0.l, v0.h
236; GFX11-GCN-REAL16-NEXT:    ;;#ASMSTART
237; GFX11-GCN-REAL16-NEXT:    ; use v0
238; GFX11-GCN-REAL16-NEXT:    ;;#ASMEND
239; GFX11-GCN-REAL16-NEXT:    s_endpgm
240;
241; GFX11-GISEL-REAL16-LABEL: v_pack_b32_v2f16_sub:
242; GFX11-GISEL-REAL16:       ; %bb.0:
243; GFX11-GISEL-REAL16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
244; GFX11-GISEL-REAL16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
245; GFX11-GISEL-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
246; GFX11-GISEL-REAL16-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
247; GFX11-GISEL-REAL16-NEXT:    s_waitcnt lgkmcnt(0)
248; GFX11-GISEL-REAL16-NEXT:    global_load_u16 v1, v0, s[0:1] glc dlc
249; GFX11-GISEL-REAL16-NEXT:    s_waitcnt vmcnt(0)
250; GFX11-GISEL-REAL16-NEXT:    global_load_u16 v2, v0, s[2:3] glc dlc
251; GFX11-GISEL-REAL16-NEXT:    s_waitcnt vmcnt(0)
252; GFX11-GISEL-REAL16-NEXT:    v_subrev_f16_e32 v0.l, 2.0, v1.l
253; GFX11-GISEL-REAL16-NEXT:    v_add_f16_e32 v0.h, 2.0, v2.l
254; GFX11-GISEL-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
255; GFX11-GISEL-REAL16-NEXT:    v_pack_b32_f16 v0, v0.l, v0.h
256; GFX11-GISEL-REAL16-NEXT:    ;;#ASMSTART
257; GFX11-GISEL-REAL16-NEXT:    ; use v0
258; GFX11-GISEL-REAL16-NEXT:    ;;#ASMEND
259; GFX11-GISEL-REAL16-NEXT:    s_endpgm
  ; The workitem id, sign-extended, indexes one half element in each input.
260  %tid = call i32 @llvm.amdgcn.workitem.id.x()
261  %tid.ext = sext i32 %tid to i64
262  %in0.gep = getelementptr inbounds half, ptr addrspace(1) %in0, i64 %tid.ext
263  %in1.gep = getelementptr inbounds half, ptr addrspace(1) %in1, i64 %tid.ext
  ; Volatile loads: the assertions expect two separate 16-bit global loads.
264  %v0 = load volatile half, ptr addrspace(1) %in0.gep
265  %v1 = load volatile half, ptr addrspace(1) %in1.gep
  ; Despite its name, %v0.add is a subtraction here; only lane 1 uses fadd.
266  %v0.add = fsub half %v0, 2.0
267  %v1.add = fadd half %v1, 2.0
  ; NOTE(review): the seed vector is undef and both lanes are overwritten; if
  ; this is ever switched to poison, regenerate the assertions.
268  %vec.0 = insertelement <2 x half> undef, half %v0.add, i32 0
269  %vec.1 = insertelement <2 x half> %vec.0, half %v1.add, i32 1
270  %vec.i32 = bitcast <2 x half> %vec.1 to i32
  ; Side-effecting inline asm consumes the packed i32 through a v-constrained
  ; operand; it shows up between the ASMSTART/ASMEND markers in the assertions.
271  call void asm sideeffect "; use $0", "v"(i32 %vec.i32) #0
272  ret void
273}
274
; Vector fptrunc test: loads a <2 x float> from %a, truncates it to <2 x half>
; and stores the result to %r. The assertions expect two v_cvt_f16_f32
; conversions whose results are combined with v_pack_b32_f16 before one
; 32-bit store.
; The parameter list of this define continues after the autogenerated
; assertions (the two ptr addrspace(1) arguments %r and %a).
275define amdgpu_kernel void @fptrunc(
276; GCN-LABEL: fptrunc:
277; GCN:       ; %bb.0:
278; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
279; GCN-NEXT:    s_mov_b32 s6, -1
280; GCN-NEXT:    s_mov_b32 s7, 0x31016000
281; GCN-NEXT:    s_mov_b32 s10, s6
282; GCN-NEXT:    s_mov_b32 s11, s7
283; GCN-NEXT:    s_waitcnt lgkmcnt(0)
284; GCN-NEXT:    s_mov_b32 s8, s2
285; GCN-NEXT:    s_mov_b32 s9, s3
286; GCN-NEXT:    s_mov_b32 s4, s0
287; GCN-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
288; GCN-NEXT:    s_mov_b32 s5, s1
289; GCN-NEXT:    s_waitcnt vmcnt(0)
290; GCN-NEXT:    v_cvt_f16_f32_e32 v1, v1
291; GCN-NEXT:    v_cvt_f16_f32_e32 v0, v0
292; GCN-NEXT:    v_pack_b32_f16 v0, v0, v1
293; GCN-NEXT:    buffer_store_dword v0, off, s[4:7], 0
294; GCN-NEXT:    s_endpgm
295;
296; GISEL-LABEL: fptrunc:
297; GISEL:       ; %bb.0:
298; GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
299; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
300; GISEL-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
301; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
302; GISEL-NEXT:    v_cvt_f16_f32_e32 v0, s2
303; GISEL-NEXT:    v_cvt_f16_f32_e32 v1, s3
304; GISEL-NEXT:    s_mov_b32 s2, -1
305; GISEL-NEXT:    s_mov_b32 s3, 0x31016000
306; GISEL-NEXT:    v_pack_b32_f16 v0, v0, v1
307; GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
308; GISEL-NEXT:    s_endpgm
309;
310; GFX11-GCN-FAKE16-LABEL: fptrunc:
311; GFX11-GCN-FAKE16:       ; %bb.0:
312; GFX11-GCN-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
313; GFX11-GCN-FAKE16-NEXT:    s_mov_b32 s6, -1
314; GFX11-GCN-FAKE16-NEXT:    s_mov_b32 s7, 0x31016000
315; GFX11-GCN-FAKE16-NEXT:    s_mov_b32 s10, s6
316; GFX11-GCN-FAKE16-NEXT:    s_mov_b32 s11, s7
317; GFX11-GCN-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
318; GFX11-GCN-FAKE16-NEXT:    s_mov_b32 s8, s2
319; GFX11-GCN-FAKE16-NEXT:    s_mov_b32 s9, s3
320; GFX11-GCN-FAKE16-NEXT:    s_mov_b32 s4, s0
321; GFX11-GCN-FAKE16-NEXT:    buffer_load_b64 v[0:1], off, s[8:11], 0
322; GFX11-GCN-FAKE16-NEXT:    s_mov_b32 s5, s1
323; GFX11-GCN-FAKE16-NEXT:    s_waitcnt vmcnt(0)
324; GFX11-GCN-FAKE16-NEXT:    v_cvt_f16_f32_e32 v1, v1
325; GFX11-GCN-FAKE16-NEXT:    v_cvt_f16_f32_e32 v0, v0
326; GFX11-GCN-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
327; GFX11-GCN-FAKE16-NEXT:    v_pack_b32_f16 v0, v0, v1
328; GFX11-GCN-FAKE16-NEXT:    buffer_store_b32 v0, off, s[4:7], 0
329; GFX11-GCN-FAKE16-NEXT:    s_endpgm
330;
331; GFX11-GISEL-FAKE16-LABEL: fptrunc:
332; GFX11-GISEL-FAKE16:       ; %bb.0:
333; GFX11-GISEL-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
334; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
335; GFX11-GISEL-FAKE16-NEXT:    s_load_b64 s[2:3], s[2:3], 0x0
336; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
337; GFX11-GISEL-FAKE16-NEXT:    v_cvt_f16_f32_e32 v0, s2
338; GFX11-GISEL-FAKE16-NEXT:    v_cvt_f16_f32_e32 v1, s3
339; GFX11-GISEL-FAKE16-NEXT:    s_mov_b32 s2, -1
340; GFX11-GISEL-FAKE16-NEXT:    s_mov_b32 s3, 0x31016000
341; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
342; GFX11-GISEL-FAKE16-NEXT:    v_pack_b32_f16 v0, v0, v1
343; GFX11-GISEL-FAKE16-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
344; GFX11-GISEL-FAKE16-NEXT:    s_endpgm
345;
346; GFX11-GCN-REAL16-LABEL: fptrunc:
347; GFX11-GCN-REAL16:       ; %bb.0:
348; GFX11-GCN-REAL16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
349; GFX11-GCN-REAL16-NEXT:    s_mov_b32 s6, -1
350; GFX11-GCN-REAL16-NEXT:    s_mov_b32 s7, 0x31016000
351; GFX11-GCN-REAL16-NEXT:    s_mov_b32 s10, s6
352; GFX11-GCN-REAL16-NEXT:    s_mov_b32 s11, s7
353; GFX11-GCN-REAL16-NEXT:    s_waitcnt lgkmcnt(0)
354; GFX11-GCN-REAL16-NEXT:    s_mov_b32 s8, s2
355; GFX11-GCN-REAL16-NEXT:    s_mov_b32 s9, s3
356; GFX11-GCN-REAL16-NEXT:    s_mov_b32 s4, s0
357; GFX11-GCN-REAL16-NEXT:    buffer_load_b64 v[1:2], off, s[8:11], 0
358; GFX11-GCN-REAL16-NEXT:    s_mov_b32 s5, s1
359; GFX11-GCN-REAL16-NEXT:    s_waitcnt vmcnt(0)
360; GFX11-GCN-REAL16-NEXT:    v_cvt_f16_f32_e32 v0.l, v2
361; GFX11-GCN-REAL16-NEXT:    v_cvt_f16_f32_e32 v0.h, v1
362; GFX11-GCN-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
363; GFX11-GCN-REAL16-NEXT:    v_pack_b32_f16 v0, v0.h, v0.l
364; GFX11-GCN-REAL16-NEXT:    buffer_store_b32 v0, off, s[4:7], 0
365; GFX11-GCN-REAL16-NEXT:    s_endpgm
366;
367; GFX11-GISEL-REAL16-LABEL: fptrunc:
368; GFX11-GISEL-REAL16:       ; %bb.0:
369; GFX11-GISEL-REAL16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
370; GFX11-GISEL-REAL16-NEXT:    s_waitcnt lgkmcnt(0)
371; GFX11-GISEL-REAL16-NEXT:    s_load_b64 s[2:3], s[2:3], 0x0
372; GFX11-GISEL-REAL16-NEXT:    s_waitcnt lgkmcnt(0)
373; GFX11-GISEL-REAL16-NEXT:    v_cvt_f16_f32_e32 v0.l, s2
374; GFX11-GISEL-REAL16-NEXT:    v_cvt_f16_f32_e32 v0.h, s3
375; GFX11-GISEL-REAL16-NEXT:    s_mov_b32 s2, -1
376; GFX11-GISEL-REAL16-NEXT:    s_mov_b32 s3, 0x31016000
377; GFX11-GISEL-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
378; GFX11-GISEL-REAL16-NEXT:    v_pack_b32_f16 v0, v0.l, v0.h
379; GFX11-GISEL-REAL16-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
380; GFX11-GISEL-REAL16-NEXT:    s_endpgm
381    ptr addrspace(1) %r,
382    ptr addrspace(1) %a) {
  ; Non-volatile load of both floats at once, truncated as a vector; unlike the
  ; other kernels in this file there is no workitem-id indexing and no
  ; attribute group on the define.
383  %a.val = load <2 x float>, ptr addrspace(1) %a
384  %r.val = fptrunc <2 x float> %a.val to <2 x half>
385  store <2 x half> %r.val, ptr addrspace(1) %r
386  ret void
387}
388
; Source-modifier test for fabs: same shape as the baseline pack test, but
; llvm.fabs.f16 is applied to each half before the insertelement pair. The
; assertions expect both fabs calls to appear as |...| operand modifiers on
; the single v_pack_b32_f16, with no separate absolute-value instruction.
389define amdgpu_kernel void @v_pack_b32.fabs(ptr addrspace(1) %in0, ptr addrspace(1) %in1) #0 {
390; GCN-LABEL: v_pack_b32.fabs:
391; GCN:       ; %bb.0:
392; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
393; GCN-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
394; GCN-NEXT:    s_waitcnt lgkmcnt(0)
395; GCN-NEXT:    global_load_ushort v1, v0, s[0:1] glc dlc
396; GCN-NEXT:    s_waitcnt vmcnt(0)
397; GCN-NEXT:    global_load_ushort v2, v0, s[2:3] glc dlc
398; GCN-NEXT:    s_waitcnt vmcnt(0)
399; GCN-NEXT:    v_add_f16_e32 v0, 2.0, v1
400; GCN-NEXT:    v_add_f16_e32 v1, 2.0, v2
401; GCN-NEXT:    v_pack_b32_f16 v0, |v0|, |v1|
402; GCN-NEXT:    ;;#ASMSTART
403; GCN-NEXT:    ; use v0
404; GCN-NEXT:    ;;#ASMEND
405; GCN-NEXT:    s_endpgm
406;
407; GISEL-LABEL: v_pack_b32.fabs:
408; GISEL:       ; %bb.0:
409; GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
410; GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
411; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
412; GISEL-NEXT:    global_load_ushort v1, v0, s[0:1] glc dlc
413; GISEL-NEXT:    s_waitcnt vmcnt(0)
414; GISEL-NEXT:    global_load_ushort v2, v0, s[2:3] glc dlc
415; GISEL-NEXT:    s_waitcnt vmcnt(0)
416; GISEL-NEXT:    v_add_f16_e32 v0, 2.0, v1
417; GISEL-NEXT:    v_add_f16_e32 v1, 2.0, v2
418; GISEL-NEXT:    v_pack_b32_f16 v0, |v0|, |v1|
419; GISEL-NEXT:    ;;#ASMSTART
420; GISEL-NEXT:    ; use v0
421; GISEL-NEXT:    ;;#ASMEND
422; GISEL-NEXT:    s_endpgm
423;
424; GFX11-GCN-FAKE16-LABEL: v_pack_b32.fabs:
425; GFX11-GCN-FAKE16:       ; %bb.0:
426; GFX11-GCN-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
427; GFX11-GCN-FAKE16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
428; GFX11-GCN-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
429; GFX11-GCN-FAKE16-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
430; GFX11-GCN-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
431; GFX11-GCN-FAKE16-NEXT:    global_load_u16 v1, v0, s[0:1] glc dlc
432; GFX11-GCN-FAKE16-NEXT:    s_waitcnt vmcnt(0)
433; GFX11-GCN-FAKE16-NEXT:    global_load_u16 v0, v0, s[2:3] glc dlc
434; GFX11-GCN-FAKE16-NEXT:    s_waitcnt vmcnt(0)
435; GFX11-GCN-FAKE16-NEXT:    v_add_f16_e32 v1, 2.0, v1
436; GFX11-GCN-FAKE16-NEXT:    v_add_f16_e32 v0, 2.0, v0
437; GFX11-GCN-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
438; GFX11-GCN-FAKE16-NEXT:    v_pack_b32_f16 v0, |v1|, |v0|
439; GFX11-GCN-FAKE16-NEXT:    ;;#ASMSTART
440; GFX11-GCN-FAKE16-NEXT:    ; use v0
441; GFX11-GCN-FAKE16-NEXT:    ;;#ASMEND
442; GFX11-GCN-FAKE16-NEXT:    s_endpgm
443;
444; GFX11-GISEL-FAKE16-LABEL: v_pack_b32.fabs:
445; GFX11-GISEL-FAKE16:       ; %bb.0:
446; GFX11-GISEL-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
447; GFX11-GISEL-FAKE16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
448; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
449; GFX11-GISEL-FAKE16-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
450; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
451; GFX11-GISEL-FAKE16-NEXT:    global_load_u16 v1, v0, s[0:1] glc dlc
452; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0)
453; GFX11-GISEL-FAKE16-NEXT:    global_load_u16 v0, v0, s[2:3] glc dlc
454; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0)
455; GFX11-GISEL-FAKE16-NEXT:    v_add_f16_e32 v1, 2.0, v1
456; GFX11-GISEL-FAKE16-NEXT:    v_add_f16_e32 v0, 2.0, v0
457; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
458; GFX11-GISEL-FAKE16-NEXT:    v_pack_b32_f16 v0, |v1|, |v0|
459; GFX11-GISEL-FAKE16-NEXT:    ;;#ASMSTART
460; GFX11-GISEL-FAKE16-NEXT:    ; use v0
461; GFX11-GISEL-FAKE16-NEXT:    ;;#ASMEND
462; GFX11-GISEL-FAKE16-NEXT:    s_endpgm
463;
464; GFX11-GCN-REAL16-LABEL: v_pack_b32.fabs:
465; GFX11-GCN-REAL16:       ; %bb.0:
466; GFX11-GCN-REAL16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
467; GFX11-GCN-REAL16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
468; GFX11-GCN-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
469; GFX11-GCN-REAL16-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
470; GFX11-GCN-REAL16-NEXT:    s_waitcnt lgkmcnt(0)
471; GFX11-GCN-REAL16-NEXT:    global_load_u16 v1, v0, s[0:1] glc dlc
472; GFX11-GCN-REAL16-NEXT:    s_waitcnt vmcnt(0)
473; GFX11-GCN-REAL16-NEXT:    global_load_u16 v2, v0, s[2:3] glc dlc
474; GFX11-GCN-REAL16-NEXT:    s_waitcnt vmcnt(0)
475; GFX11-GCN-REAL16-NEXT:    v_mov_b16_e32 v0.l, v1.l
476; GFX11-GCN-REAL16-NEXT:    v_mov_b16_e32 v0.h, v2.l
477; GFX11-GCN-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
478; GFX11-GCN-REAL16-NEXT:    v_add_f16_e32 v0.l, 2.0, v0.l
479; GFX11-GCN-REAL16-NEXT:    v_add_f16_e32 v0.h, 2.0, v0.h
480; GFX11-GCN-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
481; GFX11-GCN-REAL16-NEXT:    v_pack_b32_f16 v0, |v0.l|, |v0.h|
482; GFX11-GCN-REAL16-NEXT:    ;;#ASMSTART
483; GFX11-GCN-REAL16-NEXT:    ; use v0
484; GFX11-GCN-REAL16-NEXT:    ;;#ASMEND
485; GFX11-GCN-REAL16-NEXT:    s_endpgm
486;
487; GFX11-GISEL-REAL16-LABEL: v_pack_b32.fabs:
488; GFX11-GISEL-REAL16:       ; %bb.0:
489; GFX11-GISEL-REAL16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
490; GFX11-GISEL-REAL16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
491; GFX11-GISEL-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
492; GFX11-GISEL-REAL16-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
493; GFX11-GISEL-REAL16-NEXT:    s_waitcnt lgkmcnt(0)
494; GFX11-GISEL-REAL16-NEXT:    global_load_u16 v1, v0, s[0:1] glc dlc
495; GFX11-GISEL-REAL16-NEXT:    s_waitcnt vmcnt(0)
496; GFX11-GISEL-REAL16-NEXT:    global_load_u16 v2, v0, s[2:3] glc dlc
497; GFX11-GISEL-REAL16-NEXT:    s_waitcnt vmcnt(0)
498; GFX11-GISEL-REAL16-NEXT:    v_add_f16_e32 v0.l, 2.0, v1.l
499; GFX11-GISEL-REAL16-NEXT:    v_add_f16_e32 v0.h, 2.0, v2.l
500; GFX11-GISEL-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
501; GFX11-GISEL-REAL16-NEXT:    v_pack_b32_f16 v0, |v0.l|, |v0.h|
502; GFX11-GISEL-REAL16-NEXT:    ;;#ASMSTART
503; GFX11-GISEL-REAL16-NEXT:    ; use v0
504; GFX11-GISEL-REAL16-NEXT:    ;;#ASMEND
505; GFX11-GISEL-REAL16-NEXT:    s_endpgm
  ; The workitem id, sign-extended, indexes one half element in each input.
506  %tid = call i32 @llvm.amdgcn.workitem.id.x()
507  %tid.ext = sext i32 %tid to i64
508  %in0.gep = getelementptr inbounds half, ptr addrspace(1) %in0, i64 %tid.ext
509  %in1.gep = getelementptr inbounds half, ptr addrspace(1) %in1, i64 %tid.ext
  ; Volatile loads: the assertions expect two separate 16-bit global loads.
510  %v0 = load volatile half, ptr addrspace(1) %in0.gep
511  %v1 = load volatile half, ptr addrspace(1) %in1.gep
512  %v0.add = fadd half %v0, 2.0
513  %v1.add = fadd half %v1, 2.0
  ; fabs of each add result; these two calls are what should become |...|
  ; modifiers on the pack instruction.
514  %v0.fabs = call half @llvm.fabs.f16(half %v0.add)
515  %v1.fabs = call half @llvm.fabs.f16(half %v1.add)
  ; NOTE(review): the seed vector is undef and both lanes are overwritten; if
  ; this is ever switched to poison, regenerate the assertions.
516  %vec.0 = insertelement <2 x half> undef, half %v0.fabs, i32 0
517  %vec.1 = insertelement <2 x half> %vec.0, half %v1.fabs, i32 1
518  %vec.i32 = bitcast <2 x half> %vec.1 to i32
  ; Side-effecting inline asm consumes the packed i32 through a v-constrained
  ; operand; it shows up between the ASMSTART/ASMEND markers in the assertions.
519  call void asm sideeffect "; use $0", "v"(i32 %vec.i32) #0
520  ret void
521}
522
; Source-modifier test for negation: same shape as the baseline pack test, but
; each half is negated (written as fsub -0.0, x) before the insertelement pair.
; The assertions expect both negations to appear as '-' operand modifiers on
; the single v_pack_b32_f16, with no separate negate/subtract instruction.
523define amdgpu_kernel void @v_pack_b32.fneg(ptr addrspace(1) %in0, ptr addrspace(1) %in1) #0 {
524; GCN-LABEL: v_pack_b32.fneg:
525; GCN:       ; %bb.0:
526; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
527; GCN-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
528; GCN-NEXT:    s_waitcnt lgkmcnt(0)
529; GCN-NEXT:    global_load_ushort v1, v0, s[0:1] glc dlc
530; GCN-NEXT:    s_waitcnt vmcnt(0)
531; GCN-NEXT:    global_load_ushort v2, v0, s[2:3] glc dlc
532; GCN-NEXT:    s_waitcnt vmcnt(0)
533; GCN-NEXT:    v_add_f16_e32 v0, 2.0, v1
534; GCN-NEXT:    v_add_f16_e32 v1, 2.0, v2
535; GCN-NEXT:    v_pack_b32_f16 v0, -v0, -v1
536; GCN-NEXT:    ;;#ASMSTART
537; GCN-NEXT:    ; use v0
538; GCN-NEXT:    ;;#ASMEND
539; GCN-NEXT:    s_endpgm
540;
541; GISEL-LABEL: v_pack_b32.fneg:
542; GISEL:       ; %bb.0:
543; GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
544; GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
545; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
546; GISEL-NEXT:    global_load_ushort v1, v0, s[0:1] glc dlc
547; GISEL-NEXT:    s_waitcnt vmcnt(0)
548; GISEL-NEXT:    global_load_ushort v2, v0, s[2:3] glc dlc
549; GISEL-NEXT:    s_waitcnt vmcnt(0)
550; GISEL-NEXT:    v_add_f16_e32 v0, 2.0, v1
551; GISEL-NEXT:    v_add_f16_e32 v1, 2.0, v2
552; GISEL-NEXT:    v_pack_b32_f16 v0, -v0, -v1
553; GISEL-NEXT:    ;;#ASMSTART
554; GISEL-NEXT:    ; use v0
555; GISEL-NEXT:    ;;#ASMEND
556; GISEL-NEXT:    s_endpgm
557;
558; GFX11-GCN-FAKE16-LABEL: v_pack_b32.fneg:
559; GFX11-GCN-FAKE16:       ; %bb.0:
560; GFX11-GCN-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
561; GFX11-GCN-FAKE16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
562; GFX11-GCN-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
563; GFX11-GCN-FAKE16-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
564; GFX11-GCN-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
565; GFX11-GCN-FAKE16-NEXT:    global_load_u16 v1, v0, s[0:1] glc dlc
566; GFX11-GCN-FAKE16-NEXT:    s_waitcnt vmcnt(0)
567; GFX11-GCN-FAKE16-NEXT:    global_load_u16 v0, v0, s[2:3] glc dlc
568; GFX11-GCN-FAKE16-NEXT:    s_waitcnt vmcnt(0)
569; GFX11-GCN-FAKE16-NEXT:    v_add_f16_e32 v1, 2.0, v1
570; GFX11-GCN-FAKE16-NEXT:    v_add_f16_e32 v0, 2.0, v0
571; GFX11-GCN-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
572; GFX11-GCN-FAKE16-NEXT:    v_pack_b32_f16 v0, -v1, -v0
573; GFX11-GCN-FAKE16-NEXT:    ;;#ASMSTART
574; GFX11-GCN-FAKE16-NEXT:    ; use v0
575; GFX11-GCN-FAKE16-NEXT:    ;;#ASMEND
576; GFX11-GCN-FAKE16-NEXT:    s_endpgm
577;
578; GFX11-GISEL-FAKE16-LABEL: v_pack_b32.fneg:
579; GFX11-GISEL-FAKE16:       ; %bb.0:
580; GFX11-GISEL-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
581; GFX11-GISEL-FAKE16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
582; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
583; GFX11-GISEL-FAKE16-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
584; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
585; GFX11-GISEL-FAKE16-NEXT:    global_load_u16 v1, v0, s[0:1] glc dlc
586; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0)
587; GFX11-GISEL-FAKE16-NEXT:    global_load_u16 v0, v0, s[2:3] glc dlc
588; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0)
589; GFX11-GISEL-FAKE16-NEXT:    v_add_f16_e32 v1, 2.0, v1
590; GFX11-GISEL-FAKE16-NEXT:    v_add_f16_e32 v0, 2.0, v0
591; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
592; GFX11-GISEL-FAKE16-NEXT:    v_pack_b32_f16 v0, -v1, -v0
593; GFX11-GISEL-FAKE16-NEXT:    ;;#ASMSTART
594; GFX11-GISEL-FAKE16-NEXT:    ; use v0
595; GFX11-GISEL-FAKE16-NEXT:    ;;#ASMEND
596; GFX11-GISEL-FAKE16-NEXT:    s_endpgm
597;
598; GFX11-GCN-REAL16-LABEL: v_pack_b32.fneg:
599; GFX11-GCN-REAL16:       ; %bb.0:
600; GFX11-GCN-REAL16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
601; GFX11-GCN-REAL16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
602; GFX11-GCN-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
603; GFX11-GCN-REAL16-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
604; GFX11-GCN-REAL16-NEXT:    s_waitcnt lgkmcnt(0)
605; GFX11-GCN-REAL16-NEXT:    global_load_u16 v1, v0, s[0:1] glc dlc
606; GFX11-GCN-REAL16-NEXT:    s_waitcnt vmcnt(0)
607; GFX11-GCN-REAL16-NEXT:    global_load_u16 v2, v0, s[2:3] glc dlc
608; GFX11-GCN-REAL16-NEXT:    s_waitcnt vmcnt(0)
609; GFX11-GCN-REAL16-NEXT:    v_mov_b16_e32 v0.l, v1.l
610; GFX11-GCN-REAL16-NEXT:    v_mov_b16_e32 v0.h, v2.l
611; GFX11-GCN-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
612; GFX11-GCN-REAL16-NEXT:    v_add_f16_e32 v0.l, 2.0, v0.l
613; GFX11-GCN-REAL16-NEXT:    v_add_f16_e32 v0.h, 2.0, v0.h
614; GFX11-GCN-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
615; GFX11-GCN-REAL16-NEXT:    v_pack_b32_f16 v0, -v0.l, -v0.h
616; GFX11-GCN-REAL16-NEXT:    ;;#ASMSTART
617; GFX11-GCN-REAL16-NEXT:    ; use v0
618; GFX11-GCN-REAL16-NEXT:    ;;#ASMEND
619; GFX11-GCN-REAL16-NEXT:    s_endpgm
620;
621; GFX11-GISEL-REAL16-LABEL: v_pack_b32.fneg:
622; GFX11-GISEL-REAL16:       ; %bb.0:
623; GFX11-GISEL-REAL16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
624; GFX11-GISEL-REAL16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
625; GFX11-GISEL-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
626; GFX11-GISEL-REAL16-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
627; GFX11-GISEL-REAL16-NEXT:    s_waitcnt lgkmcnt(0)
628; GFX11-GISEL-REAL16-NEXT:    global_load_u16 v1, v0, s[0:1] glc dlc
629; GFX11-GISEL-REAL16-NEXT:    s_waitcnt vmcnt(0)
630; GFX11-GISEL-REAL16-NEXT:    global_load_u16 v2, v0, s[2:3] glc dlc
631; GFX11-GISEL-REAL16-NEXT:    s_waitcnt vmcnt(0)
632; GFX11-GISEL-REAL16-NEXT:    v_add_f16_e32 v0.l, 2.0, v1.l
633; GFX11-GISEL-REAL16-NEXT:    v_add_f16_e32 v0.h, 2.0, v2.l
634; GFX11-GISEL-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
635; GFX11-GISEL-REAL16-NEXT:    v_pack_b32_f16 v0, -v0.l, -v0.h
636; GFX11-GISEL-REAL16-NEXT:    ;;#ASMSTART
637; GFX11-GISEL-REAL16-NEXT:    ; use v0
638; GFX11-GISEL-REAL16-NEXT:    ;;#ASMEND
639; GFX11-GISEL-REAL16-NEXT:    s_endpgm
  ; The workitem id, sign-extended, indexes one half element in each input.
640  %tid = call i32 @llvm.amdgcn.workitem.id.x()
641  %tid.ext = sext i32 %tid to i64
642  %in0.gep = getelementptr inbounds half, ptr addrspace(1) %in0, i64 %tid.ext
643  %in1.gep = getelementptr inbounds half, ptr addrspace(1) %in1, i64 %tid.ext
  ; Volatile loads: the assertions expect two separate 16-bit global loads.
644  %v0 = load volatile half, ptr addrspace(1) %in0.gep
645  %v1 = load volatile half, ptr addrspace(1) %in1.gep
646  %v0.add = fadd half %v0, 2.0
647  %v1.add = fadd half %v1, 2.0
  ; Negation is spelled as a subtraction from -0.0 rather than the fneg
  ; instruction; the assertions show it is still matched as a neg modifier.
  ; NOTE(review): if this is rewritten to fneg, regenerate the assertions.
648  %v0.fneg = fsub half -0.0, %v0.add
649  %v1.fneg = fsub half -0.0, %v1.add
  ; NOTE(review): the seed vector is undef and both lanes are overwritten; if
  ; this is ever switched to poison, regenerate the assertions.
650  %vec.0 = insertelement <2 x half> undef, half %v0.fneg, i32 0
651  %vec.1 = insertelement <2 x half> %vec.0, half %v1.fneg, i32 1
652  %vec.i32 = bitcast <2 x half> %vec.1 to i32
  ; Side-effecting inline asm consumes the packed i32 through a v-constrained
  ; operand; it shows up between the ASMSTART/ASMEND markers in the assertions.
653  call void asm sideeffect "; use $0", "v"(i32 %vec.i32) #0
654  ret void
655}
656
; Half-precision fabs intrinsic, called by @v_pack_b32.fabs.
657declare half @llvm.fabs.f16(half) #1
658
; #0 is attached to the four v_pack_b32* kernels and their inline-asm calls;
; #1 is attached to the two intrinsic declarations.
659attributes #0 = { nounwind }
660attributes #1 = { nounwind readnone }
661
662