xref: /llvm-project/llvm/test/CodeGen/AMDGPU/strict_ldexp.f16.ll (revision 9e9907f1cfa424366fba58d9520f9305b537cec9)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
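2; FIXME: Enable f16 promotion. The tahiti run lines below are spelled XUN so they do not execute (presumably pending this promotion support).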
3; XUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6,GFX6-SDAG %s
4; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-SDAG %s
5; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-SDAG %s
6; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-SDAG %s
7
8; XUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6,GFX6-GISEL %s
9; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-GISEL %s
10; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s
11; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-GISEL %s
12
13; define half @test_ldexp_f16_i16(ptr addrspace(1) %out, half %a, i16 %b) #0 {
14;   %result = call half @llvm.experimental.constrained.ldexp.f16.i16(half %a, i16 %b, metadata !"round.dynamic", metadata !"fpexcept.strict")
15;   ret half %result
16; }
17
; Scalar case: strict-FP ldexp of a half by an i32 exponent, called through the
; constrained intrinsic with "round.dynamic" / "fpexcept.strict" metadata.
; %out is not referenced by the body.
; The checks for every enabled run line expect the exponent to go through
; v_med3_i32 with the bounds 0x7fff and 0xffff8000 (0x8000 loaded via
; s_movk_i32 in the SelectionDAG runs) before a single v_ldexp_f16
; (presumably a clamp of the i32 exponent to the signed 16-bit range).
18define half @test_ldexp_f16_i32(ptr addrspace(1) %out, half %a, i32 %b) #0 {
19; GFX8-SDAG-LABEL: test_ldexp_f16_i32:
20; GFX8-SDAG:       ; %bb.0:
21; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22; GFX8-SDAG-NEXT:    s_movk_i32 s4, 0x8000
23; GFX8-SDAG-NEXT:    v_mov_b32_e32 v0, 0x7fff
24; GFX8-SDAG-NEXT:    v_med3_i32 v0, v3, s4, v0
25; GFX8-SDAG-NEXT:    v_ldexp_f16_e32 v0, v2, v0
26; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
27;
28; GFX9-SDAG-LABEL: test_ldexp_f16_i32:
29; GFX9-SDAG:       ; %bb.0:
30; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
31; GFX9-SDAG-NEXT:    s_movk_i32 s4, 0x8000
32; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, 0x7fff
33; GFX9-SDAG-NEXT:    v_med3_i32 v0, v3, s4, v0
34; GFX9-SDAG-NEXT:    v_ldexp_f16_e32 v0, v2, v0
35; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
36;
37; GFX11-SDAG-LABEL: test_ldexp_f16_i32:
38; GFX11-SDAG:       ; %bb.0:
39; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
40; GFX11-SDAG-NEXT:    s_movk_i32 s0, 0x8000
41; GFX11-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
42; GFX11-SDAG-NEXT:    v_med3_i32 v0, v3, s0, 0x7fff
43; GFX11-SDAG-NEXT:    v_ldexp_f16_e32 v0, v2, v0
44; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
45;
46; GFX8-GISEL-LABEL: test_ldexp_f16_i32:
47; GFX8-GISEL:       ; %bb.0:
48; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
49; GFX8-GISEL-NEXT:    v_mov_b32_e32 v0, 0xffff8000
50; GFX8-GISEL-NEXT:    v_mov_b32_e32 v1, 0x7fff
51; GFX8-GISEL-NEXT:    v_med3_i32 v0, v3, v0, v1
52; GFX8-GISEL-NEXT:    v_ldexp_f16_e32 v0, v2, v0
53; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
54;
55; GFX9-GISEL-LABEL: test_ldexp_f16_i32:
56; GFX9-GISEL:       ; %bb.0:
57; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
58; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 0xffff8000
59; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, 0x7fff
60; GFX9-GISEL-NEXT:    v_med3_i32 v0, v3, v0, v1
61; GFX9-GISEL-NEXT:    v_ldexp_f16_e32 v0, v2, v0
62; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
63;
64; GFX11-GISEL-LABEL: test_ldexp_f16_i32:
65; GFX11-GISEL:       ; %bb.0:
66; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
67; GFX11-GISEL-NEXT:    v_mov_b32_e32 v0, 0x7fff
68; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
69; GFX11-GISEL-NEXT:    v_med3_i32 v0, 0xffff8000, v3, v0
70; GFX11-GISEL-NEXT:    v_ldexp_f16_e32 v0, v2, v0
71; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
72  %result = call half @llvm.experimental.constrained.ldexp.f16.i32(half %a, i32 %b, metadata !"round.dynamic", metadata !"fpexcept.strict")
73  ret half %result
74}
75
76; define <2 x half> @test_ldexp_v2f16_v2i16(ptr addrspace(1) %out, <2 x half> %a, <2 x i16> %b) #0 {
77;   %result = call <2 x half> @llvm.experimental.constrained.ldexp.v2f16.v2i16(<2 x half> %a, <2 x i16> %b, metadata !"round.dynamic", metadata !"fpexcept.strict")
78;   ret <2 x half> %result
79; }
80
; Two-element vector case: strict-FP ldexp of <2 x half> by <2 x i32>, with
; "round.dynamic" / "fpexcept.strict" metadata. %out is not referenced.
; The checks expect one v_med3_i32 and one v_ldexp_f16 per element. The high
; half of %a is reached with SDWA src0_sel:WORD_1 on the tonga and gfx900 runs
; and with a v_lshrrev_b32 by 16 on the gfx1100 runs. The two results are then
; combined into v0 with v_or_b32, v_perm_b32 or v_lshl_or_b32, depending on the
; run line.
81define <2 x half> @test_ldexp_v2f16_v2i32(ptr addrspace(1) %out, <2 x half> %a, <2 x i32> %b) #0 {
82; GFX8-SDAG-LABEL: test_ldexp_v2f16_v2i32:
83; GFX8-SDAG:       ; %bb.0:
84; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
85; GFX8-SDAG-NEXT:    s_movk_i32 s4, 0x8000
86; GFX8-SDAG-NEXT:    v_mov_b32_e32 v0, 0x7fff
87; GFX8-SDAG-NEXT:    v_med3_i32 v1, v3, s4, v0
88; GFX8-SDAG-NEXT:    v_med3_i32 v0, v4, s4, v0
89; GFX8-SDAG-NEXT:    v_ldexp_f16_e32 v1, v2, v1
90; GFX8-SDAG-NEXT:    v_ldexp_f16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
91; GFX8-SDAG-NEXT:    v_or_b32_e32 v0, v1, v0
92; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
93;
94; GFX9-SDAG-LABEL: test_ldexp_v2f16_v2i32:
95; GFX9-SDAG:       ; %bb.0:
96; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
97; GFX9-SDAG-NEXT:    s_movk_i32 s4, 0x8000
98; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, 0x7fff
99; GFX9-SDAG-NEXT:    v_med3_i32 v1, v3, s4, v0
100; GFX9-SDAG-NEXT:    v_med3_i32 v0, v4, s4, v0
101; GFX9-SDAG-NEXT:    v_ldexp_f16_e32 v1, v2, v1
102; GFX9-SDAG-NEXT:    v_ldexp_f16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
103; GFX9-SDAG-NEXT:    s_mov_b32 s4, 0x5040100
104; GFX9-SDAG-NEXT:    v_perm_b32 v0, v0, v1, s4
105; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
106;
107; GFX11-SDAG-LABEL: test_ldexp_v2f16_v2i32:
108; GFX11-SDAG:       ; %bb.0:
109; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
110; GFX11-SDAG-NEXT:    s_movk_i32 s0, 0x8000
111; GFX11-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
112; GFX11-SDAG-NEXT:    v_med3_i32 v0, v3, s0, 0x7fff
113; GFX11-SDAG-NEXT:    v_med3_i32 v1, v4, s0, 0x7fff
114; GFX11-SDAG-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
115; GFX11-SDAG-NEXT:    v_ldexp_f16_e32 v0, v2, v0
116; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
117; GFX11-SDAG-NEXT:    v_ldexp_f16_e32 v1, v3, v1
118; GFX11-SDAG-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
119; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
120;
121; GFX8-GISEL-LABEL: test_ldexp_v2f16_v2i32:
122; GFX8-GISEL:       ; %bb.0:
123; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
124; GFX8-GISEL-NEXT:    v_mov_b32_e32 v0, 0xffff8000
125; GFX8-GISEL-NEXT:    v_mov_b32_e32 v1, 0x7fff
126; GFX8-GISEL-NEXT:    v_med3_i32 v3, v3, v0, v1
127; GFX8-GISEL-NEXT:    v_med3_i32 v0, v4, v0, v1
128; GFX8-GISEL-NEXT:    v_ldexp_f16_e32 v3, v2, v3
129; GFX8-GISEL-NEXT:    v_ldexp_f16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
130; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v3, v0
131; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
132;
133; GFX9-GISEL-LABEL: test_ldexp_v2f16_v2i32:
134; GFX9-GISEL:       ; %bb.0:
135; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
136; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 0xffff8000
137; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, 0x7fff
138; GFX9-GISEL-NEXT:    v_med3_i32 v3, v3, v0, v1
139; GFX9-GISEL-NEXT:    v_med3_i32 v0, v4, v0, v1
140; GFX9-GISEL-NEXT:    v_ldexp_f16_e32 v3, v2, v3
141; GFX9-GISEL-NEXT:    v_ldexp_f16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
142; GFX9-GISEL-NEXT:    v_lshl_or_b32 v0, v0, 16, v3
143; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
144;
145; GFX11-GISEL-LABEL: test_ldexp_v2f16_v2i32:
146; GFX11-GISEL:       ; %bb.0:
147; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
148; GFX11-GISEL-NEXT:    v_mov_b32_e32 v0, 0x7fff
149; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
150; GFX11-GISEL-NEXT:    v_med3_i32 v1, 0xffff8000, v3, v0
151; GFX11-GISEL-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
152; GFX11-GISEL-NEXT:    v_med3_i32 v0, 0xffff8000, v4, v0
153; GFX11-GISEL-NEXT:    v_ldexp_f16_e32 v1, v2, v1
154; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
155; GFX11-GISEL-NEXT:    v_ldexp_f16_e32 v0, v3, v0
156; GFX11-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
157; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
158; GFX11-GISEL-NEXT:    v_lshl_or_b32 v0, v0, 16, v1
159; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
160  %result = call <2 x half> @llvm.experimental.constrained.ldexp.v2f16.v2i32(<2 x half> %a, <2 x i32> %b, metadata !"round.dynamic", metadata !"fpexcept.strict")
161  ret <2 x half> %result
162}
163
; Three-element vector case: strict-FP ldexp of <3 x half> by <3 x i32>, with
; "round.dynamic" / "fpexcept.strict" metadata. %out is not referenced.
; The checks expect three v_med3_i32 / v_ldexp_f16 pairs. The results for the
; two halves held in v2 are combined into v0, while the third element (input
; in v3) is written on its own into v1.
164define <3 x half> @test_ldexp_v3f16_v3i32(ptr addrspace(1) %out, <3 x half> %a, <3 x i32> %b) #0 {
165; GFX8-SDAG-LABEL: test_ldexp_v3f16_v3i32:
166; GFX8-SDAG:       ; %bb.0:
167; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
168; GFX8-SDAG-NEXT:    s_movk_i32 s4, 0x8000
169; GFX8-SDAG-NEXT:    v_mov_b32_e32 v1, 0x7fff
170; GFX8-SDAG-NEXT:    v_med3_i32 v0, v4, s4, v1
171; GFX8-SDAG-NEXT:    v_med3_i32 v4, v5, s4, v1
172; GFX8-SDAG-NEXT:    v_ldexp_f16_e32 v0, v2, v0
173; GFX8-SDAG-NEXT:    v_ldexp_f16_sdwa v2, v2, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
174; GFX8-SDAG-NEXT:    v_med3_i32 v1, v6, s4, v1
175; GFX8-SDAG-NEXT:    v_or_b32_e32 v0, v0, v2
176; GFX8-SDAG-NEXT:    v_ldexp_f16_e32 v1, v3, v1
177; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
178;
179; GFX9-SDAG-LABEL: test_ldexp_v3f16_v3i32:
180; GFX9-SDAG:       ; %bb.0:
181; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
182; GFX9-SDAG-NEXT:    s_movk_i32 s4, 0x8000
183; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, 0x7fff
184; GFX9-SDAG-NEXT:    v_med3_i32 v0, v4, s4, v1
185; GFX9-SDAG-NEXT:    v_med3_i32 v4, v5, s4, v1
186; GFX9-SDAG-NEXT:    v_ldexp_f16_e32 v0, v2, v0
187; GFX9-SDAG-NEXT:    v_ldexp_f16_sdwa v2, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
188; GFX9-SDAG-NEXT:    s_mov_b32 s5, 0x5040100
189; GFX9-SDAG-NEXT:    v_med3_i32 v1, v6, s4, v1
190; GFX9-SDAG-NEXT:    v_perm_b32 v0, v2, v0, s5
191; GFX9-SDAG-NEXT:    v_ldexp_f16_e32 v1, v3, v1
192; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
193;
194; GFX11-SDAG-LABEL: test_ldexp_v3f16_v3i32:
195; GFX11-SDAG:       ; %bb.0:
196; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
197; GFX11-SDAG-NEXT:    s_movk_i32 s0, 0x8000
198; GFX11-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
199; GFX11-SDAG-NEXT:    v_med3_i32 v0, v4, s0, 0x7fff
200; GFX11-SDAG-NEXT:    v_med3_i32 v1, v5, s0, 0x7fff
201; GFX11-SDAG-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
202; GFX11-SDAG-NEXT:    v_ldexp_f16_e32 v0, v2, v0
203; GFX11-SDAG-NEXT:    v_med3_i32 v2, v6, s0, 0x7fff
204; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
205; GFX11-SDAG-NEXT:    v_ldexp_f16_e32 v1, v4, v1
206; GFX11-SDAG-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
207; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3)
208; GFX11-SDAG-NEXT:    v_ldexp_f16_e32 v1, v3, v2
209; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
210;
211; GFX8-GISEL-LABEL: test_ldexp_v3f16_v3i32:
212; GFX8-GISEL:       ; %bb.0:
213; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
214; GFX8-GISEL-NEXT:    v_mov_b32_e32 v0, 0xffff8000
215; GFX8-GISEL-NEXT:    v_mov_b32_e32 v1, 0x7fff
216; GFX8-GISEL-NEXT:    v_med3_i32 v4, v4, v0, v1
217; GFX8-GISEL-NEXT:    v_med3_i32 v5, v5, v0, v1
218; GFX8-GISEL-NEXT:    v_ldexp_f16_e32 v4, v2, v4
219; GFX8-GISEL-NEXT:    v_ldexp_f16_sdwa v2, v2, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
220; GFX8-GISEL-NEXT:    v_med3_i32 v0, v6, v0, v1
221; GFX8-GISEL-NEXT:    v_ldexp_f16_e32 v1, v3, v0
222; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v4, v2
223; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
224;
225; GFX9-GISEL-LABEL: test_ldexp_v3f16_v3i32:
226; GFX9-GISEL:       ; %bb.0:
227; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
228; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 0xffff8000
229; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, 0x7fff
230; GFX9-GISEL-NEXT:    v_med3_i32 v4, v4, v0, v1
231; GFX9-GISEL-NEXT:    v_med3_i32 v5, v5, v0, v1
232; GFX9-GISEL-NEXT:    v_ldexp_f16_e32 v4, v2, v4
233; GFX9-GISEL-NEXT:    v_ldexp_f16_sdwa v2, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
234; GFX9-GISEL-NEXT:    v_med3_i32 v0, v6, v0, v1
235; GFX9-GISEL-NEXT:    v_ldexp_f16_e32 v1, v3, v0
236; GFX9-GISEL-NEXT:    v_lshl_or_b32 v0, v2, 16, v4
237; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
238;
239; GFX11-GISEL-LABEL: test_ldexp_v3f16_v3i32:
240; GFX11-GISEL:       ; %bb.0:
241; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
242; GFX11-GISEL-NEXT:    v_mov_b32_e32 v0, 0x7fff
243; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
244; GFX11-GISEL-NEXT:    v_med3_i32 v1, 0xffff8000, v4, v0
245; GFX11-GISEL-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
246; GFX11-GISEL-NEXT:    v_med3_i32 v5, 0xffff8000, v5, v0
247; GFX11-GISEL-NEXT:    v_ldexp_f16_e32 v1, v2, v1
248; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
249; GFX11-GISEL-NEXT:    v_ldexp_f16_e32 v2, v4, v5
250; GFX11-GISEL-NEXT:    v_med3_i32 v4, 0xffff8000, v6, v0
251; GFX11-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
252; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
253; GFX11-GISEL-NEXT:    v_lshl_or_b32 v0, v2, 16, v1
254; GFX11-GISEL-NEXT:    v_ldexp_f16_e32 v1, v3, v4
255; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
256  %result = call <3 x half> @llvm.experimental.constrained.ldexp.v3f16.v3i32(<3 x half> %a, <3 x i32> %b, metadata !"round.dynamic", metadata !"fpexcept.strict")
257  ret <3 x half> %result
258}
259
; Four-element vector case: strict-FP ldexp of <4 x half> by <4 x i32>, with
; "round.dynamic" / "fpexcept.strict" metadata. %out is not referenced.
; The checks expect four v_med3_i32 / v_ldexp_f16 pairs (exponents in v4-v7,
; half inputs in v2 and v3), with the four results combined pairwise into v0
; and v1 before the return.
260define <4 x half> @test_ldexp_v4f16_v4i32(ptr addrspace(1) %out, <4 x half> %a, <4 x i32> %b) #0 {
261; GFX8-SDAG-LABEL: test_ldexp_v4f16_v4i32:
262; GFX8-SDAG:       ; %bb.0:
263; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
264; GFX8-SDAG-NEXT:    s_movk_i32 s4, 0x8000
265; GFX8-SDAG-NEXT:    v_mov_b32_e32 v0, 0x7fff
266; GFX8-SDAG-NEXT:    v_med3_i32 v1, v7, s4, v0
267; GFX8-SDAG-NEXT:    v_med3_i32 v6, v6, s4, v0
268; GFX8-SDAG-NEXT:    v_med3_i32 v5, v5, s4, v0
269; GFX8-SDAG-NEXT:    v_med3_i32 v0, v4, s4, v0
270; GFX8-SDAG-NEXT:    v_ldexp_f16_sdwa v1, v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
271; GFX8-SDAG-NEXT:    v_ldexp_f16_e32 v3, v3, v6
272; GFX8-SDAG-NEXT:    v_ldexp_f16_sdwa v5, v2, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
273; GFX8-SDAG-NEXT:    v_ldexp_f16_e32 v0, v2, v0
274; GFX8-SDAG-NEXT:    v_or_b32_e32 v0, v0, v5
275; GFX8-SDAG-NEXT:    v_or_b32_e32 v1, v3, v1
276; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
277;
278; GFX9-SDAG-LABEL: test_ldexp_v4f16_v4i32:
279; GFX9-SDAG:       ; %bb.0:
280; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
281; GFX9-SDAG-NEXT:    s_movk_i32 s4, 0x8000
282; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, 0x7fff
283; GFX9-SDAG-NEXT:    v_med3_i32 v1, v6, s4, v0
284; GFX9-SDAG-NEXT:    v_med3_i32 v6, v7, s4, v0
285; GFX9-SDAG-NEXT:    v_med3_i32 v4, v4, s4, v0
286; GFX9-SDAG-NEXT:    v_med3_i32 v0, v5, s4, v0
287; GFX9-SDAG-NEXT:    v_ldexp_f16_e32 v1, v3, v1
288; GFX9-SDAG-NEXT:    v_ldexp_f16_sdwa v3, v3, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
289; GFX9-SDAG-NEXT:    v_ldexp_f16_e32 v4, v2, v4
290; GFX9-SDAG-NEXT:    v_ldexp_f16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
291; GFX9-SDAG-NEXT:    s_mov_b32 s4, 0x5040100
292; GFX9-SDAG-NEXT:    v_perm_b32 v0, v0, v4, s4
293; GFX9-SDAG-NEXT:    v_perm_b32 v1, v3, v1, s4
294; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
295;
296; GFX11-SDAG-LABEL: test_ldexp_v4f16_v4i32:
297; GFX11-SDAG:       ; %bb.0:
298; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
299; GFX11-SDAG-NEXT:    s_movk_i32 s0, 0x8000
300; GFX11-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
301; GFX11-SDAG-NEXT:    v_med3_i32 v0, v6, s0, 0x7fff
302; GFX11-SDAG-NEXT:    v_med3_i32 v1, v7, s0, 0x7fff
303; GFX11-SDAG-NEXT:    v_med3_i32 v4, v4, s0, 0x7fff
304; GFX11-SDAG-NEXT:    v_med3_i32 v5, v5, s0, 0x7fff
305; GFX11-SDAG-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
306; GFX11-SDAG-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
307; GFX11-SDAG-NEXT:    v_ldexp_f16_e32 v3, v3, v0
308; GFX11-SDAG-NEXT:    v_ldexp_f16_e32 v0, v2, v4
309; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
310; GFX11-SDAG-NEXT:    v_ldexp_f16_e32 v2, v6, v5
311; GFX11-SDAG-NEXT:    v_ldexp_f16_e32 v1, v7, v1
312; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
313; GFX11-SDAG-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
314; GFX11-SDAG-NEXT:    v_perm_b32 v1, v1, v3, 0x5040100
315; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
316;
317; GFX8-GISEL-LABEL: test_ldexp_v4f16_v4i32:
318; GFX8-GISEL:       ; %bb.0:
319; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
320; GFX8-GISEL-NEXT:    v_mov_b32_e32 v0, 0xffff8000
321; GFX8-GISEL-NEXT:    v_mov_b32_e32 v1, 0x7fff
322; GFX8-GISEL-NEXT:    v_med3_i32 v4, v4, v0, v1
323; GFX8-GISEL-NEXT:    v_med3_i32 v5, v5, v0, v1
324; GFX8-GISEL-NEXT:    v_ldexp_f16_e32 v4, v2, v4
325; GFX8-GISEL-NEXT:    v_ldexp_f16_sdwa v2, v2, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
326; GFX8-GISEL-NEXT:    v_med3_i32 v5, v6, v0, v1
327; GFX8-GISEL-NEXT:    v_med3_i32 v0, v7, v0, v1
328; GFX8-GISEL-NEXT:    v_ldexp_f16_e32 v5, v3, v5
329; GFX8-GISEL-NEXT:    v_ldexp_f16_sdwa v1, v3, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
330; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v4, v2
331; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, v5, v1
332; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
333;
334; GFX9-GISEL-LABEL: test_ldexp_v4f16_v4i32:
335; GFX9-GISEL:       ; %bb.0:
336; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
337; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 0xffff8000
338; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, 0x7fff
339; GFX9-GISEL-NEXT:    v_med3_i32 v4, v4, v0, v1
340; GFX9-GISEL-NEXT:    v_med3_i32 v5, v5, v0, v1
341; GFX9-GISEL-NEXT:    v_ldexp_f16_e32 v4, v2, v4
342; GFX9-GISEL-NEXT:    v_ldexp_f16_sdwa v2, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
343; GFX9-GISEL-NEXT:    v_med3_i32 v5, v6, v0, v1
344; GFX9-GISEL-NEXT:    v_med3_i32 v0, v7, v0, v1
345; GFX9-GISEL-NEXT:    v_ldexp_f16_e32 v5, v3, v5
346; GFX9-GISEL-NEXT:    v_ldexp_f16_sdwa v1, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
347; GFX9-GISEL-NEXT:    v_lshl_or_b32 v0, v2, 16, v4
348; GFX9-GISEL-NEXT:    v_lshl_or_b32 v1, v1, 16, v5
349; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
350;
351; GFX11-GISEL-LABEL: test_ldexp_v4f16_v4i32:
352; GFX11-GISEL:       ; %bb.0:
353; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
354; GFX11-GISEL-NEXT:    v_mov_b32_e32 v0, 0x7fff
355; GFX11-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
356; GFX11-GISEL-NEXT:    v_lshrrev_b32_e32 v8, 16, v3
357; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_4)
358; GFX11-GISEL-NEXT:    v_med3_i32 v4, 0xffff8000, v4, v0
359; GFX11-GISEL-NEXT:    v_med3_i32 v6, 0xffff8000, v6, v0
360; GFX11-GISEL-NEXT:    v_med3_i32 v5, 0xffff8000, v5, v0
361; GFX11-GISEL-NEXT:    v_med3_i32 v0, 0xffff8000, v7, v0
362; GFX11-GISEL-NEXT:    v_ldexp_f16_e32 v2, v2, v4
363; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
364; GFX11-GISEL-NEXT:    v_ldexp_f16_e32 v3, v3, v6
365; GFX11-GISEL-NEXT:    v_ldexp_f16_e32 v1, v1, v5
366; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
367; GFX11-GISEL-NEXT:    v_ldexp_f16_e32 v4, v8, v0
368; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v2
369; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
370; GFX11-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v3
371; GFX11-GISEL-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
372; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2)
373; GFX11-GISEL-NEXT:    v_lshl_or_b32 v1, v4, 16, v2
374; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
375  %result = call <4 x half> @llvm.experimental.constrained.ldexp.v4f16.v4i32(<4 x half> %a, <4 x i32> %b, metadata !"round.dynamic", metadata !"fpexcept.strict")
376  ret <4 x half> %result
377}
378
; Declarations of the constrained ldexp intrinsics. Each takes the value, the
; exponent, and two metadata operands (rounding mode and exception behavior).
; The f16.i16 and v2f16.v2i16 variants are only referenced by the commented-out
; tests earlier in this file; the i32-exponent variants are the ones called by
; the enabled test functions.
379declare half @llvm.experimental.constrained.ldexp.f16.i16(half, i16, metadata, metadata) #1
380declare half @llvm.experimental.constrained.ldexp.f16.i32(half, i32, metadata, metadata) #1
381declare <2 x half> @llvm.experimental.constrained.ldexp.v2f16.v2i16(<2 x half>, <2 x i16>, metadata, metadata) #1
382declare <2 x half> @llvm.experimental.constrained.ldexp.v2f16.v2i32(<2 x half>, <2 x i32>, metadata, metadata) #1
383declare <3 x half> @llvm.experimental.constrained.ldexp.v3f16.v3i32(<3 x half>, <3 x i32>, metadata, metadata) #1
384declare <4 x half> @llvm.experimental.constrained.ldexp.v4f16.v4i32(<4 x half>, <4 x i32>, metadata, metadata) #1
385
; Attribute group #0 (strictfp) is attached to every test function definition
; in this file; group #1 is attached to the intrinsic declarations.
386attributes #0 = { strictfp }
387attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
388;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
389; GCN: {{.*}}
390; GFX11: {{.*}}
391; GFX8: {{.*}}
392; GFX9: {{.*}}
393