xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.get.fpmode.ll (revision 6548b6354d1d990e1c98736f5e7c3de876bedc8e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2; RUN: llc -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX678,GFX6 %s
3; RUN: llc -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,GFX678,GFX7 %s
4; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX678,GFX8 %s
5; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
6; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX10 %s
7; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX11 %s
8
; Declaration of the intrinsic exercised by every function in this file.
; It takes no arguments and yields an i32.
9declare i32 @llvm.get.fpmode.i32()
10
; Baseline: a callable function that returns the unmodified result of
; llvm.get.fpmode.i32. The expected code reads HW_REG_MODE with s_getreg_b32
; (19 bits on GFX6/7/8, 24 bits on GFX9/10/11) and ANDs it with 0x7f3ff or
; 0x87f3ff respectively before copying it to v0.
11define i32 @func_fpmode_i32() {
12; GFX678-LABEL: func_fpmode_i32:
13; GFX678:       ; %bb.0:
14; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15; GFX678-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
16; GFX678-NEXT:    s_and_b32 s4, 0x7f3ff, s4
17; GFX678-NEXT:    v_mov_b32_e32 v0, s4
18; GFX678-NEXT:    s_setpc_b64 s[30:31]
19;
20; GFX9-LABEL: func_fpmode_i32:
21; GFX9:       ; %bb.0:
22; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23; GFX9-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
24; GFX9-NEXT:    s_and_b32 s4, 0x87f3ff, s4
25; GFX9-NEXT:    v_mov_b32_e32 v0, s4
26; GFX9-NEXT:    s_setpc_b64 s[30:31]
27;
28; GFX10-LABEL: func_fpmode_i32:
29; GFX10:       ; %bb.0:
30; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
31; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
32; GFX10-NEXT:    s_and_b32 s4, 0x87f3ff, s4
33; GFX10-NEXT:    v_mov_b32_e32 v0, s4
34; GFX10-NEXT:    s_setpc_b64 s[30:31]
35;
36; GFX11-LABEL: func_fpmode_i32:
37; GFX11:       ; %bb.0:
38; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
39; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
40; GFX11-NEXT:    s_and_b32 s0, 0x87f3ff, s0
41; GFX11-NEXT:    v_mov_b32_e32 v0, s0
42; GFX11-NEXT:    s_setpc_b64 s[30:31]
43  %fpmode = call i32 @llvm.get.fpmode.i32()
44  ret i32 %fpmode
45}
46
; Same body as func_fpmode_i32, but with the strictfp attribute on both the
; function and the intrinsic call. Apart from the label, the expected
; instruction sequence is the same as for the non-strictfp function.
47define i32 @strictfp_func_fpmode_i32() strictfp {
48; GFX678-LABEL: strictfp_func_fpmode_i32:
49; GFX678:       ; %bb.0:
50; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
51; GFX678-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
52; GFX678-NEXT:    s_and_b32 s4, 0x7f3ff, s4
53; GFX678-NEXT:    v_mov_b32_e32 v0, s4
54; GFX678-NEXT:    s_setpc_b64 s[30:31]
55;
56; GFX9-LABEL: strictfp_func_fpmode_i32:
57; GFX9:       ; %bb.0:
58; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
59; GFX9-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
60; GFX9-NEXT:    s_and_b32 s4, 0x87f3ff, s4
61; GFX9-NEXT:    v_mov_b32_e32 v0, s4
62; GFX9-NEXT:    s_setpc_b64 s[30:31]
63;
64; GFX10-LABEL: strictfp_func_fpmode_i32:
65; GFX10:       ; %bb.0:
66; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
67; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
68; GFX10-NEXT:    s_and_b32 s4, 0x87f3ff, s4
69; GFX10-NEXT:    v_mov_b32_e32 v0, s4
70; GFX10-NEXT:    s_setpc_b64 s[30:31]
71;
72; GFX11-LABEL: strictfp_func_fpmode_i32:
73; GFX11:       ; %bb.0:
74; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
75; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
76; GFX11-NEXT:    s_and_b32 s0, 0x87f3ff, s0
77; GFX11-NEXT:    v_mov_b32_e32 v0, s0
78; GFX11-NEXT:    s_setpc_b64 s[30:31]
79  %fpmode = call i32 @llvm.get.fpmode.i32() strictfp
80  ret i32 %fpmode
81}
82
; Kernel entry-point variant: instead of returning the value, it stores the
; result of llvm.get.fpmode.i32 through the addrspace(1) pointer argument.
; The checks use the individual GFX6/GFX7/GFX8 prefixes here (not the shared
; GFX678 prefix); the expected store is buffer_store_dword on GFX6/7,
; flat_store_dword on GFX8, global_store_dword on GFX9/10 and
; global_store_b32 on GFX11.
83define amdgpu_kernel void @kernel_fpmode_i32(ptr addrspace(1) %ptr) {
84; GFX6-LABEL: kernel_fpmode_i32:
85; GFX6:       ; %bb.0:
86; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
87; GFX6-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
88; GFX6-NEXT:    s_and_b32 s4, 0x7f3ff, s4
89; GFX6-NEXT:    s_mov_b32 s3, 0xf000
90; GFX6-NEXT:    s_mov_b32 s2, -1
91; GFX6-NEXT:    v_mov_b32_e32 v0, s4
92; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
93; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
94; GFX6-NEXT:    s_endpgm
95;
96; GFX7-LABEL: kernel_fpmode_i32:
97; GFX7:       ; %bb.0:
98; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
99; GFX7-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
100; GFX7-NEXT:    s_and_b32 s4, 0x7f3ff, s4
101; GFX7-NEXT:    s_mov_b32 s3, 0xf000
102; GFX7-NEXT:    s_mov_b32 s2, -1
103; GFX7-NEXT:    v_mov_b32_e32 v0, s4
104; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
105; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
106; GFX7-NEXT:    s_endpgm
107;
108; GFX8-LABEL: kernel_fpmode_i32:
109; GFX8:       ; %bb.0:
110; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
111; GFX8-NEXT:    s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 19)
112; GFX8-NEXT:    s_and_b32 s2, 0x7f3ff, s2
113; GFX8-NEXT:    v_mov_b32_e32 v2, s2
114; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
115; GFX8-NEXT:    v_mov_b32_e32 v0, s0
116; GFX8-NEXT:    v_mov_b32_e32 v1, s1
117; GFX8-NEXT:    flat_store_dword v[0:1], v2
118; GFX8-NEXT:    s_endpgm
119;
120; GFX9-LABEL: kernel_fpmode_i32:
121; GFX9:       ; %bb.0:
122; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
123; GFX9-NEXT:    s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 24)
124; GFX9-NEXT:    s_and_b32 s2, 0x87f3ff, s2
125; GFX9-NEXT:    v_mov_b32_e32 v0, 0
126; GFX9-NEXT:    v_mov_b32_e32 v1, s2
127; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
128; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
129; GFX9-NEXT:    s_endpgm
130;
131; GFX10-LABEL: kernel_fpmode_i32:
132; GFX10:       ; %bb.0:
133; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
134; GFX10-NEXT:    s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 24)
135; GFX10-NEXT:    v_mov_b32_e32 v0, 0
136; GFX10-NEXT:    s_and_b32 s2, 0x87f3ff, s2
137; GFX10-NEXT:    v_mov_b32_e32 v1, s2
138; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
139; GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
140; GFX10-NEXT:    s_endpgm
141;
142; GFX11-LABEL: kernel_fpmode_i32:
143; GFX11:       ; %bb.0:
144; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
145; GFX11-NEXT:    s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 24)
146; GFX11-NEXT:    s_and_b32 s2, 0x87f3ff, s2
147; GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
148; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
149; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
150; GFX11-NEXT:    s_endpgm
151  %fpmode = call i32 @llvm.get.fpmode.i32()
152  store i32 %fpmode, ptr addrspace(1) %ptr
153  ret void
154}
155
156; TODO: We should be able to reduce the demanded bits and ask for less
157; from s_getreg_b32
; Test: the fpmode result is ANDed with 240 (0xf0), i.e. bits 4-7 (the
; denormal-mode bits, per the test name). The expected output still performs
; the full-width s_getreg_b32 and the full mask, followed by a second
; s_and_b32 with 0xf0.
158define i32 @func_fpmode_i32_denormonly() {
159; GFX678-LABEL: func_fpmode_i32_denormonly:
160; GFX678:       ; %bb.0:
161; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
162; GFX678-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
163; GFX678-NEXT:    s_and_b32 s4, 0x7f3ff, s4
164; GFX678-NEXT:    s_and_b32 s4, s4, 0xf0
165; GFX678-NEXT:    v_mov_b32_e32 v0, s4
166; GFX678-NEXT:    s_setpc_b64 s[30:31]
167;
168; GFX9-LABEL: func_fpmode_i32_denormonly:
169; GFX9:       ; %bb.0:
170; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
171; GFX9-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
172; GFX9-NEXT:    s_and_b32 s4, 0x87f3ff, s4
173; GFX9-NEXT:    s_and_b32 s4, s4, 0xf0
174; GFX9-NEXT:    v_mov_b32_e32 v0, s4
175; GFX9-NEXT:    s_setpc_b64 s[30:31]
176;
177; GFX10-LABEL: func_fpmode_i32_denormonly:
178; GFX10:       ; %bb.0:
179; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
180; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
181; GFX10-NEXT:    s_and_b32 s4, 0x87f3ff, s4
182; GFX10-NEXT:    s_and_b32 s4, s4, 0xf0
183; GFX10-NEXT:    v_mov_b32_e32 v0, s4
184; GFX10-NEXT:    s_setpc_b64 s[30:31]
185;
186; GFX11-LABEL: func_fpmode_i32_denormonly:
187; GFX11:       ; %bb.0:
188; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
189; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
190; GFX11-NEXT:    s_and_b32 s0, 0x87f3ff, s0
191; GFX11-NEXT:    s_and_b32 s0, s0, 0xf0
192; GFX11-NEXT:    v_mov_b32_e32 v0, s0
193; GFX11-NEXT:    s_setpc_b64 s[30:31]
194  %fpmode = call i32 @llvm.get.fpmode.i32()
195  %denorm.only = and i32 %fpmode, 240
196  ret i32 %denorm.only
197}
198
; Test: the fpmode result is ANDed with 15, i.e. bits 0-3 (the rounding-mode
; bits, per the test name). The expected output keeps the full-width register
; read and full mask, then applies a second s_and_b32 with 15.
199define i32 @func_fpmode_i32_roundonly() {
200; GFX678-LABEL: func_fpmode_i32_roundonly:
201; GFX678:       ; %bb.0:
202; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
203; GFX678-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
204; GFX678-NEXT:    s_and_b32 s4, 0x7f3ff, s4
205; GFX678-NEXT:    s_and_b32 s4, s4, 15
206; GFX678-NEXT:    v_mov_b32_e32 v0, s4
207; GFX678-NEXT:    s_setpc_b64 s[30:31]
208;
209; GFX9-LABEL: func_fpmode_i32_roundonly:
210; GFX9:       ; %bb.0:
211; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
212; GFX9-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
213; GFX9-NEXT:    s_and_b32 s4, 0x87f3ff, s4
214; GFX9-NEXT:    s_and_b32 s4, s4, 15
215; GFX9-NEXT:    v_mov_b32_e32 v0, s4
216; GFX9-NEXT:    s_setpc_b64 s[30:31]
217;
218; GFX10-LABEL: func_fpmode_i32_roundonly:
219; GFX10:       ; %bb.0:
220; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
221; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
222; GFX10-NEXT:    s_and_b32 s4, 0x87f3ff, s4
223; GFX10-NEXT:    s_and_b32 s4, s4, 15
224; GFX10-NEXT:    v_mov_b32_e32 v0, s4
225; GFX10-NEXT:    s_setpc_b64 s[30:31]
226;
227; GFX11-LABEL: func_fpmode_i32_roundonly:
228; GFX11:       ; %bb.0:
229; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
230; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
231; GFX11-NEXT:    s_and_b32 s0, 0x87f3ff, s0
232; GFX11-NEXT:    s_and_b32 s0, s0, 15
233; GFX11-NEXT:    v_mov_b32_e32 v0, s0
234; GFX11-NEXT:    s_setpc_b64 s[30:31]
235  %fpmode = call i32 @llvm.get.fpmode.i32()
236  %round.only = and i32 %fpmode, 15
237  ret i32 %round.only
238}
239
; Test: the fpmode result is ANDed with 255 (0xff), i.e. bits 0-7, the union
; of the masks used by the roundonly (15) and denormonly (240) tests. The
; expected output applies the full mask and then a second s_and_b32 with 0xff.
240define i32 @func_fpmode_i32_round_denorm_only() {
241; GFX678-LABEL: func_fpmode_i32_round_denorm_only:
242; GFX678:       ; %bb.0:
243; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
244; GFX678-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
245; GFX678-NEXT:    s_and_b32 s4, 0x7f3ff, s4
246; GFX678-NEXT:    s_and_b32 s4, s4, 0xff
247; GFX678-NEXT:    v_mov_b32_e32 v0, s4
248; GFX678-NEXT:    s_setpc_b64 s[30:31]
249;
250; GFX9-LABEL: func_fpmode_i32_round_denorm_only:
251; GFX9:       ; %bb.0:
252; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
253; GFX9-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
254; GFX9-NEXT:    s_and_b32 s4, 0x87f3ff, s4
255; GFX9-NEXT:    s_and_b32 s4, s4, 0xff
256; GFX9-NEXT:    v_mov_b32_e32 v0, s4
257; GFX9-NEXT:    s_setpc_b64 s[30:31]
258;
259; GFX10-LABEL: func_fpmode_i32_round_denorm_only:
260; GFX10:       ; %bb.0:
261; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
262; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
263; GFX10-NEXT:    s_and_b32 s4, 0x87f3ff, s4
264; GFX10-NEXT:    s_and_b32 s4, s4, 0xff
265; GFX10-NEXT:    v_mov_b32_e32 v0, s4
266; GFX10-NEXT:    s_setpc_b64 s[30:31]
267;
268; GFX11-LABEL: func_fpmode_i32_round_denorm_only:
269; GFX11:       ; %bb.0:
270; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
271; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
272; GFX11-NEXT:    s_and_b32 s0, 0x87f3ff, s0
273; GFX11-NEXT:    s_and_b32 s0, s0, 0xff
274; GFX11-NEXT:    v_mov_b32_e32 v0, s0
275; GFX11-NEXT:    s_setpc_b64 s[30:31]
276  %fpmode = call i32 @llvm.get.fpmode.i32()
277  %round.denorm.only = and i32 %fpmode, 255
278  ret i32 %round.denorm.only
279}
280
; Test: the fpmode result is ANDed with 1023 (0x3ff), i.e. bits 0-9. This is
; the union of the masks used by the round (15), denorm (240), dx10_clamp
; (256) and ieee (512) tests in this file. The expected output applies the
; full mask and then a second s_and_b32 with 0x3ff.
281define i32 @func_fpmode_i32_round_denorm_dx10_ieee() {
282; GFX678-LABEL: func_fpmode_i32_round_denorm_dx10_ieee:
283; GFX678:       ; %bb.0:
284; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
285; GFX678-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
286; GFX678-NEXT:    s_and_b32 s4, 0x7f3ff, s4
287; GFX678-NEXT:    s_and_b32 s4, s4, 0x3ff
288; GFX678-NEXT:    v_mov_b32_e32 v0, s4
289; GFX678-NEXT:    s_setpc_b64 s[30:31]
290;
291; GFX9-LABEL: func_fpmode_i32_round_denorm_dx10_ieee:
292; GFX9:       ; %bb.0:
293; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
294; GFX9-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
295; GFX9-NEXT:    s_and_b32 s4, 0x87f3ff, s4
296; GFX9-NEXT:    s_and_b32 s4, s4, 0x3ff
297; GFX9-NEXT:    v_mov_b32_e32 v0, s4
298; GFX9-NEXT:    s_setpc_b64 s[30:31]
299;
300; GFX10-LABEL: func_fpmode_i32_round_denorm_dx10_ieee:
301; GFX10:       ; %bb.0:
302; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
303; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
304; GFX10-NEXT:    s_and_b32 s4, 0x87f3ff, s4
305; GFX10-NEXT:    s_and_b32 s4, s4, 0x3ff
306; GFX10-NEXT:    v_mov_b32_e32 v0, s4
307; GFX10-NEXT:    s_setpc_b64 s[30:31]
308;
309; GFX11-LABEL: func_fpmode_i32_round_denorm_dx10_ieee:
310; GFX11:       ; %bb.0:
311; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
312; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
313; GFX11-NEXT:    s_and_b32 s0, 0x87f3ff, s0
314; GFX11-NEXT:    s_and_b32 s0, s0, 0x3ff
315; GFX11-NEXT:    v_mov_b32_e32 v0, s0
316; GFX11-NEXT:    s_setpc_b64 s[30:31]
317  %fpmode = call i32 @llvm.get.fpmode.i32()
318  %core.mode = and i32 %fpmode, 1023
319  ret i32 %core.mode
320}
321
; Test: the fpmode result is ANDed with 520192 (0x7f000), i.e. bits 12-18
; (the exception-enable bits, per the test name). The expected output applies
; the full mask and then a second s_and_b32 with 0x7f000.
322define i32 @func_fpmode_i32_excp_en() {
323; GFX678-LABEL: func_fpmode_i32_excp_en:
324; GFX678:       ; %bb.0:
325; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
326; GFX678-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
327; GFX678-NEXT:    s_and_b32 s4, 0x7f3ff, s4
328; GFX678-NEXT:    s_and_b32 s4, s4, 0x7f000
329; GFX678-NEXT:    v_mov_b32_e32 v0, s4
330; GFX678-NEXT:    s_setpc_b64 s[30:31]
331;
332; GFX9-LABEL: func_fpmode_i32_excp_en:
333; GFX9:       ; %bb.0:
334; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
335; GFX9-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
336; GFX9-NEXT:    s_and_b32 s4, 0x87f3ff, s4
337; GFX9-NEXT:    s_and_b32 s4, s4, 0x7f000
338; GFX9-NEXT:    v_mov_b32_e32 v0, s4
339; GFX9-NEXT:    s_setpc_b64 s[30:31]
340;
341; GFX10-LABEL: func_fpmode_i32_excp_en:
342; GFX10:       ; %bb.0:
343; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
344; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
345; GFX10-NEXT:    s_and_b32 s4, 0x87f3ff, s4
346; GFX10-NEXT:    s_and_b32 s4, s4, 0x7f000
347; GFX10-NEXT:    v_mov_b32_e32 v0, s4
348; GFX10-NEXT:    s_setpc_b64 s[30:31]
349;
350; GFX11-LABEL: func_fpmode_i32_excp_en:
351; GFX11:       ; %bb.0:
352; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
353; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
354; GFX11-NEXT:    s_and_b32 s0, 0x87f3ff, s0
355; GFX11-NEXT:    s_and_b32 s0, s0, 0x7f000
356; GFX11-NEXT:    v_mov_b32_e32 v0, s0
357; GFX11-NEXT:    s_setpc_b64 s[30:31]
358  %fpmode = call i32 @llvm.get.fpmode.i32()
359  %core.mode = and i32 %fpmode, 520192
360  ret i32 %core.mode
361}
362
363; Mask for all bits used on gfx6+
; Test: the fpmode result is ANDed with 521215 (0x7f3ff), the same constant
; the GFX6/7/8 lowering already applies after s_getreg_b32. The expected
; GFX678 output therefore contains two s_and_b32 instructions with the same
; 0x7f3ff constant; on GFX9/10/11 the user mask follows the wider 0x87f3ff.
364define i32 @func_fpmode_i32_environment_gfx6() {
365; GFX678-LABEL: func_fpmode_i32_environment_gfx6:
366; GFX678:       ; %bb.0:
367; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
368; GFX678-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
369; GFX678-NEXT:    s_and_b32 s4, 0x7f3ff, s4
370; GFX678-NEXT:    s_and_b32 s4, s4, 0x7f3ff
371; GFX678-NEXT:    v_mov_b32_e32 v0, s4
372; GFX678-NEXT:    s_setpc_b64 s[30:31]
373;
374; GFX9-LABEL: func_fpmode_i32_environment_gfx6:
375; GFX9:       ; %bb.0:
376; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
377; GFX9-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
378; GFX9-NEXT:    s_and_b32 s4, 0x87f3ff, s4
379; GFX9-NEXT:    s_and_b32 s4, s4, 0x7f3ff
380; GFX9-NEXT:    v_mov_b32_e32 v0, s4
381; GFX9-NEXT:    s_setpc_b64 s[30:31]
382;
383; GFX10-LABEL: func_fpmode_i32_environment_gfx6:
384; GFX10:       ; %bb.0:
385; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
386; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
387; GFX10-NEXT:    s_and_b32 s4, 0x87f3ff, s4
388; GFX10-NEXT:    s_and_b32 s4, s4, 0x7f3ff
389; GFX10-NEXT:    v_mov_b32_e32 v0, s4
390; GFX10-NEXT:    s_setpc_b64 s[30:31]
391;
392; GFX11-LABEL: func_fpmode_i32_environment_gfx6:
393; GFX11:       ; %bb.0:
394; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
395; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
396; GFX11-NEXT:    s_and_b32 s0, 0x87f3ff, s0
397; GFX11-NEXT:    s_and_b32 s0, s0, 0x7f3ff
398; GFX11-NEXT:    v_mov_b32_e32 v0, s0
399; GFX11-NEXT:    s_setpc_b64 s[30:31]
400  %fpmode = call i32 @llvm.get.fpmode.i32()
401  %core.mode = and i32 %fpmode, 521215
402  ret i32 %core.mode
403}
404
405; Mask for all bits used on gfx9+
; Test: the fpmode result is ANDed with 8909823 (0x87f3ff), the same constant
; the GFX9/10/11 lowering already applies after s_getreg_b32. The expected
; GFX9/10/11 output therefore contains two s_and_b32 instructions with the
; same constant; on GFX6/7/8 the user mask follows the narrower 0x7f3ff.
406define i32 @func_fpmode_i32_environment_gfx9() {
407; GFX678-LABEL: func_fpmode_i32_environment_gfx9:
408; GFX678:       ; %bb.0:
409; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
410; GFX678-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
411; GFX678-NEXT:    s_and_b32 s4, 0x7f3ff, s4
412; GFX678-NEXT:    s_and_b32 s4, s4, 0x87f3ff
413; GFX678-NEXT:    v_mov_b32_e32 v0, s4
414; GFX678-NEXT:    s_setpc_b64 s[30:31]
415;
416; GFX9-LABEL: func_fpmode_i32_environment_gfx9:
417; GFX9:       ; %bb.0:
418; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
419; GFX9-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
420; GFX9-NEXT:    s_and_b32 s4, 0x87f3ff, s4
421; GFX9-NEXT:    s_and_b32 s4, s4, 0x87f3ff
422; GFX9-NEXT:    v_mov_b32_e32 v0, s4
423; GFX9-NEXT:    s_setpc_b64 s[30:31]
424;
425; GFX10-LABEL: func_fpmode_i32_environment_gfx9:
426; GFX10:       ; %bb.0:
427; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
428; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
429; GFX10-NEXT:    s_and_b32 s4, 0x87f3ff, s4
430; GFX10-NEXT:    s_and_b32 s4, s4, 0x87f3ff
431; GFX10-NEXT:    v_mov_b32_e32 v0, s4
432; GFX10-NEXT:    s_setpc_b64 s[30:31]
433;
434; GFX11-LABEL: func_fpmode_i32_environment_gfx9:
435; GFX11:       ; %bb.0:
436; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
437; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
438; GFX11-NEXT:    s_and_b32 s0, 0x87f3ff, s0
439; GFX11-NEXT:    s_and_b32 s0, s0, 0x87f3ff
440; GFX11-NEXT:    v_mov_b32_e32 v0, s0
441; GFX11-NEXT:    s_setpc_b64 s[30:31]
442  %fpmode = call i32 @llvm.get.fpmode.i32()
443  %core.mode = and i32 %fpmode, 8909823
444  ret i32 %core.mode
445}
446
; Test: the fpmode result is ANDed with 48 (0x30), i.e. bits 4-5 (the f32
; denormal-mode bits, per the test name). The expected output applies the
; full mask and then a second s_and_b32 with 48.
447define i32 @func_fpmode_i32_denormf32only() {
448; GFX678-LABEL: func_fpmode_i32_denormf32only:
449; GFX678:       ; %bb.0:
450; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
451; GFX678-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
452; GFX678-NEXT:    s_and_b32 s4, 0x7f3ff, s4
453; GFX678-NEXT:    s_and_b32 s4, s4, 48
454; GFX678-NEXT:    v_mov_b32_e32 v0, s4
455; GFX678-NEXT:    s_setpc_b64 s[30:31]
456;
457; GFX9-LABEL: func_fpmode_i32_denormf32only:
458; GFX9:       ; %bb.0:
459; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
460; GFX9-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
461; GFX9-NEXT:    s_and_b32 s4, 0x87f3ff, s4
462; GFX9-NEXT:    s_and_b32 s4, s4, 48
463; GFX9-NEXT:    v_mov_b32_e32 v0, s4
464; GFX9-NEXT:    s_setpc_b64 s[30:31]
465;
466; GFX10-LABEL: func_fpmode_i32_denormf32only:
467; GFX10:       ; %bb.0:
468; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
469; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
470; GFX10-NEXT:    s_and_b32 s4, 0x87f3ff, s4
471; GFX10-NEXT:    s_and_b32 s4, s4, 48
472; GFX10-NEXT:    v_mov_b32_e32 v0, s4
473; GFX10-NEXT:    s_setpc_b64 s[30:31]
474;
475; GFX11-LABEL: func_fpmode_i32_denormf32only:
476; GFX11:       ; %bb.0:
477; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
478; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
479; GFX11-NEXT:    s_and_b32 s0, 0x87f3ff, s0
480; GFX11-NEXT:    s_and_b32 s0, s0, 48
481; GFX11-NEXT:    v_mov_b32_e32 v0, s0
482; GFX11-NEXT:    s_setpc_b64 s[30:31]
483  %fpmode = call i32 @llvm.get.fpmode.i32()
484  %denorm.only = and i32 %fpmode, 48
485  ret i32 %denorm.only
486}
487
; Test: the fpmode result is ANDed with 32, a single bit (bit 5). The
; expected output applies the full mask and then a second s_and_b32 with 32.
; NOTE(review): func_fpmode_i32_denormf32only masks bits 4-5 (48); the "_0"
; suffix here selects bit 5 rather than bit 4 (16) - confirm this is intended.
488define i32 @func_fpmode_i32_denormf32only_0() {
489; GFX678-LABEL: func_fpmode_i32_denormf32only_0:
490; GFX678:       ; %bb.0:
491; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
492; GFX678-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
493; GFX678-NEXT:    s_and_b32 s4, 0x7f3ff, s4
494; GFX678-NEXT:    s_and_b32 s4, s4, 32
495; GFX678-NEXT:    v_mov_b32_e32 v0, s4
496; GFX678-NEXT:    s_setpc_b64 s[30:31]
497;
498; GFX9-LABEL: func_fpmode_i32_denormf32only_0:
499; GFX9:       ; %bb.0:
500; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
501; GFX9-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
502; GFX9-NEXT:    s_and_b32 s4, 0x87f3ff, s4
503; GFX9-NEXT:    s_and_b32 s4, s4, 32
504; GFX9-NEXT:    v_mov_b32_e32 v0, s4
505; GFX9-NEXT:    s_setpc_b64 s[30:31]
506;
507; GFX10-LABEL: func_fpmode_i32_denormf32only_0:
508; GFX10:       ; %bb.0:
509; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
510; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
511; GFX10-NEXT:    s_and_b32 s4, 0x87f3ff, s4
512; GFX10-NEXT:    s_and_b32 s4, s4, 32
513; GFX10-NEXT:    v_mov_b32_e32 v0, s4
514; GFX10-NEXT:    s_setpc_b64 s[30:31]
515;
516; GFX11-LABEL: func_fpmode_i32_denormf32only_0:
517; GFX11:       ; %bb.0:
518; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
519; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
520; GFX11-NEXT:    s_and_b32 s0, 0x87f3ff, s0
521; GFX11-NEXT:    s_and_b32 s0, s0, 32
522; GFX11-NEXT:    v_mov_b32_e32 v0, s0
523; GFX11-NEXT:    s_setpc_b64 s[30:31]
524  %fpmode = call i32 @llvm.get.fpmode.i32()
525  %denorm.only = and i32 %fpmode, 32
526  ret i32 %denorm.only
527}
528
; Test: the fpmode result is ANDed with 64, a single bit (bit 6). The
; expected output applies the full mask and then a second s_and_b32 with 64.
; NOTE(review): bit 6 lies outside the 48 mask used by
; func_fpmode_i32_denormf32only and inside the 192 mask used by
; func_fpmode_i32_denormf64f16only - confirm the name/mask pairing is intended.
529define i32 @func_fpmode_i32_denormf32only_1() {
530; GFX678-LABEL: func_fpmode_i32_denormf32only_1:
531; GFX678:       ; %bb.0:
532; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
533; GFX678-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
534; GFX678-NEXT:    s_and_b32 s4, 0x7f3ff, s4
535; GFX678-NEXT:    s_and_b32 s4, s4, 64
536; GFX678-NEXT:    v_mov_b32_e32 v0, s4
537; GFX678-NEXT:    s_setpc_b64 s[30:31]
538;
539; GFX9-LABEL: func_fpmode_i32_denormf32only_1:
540; GFX9:       ; %bb.0:
541; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
542; GFX9-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
543; GFX9-NEXT:    s_and_b32 s4, 0x87f3ff, s4
544; GFX9-NEXT:    s_and_b32 s4, s4, 64
545; GFX9-NEXT:    v_mov_b32_e32 v0, s4
546; GFX9-NEXT:    s_setpc_b64 s[30:31]
547;
548; GFX10-LABEL: func_fpmode_i32_denormf32only_1:
549; GFX10:       ; %bb.0:
550; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
551; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
552; GFX10-NEXT:    s_and_b32 s4, 0x87f3ff, s4
553; GFX10-NEXT:    s_and_b32 s4, s4, 64
554; GFX10-NEXT:    v_mov_b32_e32 v0, s4
555; GFX10-NEXT:    s_setpc_b64 s[30:31]
556;
557; GFX11-LABEL: func_fpmode_i32_denormf32only_1:
558; GFX11:       ; %bb.0:
559; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
560; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
561; GFX11-NEXT:    s_and_b32 s0, 0x87f3ff, s0
562; GFX11-NEXT:    s_and_b32 s0, s0, 64
563; GFX11-NEXT:    v_mov_b32_e32 v0, s0
564; GFX11-NEXT:    s_setpc_b64 s[30:31]
565  %fpmode = call i32 @llvm.get.fpmode.i32()
566  %denorm.only = and i32 %fpmode, 64
567  ret i32 %denorm.only
568}
569
; Test: the fpmode result is ANDed with 192 (0xc0), i.e. bits 6-7 (the
; f64/f16 denormal-mode bits, per the test name). The expected output applies
; the full mask and then a second s_and_b32 with 0xc0.
570define i32 @func_fpmode_i32_denormf64f16only() {
571; GFX678-LABEL: func_fpmode_i32_denormf64f16only:
572; GFX678:       ; %bb.0:
573; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
574; GFX678-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
575; GFX678-NEXT:    s_and_b32 s4, 0x7f3ff, s4
576; GFX678-NEXT:    s_and_b32 s4, s4, 0xc0
577; GFX678-NEXT:    v_mov_b32_e32 v0, s4
578; GFX678-NEXT:    s_setpc_b64 s[30:31]
579;
580; GFX9-LABEL: func_fpmode_i32_denormf64f16only:
581; GFX9:       ; %bb.0:
582; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
583; GFX9-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
584; GFX9-NEXT:    s_and_b32 s4, 0x87f3ff, s4
585; GFX9-NEXT:    s_and_b32 s4, s4, 0xc0
586; GFX9-NEXT:    v_mov_b32_e32 v0, s4
587; GFX9-NEXT:    s_setpc_b64 s[30:31]
588;
589; GFX10-LABEL: func_fpmode_i32_denormf64f16only:
590; GFX10:       ; %bb.0:
591; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
592; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
593; GFX10-NEXT:    s_and_b32 s4, 0x87f3ff, s4
594; GFX10-NEXT:    s_and_b32 s4, s4, 0xc0
595; GFX10-NEXT:    v_mov_b32_e32 v0, s4
596; GFX10-NEXT:    s_setpc_b64 s[30:31]
597;
598; GFX11-LABEL: func_fpmode_i32_denormf64f16only:
599; GFX11:       ; %bb.0:
600; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
601; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
602; GFX11-NEXT:    s_and_b32 s0, 0x87f3ff, s0
603; GFX11-NEXT:    s_and_b32 s0, s0, 0xc0
604; GFX11-NEXT:    v_mov_b32_e32 v0, s0
605; GFX11-NEXT:    s_setpc_b64 s[30:31]
606  %fpmode = call i32 @llvm.get.fpmode.i32()
607  %denorm.only = and i32 %fpmode, 192
608  ret i32 %denorm.only
609}
610
; Test: the fpmode result is ANDed with 256 (0x100), a single bit (bit 8; the
; DX10 clamp bit, per the test name). The expected output applies the full
; mask and then a second s_and_b32 with 0x100.
611define i32 @func_fpmode_i32_dx10_clamp_only() {
612; GFX678-LABEL: func_fpmode_i32_dx10_clamp_only:
613; GFX678:       ; %bb.0:
614; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
615; GFX678-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
616; GFX678-NEXT:    s_and_b32 s4, 0x7f3ff, s4
617; GFX678-NEXT:    s_and_b32 s4, s4, 0x100
618; GFX678-NEXT:    v_mov_b32_e32 v0, s4
619; GFX678-NEXT:    s_setpc_b64 s[30:31]
620;
621; GFX9-LABEL: func_fpmode_i32_dx10_clamp_only:
622; GFX9:       ; %bb.0:
623; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
624; GFX9-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
625; GFX9-NEXT:    s_and_b32 s4, 0x87f3ff, s4
626; GFX9-NEXT:    s_and_b32 s4, s4, 0x100
627; GFX9-NEXT:    v_mov_b32_e32 v0, s4
628; GFX9-NEXT:    s_setpc_b64 s[30:31]
629;
630; GFX10-LABEL: func_fpmode_i32_dx10_clamp_only:
631; GFX10:       ; %bb.0:
632; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
633; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
634; GFX10-NEXT:    s_and_b32 s4, 0x87f3ff, s4
635; GFX10-NEXT:    s_and_b32 s4, s4, 0x100
636; GFX10-NEXT:    v_mov_b32_e32 v0, s4
637; GFX10-NEXT:    s_setpc_b64 s[30:31]
638;
639; GFX11-LABEL: func_fpmode_i32_dx10_clamp_only:
640; GFX11:       ; %bb.0:
641; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
642; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
643; GFX11-NEXT:    s_and_b32 s0, 0x87f3ff, s0
644; GFX11-NEXT:    s_and_b32 s0, s0, 0x100
645; GFX11-NEXT:    v_mov_b32_e32 v0, s0
646; GFX11-NEXT:    s_setpc_b64 s[30:31]
647  %fpmode = call i32 @llvm.get.fpmode.i32()
648  %dx10.only = and i32 %fpmode, 256
649  ret i32 %dx10.only
650}
651
; Test: the fpmode result is ANDed with 512 (0x200), a single bit (bit 9; the
; IEEE-mode bit, per the test name). The expected output applies the full
; mask and then a second s_and_b32 with 0x200.
652define i32 @func_fpmode_i32_ieee_only() {
653; GFX678-LABEL: func_fpmode_i32_ieee_only:
654; GFX678:       ; %bb.0:
655; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
656; GFX678-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
657; GFX678-NEXT:    s_and_b32 s4, 0x7f3ff, s4
658; GFX678-NEXT:    s_and_b32 s4, s4, 0x200
659; GFX678-NEXT:    v_mov_b32_e32 v0, s4
660; GFX678-NEXT:    s_setpc_b64 s[30:31]
661;
662; GFX9-LABEL: func_fpmode_i32_ieee_only:
663; GFX9:       ; %bb.0:
664; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
665; GFX9-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
666; GFX9-NEXT:    s_and_b32 s4, 0x87f3ff, s4
667; GFX9-NEXT:    s_and_b32 s4, s4, 0x200
668; GFX9-NEXT:    v_mov_b32_e32 v0, s4
669; GFX9-NEXT:    s_setpc_b64 s[30:31]
670;
671; GFX10-LABEL: func_fpmode_i32_ieee_only:
672; GFX10:       ; %bb.0:
673; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
674; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
675; GFX10-NEXT:    s_and_b32 s4, 0x87f3ff, s4
676; GFX10-NEXT:    s_and_b32 s4, s4, 0x200
677; GFX10-NEXT:    v_mov_b32_e32 v0, s4
678; GFX10-NEXT:    s_setpc_b64 s[30:31]
679;
680; GFX11-LABEL: func_fpmode_i32_ieee_only:
681; GFX11:       ; %bb.0:
682; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
683; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
684; GFX11-NEXT:    s_and_b32 s0, 0x87f3ff, s0
685; GFX11-NEXT:    s_and_b32 s0, s0, 0x200
686; GFX11-NEXT:    v_mov_b32_e32 v0, s0
687; GFX11-NEXT:    s_setpc_b64 s[30:31]
688  %fpmode = call i32 @llvm.get.fpmode.i32()
689  %ieee.only = and i32 %fpmode, 512
690  ret i32 %ieee.only
691}
692
693;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
694; GCN: {{.*}}
695; GFX1011: {{.*}}
696