xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.set.rounding.ll (revision 9afaf9c6c89efb22bccab39677e8dff47da91a00)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2; RUN: llc -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX678,GFX6 %s
3; RUN: llc -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,GFX678,GFX7 %s
4; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX678,GFX8 %s
5; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
6; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX10 %s
7; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX11 %s
8
9declare void @llvm.set.rounding(i32)
10declare i32 @llvm.get.rounding()
11
; Dynamic rounding mode passed in an SGPR (inreg argument, amdgpu_gfx calling
; convention). The expected code computes umin(x, x - 4), shifts it left by 2
; and uses the result as the shift amount into the 64-bit constant
; 0xb73e62d9_1c84a50f; the shifted value is then written to the 4-bit field at
; offset 0 of the MODE register with s_setreg_b32. Read from the least
; significant nibble upwards, that constant holds the same immediates the
; constant-argument tests in this file expect for inputs 0-3 and 8-19.
12define amdgpu_gfx void @s_set_rounding(i32 inreg %rounding) {
13; GFX678-LABEL: s_set_rounding:
14; GFX678:       ; %bb.0:
15; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16; GFX678-NEXT:    s_add_i32 s34, s4, -4
17; GFX678-NEXT:    s_min_u32 s34, s4, s34
18; GFX678-NEXT:    s_lshl_b32 s36, s34, 2
19; GFX678-NEXT:    s_mov_b32 s34, 0x1c84a50f
20; GFX678-NEXT:    s_mov_b32 s35, 0xb73e62d9
21; GFX678-NEXT:    s_lshr_b64 s[34:35], s[34:35], s36
22; GFX678-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
23; GFX678-NEXT:    s_setpc_b64 s[30:31]
24;
25; GFX9-LABEL: s_set_rounding:
26; GFX9:       ; %bb.0:
27; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
28; GFX9-NEXT:    s_add_i32 s34, s4, -4
29; GFX9-NEXT:    s_min_u32 s34, s4, s34
30; GFX9-NEXT:    s_lshl_b32 s36, s34, 2
31; GFX9-NEXT:    s_mov_b32 s34, 0x1c84a50f
32; GFX9-NEXT:    s_mov_b32 s35, 0xb73e62d9
33; GFX9-NEXT:    s_lshr_b64 s[34:35], s[34:35], s36
34; GFX9-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
35; GFX9-NEXT:    s_setpc_b64 s[30:31]
36;
37; GFX10-LABEL: s_set_rounding:
38; GFX10:       ; %bb.0:
39; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
40; GFX10-NEXT:    s_add_i32 s34, s4, -4
41; GFX10-NEXT:    s_min_u32 s36, s4, s34
42; GFX10-NEXT:    s_mov_b32 s34, 0x1c84a50f
43; GFX10-NEXT:    s_mov_b32 s35, 0xb73e62d9
44; GFX10-NEXT:    s_lshl_b32 s36, s36, 2
45; GFX10-NEXT:    s_lshr_b64 s[34:35], s[34:35], s36
46; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
47; GFX10-NEXT:    s_setpc_b64 s[30:31]
48;
49; GFX11-LABEL: s_set_rounding:
50; GFX11:       ; %bb.0:
51; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
52; GFX11-NEXT:    s_add_i32 s0, s4, -4
53; GFX11-NEXT:    s_min_u32 s2, s4, s0
54; GFX11-NEXT:    s_mov_b32 s0, 0x1c84a50f
55; GFX11-NEXT:    s_mov_b32 s1, 0xb73e62d9
56; GFX11-NEXT:    s_lshl_b32 s2, s2, 2
57; GFX11-NEXT:    s_lshr_b64 s[0:1], s[0:1], s2
58; GFX11-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
59; GFX11-NEXT:    s_setpc_b64 s[30:31]
60  call void @llvm.set.rounding(i32 %rounding)
61  ret void
62}
63
; Same dynamic lowering for a kernel entry point: the value is first fetched
; with a scalar load (s_load_dword / s_load_b32) addressed through s[4:5], and
; the function ends in s_endpgm instead of a return. The empty inline asm
; follows the intrinsic call in the IR, yet the expected output places its
; ASMSTART/ASMEND markers ahead of the s_setreg_b32.
64define amdgpu_kernel void @s_set_rounding_kernel(i32 inreg %rounding) {
65; GFX6-LABEL: s_set_rounding_kernel:
66; GFX6:       ; %bb.0:
67; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x9
68; GFX6-NEXT:    s_mov_b32 s0, 0x1c84a50f
69; GFX6-NEXT:    s_mov_b32 s1, 0xb73e62d9
70; GFX6-NEXT:    ;;#ASMSTART
71; GFX6-NEXT:    ;;#ASMEND
72; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
73; GFX6-NEXT:    s_add_i32 s3, s2, -4
74; GFX6-NEXT:    s_min_u32 s2, s2, s3
75; GFX6-NEXT:    s_lshl_b32 s2, s2, 2
76; GFX6-NEXT:    s_lshr_b64 s[0:1], s[0:1], s2
77; GFX6-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
78; GFX6-NEXT:    s_endpgm
79;
80; GFX7-LABEL: s_set_rounding_kernel:
81; GFX7:       ; %bb.0:
82; GFX7-NEXT:    s_load_dword s2, s[4:5], 0x9
83; GFX7-NEXT:    s_mov_b32 s0, 0x1c84a50f
84; GFX7-NEXT:    s_mov_b32 s1, 0xb73e62d9
85; GFX7-NEXT:    ;;#ASMSTART
86; GFX7-NEXT:    ;;#ASMEND
87; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
88; GFX7-NEXT:    s_add_i32 s3, s2, -4
89; GFX7-NEXT:    s_min_u32 s2, s2, s3
90; GFX7-NEXT:    s_lshl_b32 s2, s2, 2
91; GFX7-NEXT:    s_lshr_b64 s[0:1], s[0:1], s2
92; GFX7-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
93; GFX7-NEXT:    s_endpgm
94;
95; GFX8-LABEL: s_set_rounding_kernel:
96; GFX8:       ; %bb.0:
97; GFX8-NEXT:    s_load_dword s2, s[4:5], 0x24
98; GFX8-NEXT:    s_mov_b32 s0, 0x1c84a50f
99; GFX8-NEXT:    s_mov_b32 s1, 0xb73e62d9
100; GFX8-NEXT:    ;;#ASMSTART
101; GFX8-NEXT:    ;;#ASMEND
102; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
103; GFX8-NEXT:    s_add_i32 s3, s2, -4
104; GFX8-NEXT:    s_min_u32 s2, s2, s3
105; GFX8-NEXT:    s_lshl_b32 s2, s2, 2
106; GFX8-NEXT:    s_lshr_b64 s[0:1], s[0:1], s2
107; GFX8-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
108; GFX8-NEXT:    s_endpgm
109;
110; GFX9-LABEL: s_set_rounding_kernel:
111; GFX9:       ; %bb.0:
112; GFX9-NEXT:    s_load_dword s2, s[4:5], 0x24
113; GFX9-NEXT:    s_mov_b32 s0, 0x1c84a50f
114; GFX9-NEXT:    s_mov_b32 s1, 0xb73e62d9
115; GFX9-NEXT:    ;;#ASMSTART
116; GFX9-NEXT:    ;;#ASMEND
117; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
118; GFX9-NEXT:    s_add_i32 s3, s2, -4
119; GFX9-NEXT:    s_min_u32 s2, s2, s3
120; GFX9-NEXT:    s_lshl_b32 s2, s2, 2
121; GFX9-NEXT:    s_lshr_b64 s[0:1], s[0:1], s2
122; GFX9-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
123; GFX9-NEXT:    s_endpgm
124;
125; GFX10-LABEL: s_set_rounding_kernel:
126; GFX10:       ; %bb.0:
127; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x24
128; GFX10-NEXT:    s_mov_b32 s0, 0x1c84a50f
129; GFX10-NEXT:    s_mov_b32 s1, 0xb73e62d9
130; GFX10-NEXT:    ;;#ASMSTART
131; GFX10-NEXT:    ;;#ASMEND
132; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
133; GFX10-NEXT:    s_add_i32 s3, s2, -4
134; GFX10-NEXT:    s_min_u32 s2, s2, s3
135; GFX10-NEXT:    s_lshl_b32 s2, s2, 2
136; GFX10-NEXT:    s_lshr_b64 s[0:1], s[0:1], s2
137; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
138; GFX10-NEXT:    s_endpgm
139;
140; GFX11-LABEL: s_set_rounding_kernel:
141; GFX11:       ; %bb.0:
142; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x24
143; GFX11-NEXT:    s_mov_b32 s0, 0x1c84a50f
144; GFX11-NEXT:    s_mov_b32 s1, 0xb73e62d9
145; GFX11-NEXT:    ;;#ASMSTART
146; GFX11-NEXT:    ;;#ASMEND
147; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
148; GFX11-NEXT:    s_add_i32 s3, s2, -4
149; GFX11-NEXT:    s_min_u32 s2, s2, s3
150; GFX11-NEXT:    s_lshl_b32 s2, s2, 2
151; GFX11-NEXT:    s_lshr_b64 s[0:1], s[0:1], s2
152; GFX11-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
153; GFX11-NEXT:    s_endpgm
154  call void @llvm.set.rounding(i32 %rounding)
155  call void asm sideeffect "",""()
156  ret void
157}
158
; Dynamic rounding mode passed as a plain (non-inreg) argument, which the
; expected code reads from v0. The index arithmetic and the 64-bit table shift
; are done with vector instructions, and v_readfirstlane_b32 moves the result
; into an SGPR before s_setreg_b32 writes the MODE field.
159define void @v_set_rounding(i32 %rounding) {
160; GFX6-LABEL: v_set_rounding:
161; GFX6:       ; %bb.0:
162; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
163; GFX6-NEXT:    v_add_i32_e32 v1, vcc, -4, v0
164; GFX6-NEXT:    v_min_u32_e32 v0, v0, v1
165; GFX6-NEXT:    s_mov_b32 s4, 0x1c84a50f
166; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
167; GFX6-NEXT:    s_mov_b32 s5, 0xb73e62d9
168; GFX6-NEXT:    v_lshr_b64 v[0:1], s[4:5], v0
169; GFX6-NEXT:    v_readfirstlane_b32 s4, v0
170; GFX6-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
171; GFX6-NEXT:    s_setpc_b64 s[30:31]
172;
173; GFX7-LABEL: v_set_rounding:
174; GFX7:       ; %bb.0:
175; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
176; GFX7-NEXT:    v_add_i32_e32 v1, vcc, -4, v0
177; GFX7-NEXT:    v_min_u32_e32 v0, v0, v1
178; GFX7-NEXT:    s_mov_b32 s4, 0x1c84a50f
179; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
180; GFX7-NEXT:    s_mov_b32 s5, 0xb73e62d9
181; GFX7-NEXT:    v_lshr_b64 v[0:1], s[4:5], v0
182; GFX7-NEXT:    v_readfirstlane_b32 s4, v0
183; GFX7-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
184; GFX7-NEXT:    s_setpc_b64 s[30:31]
185;
186; GFX8-LABEL: v_set_rounding:
187; GFX8:       ; %bb.0:
188; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
189; GFX8-NEXT:    v_add_u32_e32 v1, vcc, -4, v0
190; GFX8-NEXT:    v_min_u32_e32 v0, v0, v1
191; GFX8-NEXT:    s_mov_b32 s4, 0x1c84a50f
192; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
193; GFX8-NEXT:    s_mov_b32 s5, 0xb73e62d9
194; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v0, s[4:5]
195; GFX8-NEXT:    v_readfirstlane_b32 s4, v0
196; GFX8-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
197; GFX8-NEXT:    s_setpc_b64 s[30:31]
198;
199; GFX9-LABEL: v_set_rounding:
200; GFX9:       ; %bb.0:
201; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
202; GFX9-NEXT:    v_add_u32_e32 v1, -4, v0
203; GFX9-NEXT:    v_min_u32_e32 v0, v0, v1
204; GFX9-NEXT:    s_mov_b32 s4, 0x1c84a50f
205; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
206; GFX9-NEXT:    s_mov_b32 s5, 0xb73e62d9
207; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v0, s[4:5]
208; GFX9-NEXT:    v_readfirstlane_b32 s4, v0
209; GFX9-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
210; GFX9-NEXT:    s_setpc_b64 s[30:31]
211;
212; GFX10-LABEL: v_set_rounding:
213; GFX10:       ; %bb.0:
214; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
215; GFX10-NEXT:    v_add_nc_u32_e32 v1, -4, v0
216; GFX10-NEXT:    s_mov_b32 s4, 0x1c84a50f
217; GFX10-NEXT:    s_mov_b32 s5, 0xb73e62d9
218; GFX10-NEXT:    v_min_u32_e32 v0, v0, v1
219; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
220; GFX10-NEXT:    v_lshrrev_b64 v[0:1], v0, s[4:5]
221; GFX10-NEXT:    v_readfirstlane_b32 s4, v0
222; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
223; GFX10-NEXT:    s_setpc_b64 s[30:31]
224;
225; GFX11-LABEL: v_set_rounding:
226; GFX11:       ; %bb.0:
227; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
228; GFX11-NEXT:    v_add_nc_u32_e32 v1, -4, v0
229; GFX11-NEXT:    s_mov_b32 s0, 0x1c84a50f
230; GFX11-NEXT:    s_mov_b32 s1, 0xb73e62d9
231; GFX11-NEXT:    v_min_u32_e32 v0, v0, v1
232; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
233; GFX11-NEXT:    v_lshrrev_b64 v[0:1], v0, s[0:1]
234; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
235; GFX11-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
236; GFX11-NEXT:    s_setpc_b64 s[30:31]
237  call void @llvm.set.rounding(i32 %rounding)
238  ret void
239}
240
; Feeds the result of llvm.get.rounding straight back into llvm.set.rounding.
; The expected output still contains both conversions: the s_getreg_b32 read
; of the MODE field is translated through the table 0x0c96f385_eb24da71 (with
; 4 added to results that are not below 4), and that value is then translated
; back through the 0xb73e62d9_1c84a50f table and written with s_setreg_b32.
241define void @set_rounding_get_rounding() {
242; GFX678-LABEL: set_rounding_get_rounding:
243; GFX678:       ; %bb.0:
244; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
245; GFX678-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 4)
246; GFX678-NEXT:    s_lshl_b32 s6, s4, 2
247; GFX678-NEXT:    s_mov_b32 s4, 0xeb24da71
248; GFX678-NEXT:    s_mov_b32 s5, 0xc96f385
249; GFX678-NEXT:    s_lshr_b64 s[4:5], s[4:5], s6
250; GFX678-NEXT:    s_and_b32 s4, s4, 15
251; GFX678-NEXT:    s_add_i32 s5, s4, 4
252; GFX678-NEXT:    s_cmp_lt_u32 s4, 4
253; GFX678-NEXT:    s_cselect_b32 s4, s4, s5
254; GFX678-NEXT:    s_add_i32 s5, s4, -4
255; GFX678-NEXT:    s_min_u32 s4, s4, s5
256; GFX678-NEXT:    s_lshl_b32 s6, s4, 2
257; GFX678-NEXT:    s_mov_b32 s4, 0x1c84a50f
258; GFX678-NEXT:    s_mov_b32 s5, 0xb73e62d9
259; GFX678-NEXT:    s_lshr_b64 s[4:5], s[4:5], s6
260; GFX678-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
261; GFX678-NEXT:    s_setpc_b64 s[30:31]
262;
263; GFX9-LABEL: set_rounding_get_rounding:
264; GFX9:       ; %bb.0:
265; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
266; GFX9-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 4)
267; GFX9-NEXT:    s_lshl_b32 s6, s4, 2
268; GFX9-NEXT:    s_mov_b32 s4, 0xeb24da71
269; GFX9-NEXT:    s_mov_b32 s5, 0xc96f385
270; GFX9-NEXT:    s_lshr_b64 s[4:5], s[4:5], s6
271; GFX9-NEXT:    s_and_b32 s4, s4, 15
272; GFX9-NEXT:    s_add_i32 s5, s4, 4
273; GFX9-NEXT:    s_cmp_lt_u32 s4, 4
274; GFX9-NEXT:    s_cselect_b32 s4, s4, s5
275; GFX9-NEXT:    s_add_i32 s5, s4, -4
276; GFX9-NEXT:    s_min_u32 s4, s4, s5
277; GFX9-NEXT:    s_lshl_b32 s6, s4, 2
278; GFX9-NEXT:    s_mov_b32 s4, 0x1c84a50f
279; GFX9-NEXT:    s_mov_b32 s5, 0xb73e62d9
280; GFX9-NEXT:    s_lshr_b64 s[4:5], s[4:5], s6
281; GFX9-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
282; GFX9-NEXT:    s_setpc_b64 s[30:31]
283;
284; GFX10-LABEL: set_rounding_get_rounding:
285; GFX10:       ; %bb.0:
286; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
287; GFX10-NEXT:    s_getreg_b32 s6, hwreg(HW_REG_MODE, 0, 4)
288; GFX10-NEXT:    s_mov_b32 s4, 0xeb24da71
289; GFX10-NEXT:    s_mov_b32 s5, 0xc96f385
290; GFX10-NEXT:    s_lshl_b32 s6, s6, 2
291; GFX10-NEXT:    s_lshr_b64 s[4:5], s[4:5], s6
292; GFX10-NEXT:    s_and_b32 s4, s4, 15
293; GFX10-NEXT:    s_add_i32 s5, s4, 4
294; GFX10-NEXT:    s_cmp_lt_u32 s4, 4
295; GFX10-NEXT:    s_cselect_b32 s4, s4, s5
296; GFX10-NEXT:    s_add_i32 s5, s4, -4
297; GFX10-NEXT:    s_min_u32 s6, s4, s5
298; GFX10-NEXT:    s_mov_b32 s4, 0x1c84a50f
299; GFX10-NEXT:    s_mov_b32 s5, 0xb73e62d9
300; GFX10-NEXT:    s_lshl_b32 s6, s6, 2
301; GFX10-NEXT:    s_lshr_b64 s[4:5], s[4:5], s6
302; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
303; GFX10-NEXT:    s_setpc_b64 s[30:31]
304;
305; GFX11-LABEL: set_rounding_get_rounding:
306; GFX11:       ; %bb.0:
307; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
308; GFX11-NEXT:    s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 4)
309; GFX11-NEXT:    s_mov_b32 s0, 0xeb24da71
310; GFX11-NEXT:    s_mov_b32 s1, 0xc96f385
311; GFX11-NEXT:    s_lshl_b32 s2, s2, 2
312; GFX11-NEXT:    s_lshr_b64 s[0:1], s[0:1], s2
313; GFX11-NEXT:    s_and_b32 s0, s0, 15
314; GFX11-NEXT:    s_add_i32 s1, s0, 4
315; GFX11-NEXT:    s_cmp_lt_u32 s0, 4
316; GFX11-NEXT:    s_cselect_b32 s0, s0, s1
317; GFX11-NEXT:    s_add_i32 s1, s0, -4
318; GFX11-NEXT:    s_min_u32 s2, s0, s1
319; GFX11-NEXT:    s_mov_b32 s0, 0x1c84a50f
320; GFX11-NEXT:    s_mov_b32 s1, 0xb73e62d9
321; GFX11-NEXT:    s_lshl_b32 s2, s2, 2
322; GFX11-NEXT:    s_lshr_b64 s[0:1], s[0:1], s2
323; GFX11-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
324; GFX11-NEXT:    s_setpc_b64 s[30:31]
325  %rounding = call i32 @llvm.get.rounding()
326  call void @llvm.set.rounding(i32 %rounding)
327  ret void
328}
329
; Constant argument 0: no table lookup is expected, only a single write of the
; immediate 15 (s_setreg_imm32_b32 before gfx10, s_round_mode 0xf on
; gfx10/gfx11).
330define void @s_set_rounding_0() {
331; GFX678-LABEL: s_set_rounding_0:
332; GFX678:       ; %bb.0:
333; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
334; GFX678-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 15
335; GFX678-NEXT:    s_setpc_b64 s[30:31]
336;
337; GFX9-LABEL: s_set_rounding_0:
338; GFX9:       ; %bb.0:
339; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
340; GFX9-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 15
341; GFX9-NEXT:    s_setpc_b64 s[30:31]
342;
343; GFX1011-LABEL: s_set_rounding_0:
344; GFX1011:       ; %bb.0:
345; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
346; GFX1011-NEXT:    s_round_mode 0xf
347; GFX1011-NEXT:    s_setpc_b64 s[30:31]
348  call void @llvm.set.rounding(i32 0)
349  ret void
350}
351
; Constant argument 1: the expected immediate is 0 (s_setreg_imm32_b32 before
; gfx10, s_round_mode 0x0 on gfx10/gfx11).
352define void @s_set_rounding_1() {
353; GFX678-LABEL: s_set_rounding_1:
354; GFX678:       ; %bb.0:
355; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
356; GFX678-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0
357; GFX678-NEXT:    s_setpc_b64 s[30:31]
358;
359; GFX9-LABEL: s_set_rounding_1:
360; GFX9:       ; %bb.0:
361; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
362; GFX9-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0
363; GFX9-NEXT:    s_setpc_b64 s[30:31]
364;
365; GFX1011-LABEL: s_set_rounding_1:
366; GFX1011:       ; %bb.0:
367; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
368; GFX1011-NEXT:    s_round_mode 0x0
369; GFX1011-NEXT:    s_setpc_b64 s[30:31]
370  call void @llvm.set.rounding(i32 1)
371  ret void
372}
373
; Constant argument 2: the expected immediate is 5 (s_setreg_imm32_b32 before
; gfx10, s_round_mode 0x5 on gfx10/gfx11).
374define void @s_set_rounding_2() {
375; GFX678-LABEL: s_set_rounding_2:
376; GFX678:       ; %bb.0:
377; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
378; GFX678-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 5
379; GFX678-NEXT:    s_setpc_b64 s[30:31]
380;
381; GFX9-LABEL: s_set_rounding_2:
382; GFX9:       ; %bb.0:
383; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
384; GFX9-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 5
385; GFX9-NEXT:    s_setpc_b64 s[30:31]
386;
387; GFX1011-LABEL: s_set_rounding_2:
388; GFX1011:       ; %bb.0:
389; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
390; GFX1011-NEXT:    s_round_mode 0x5
391; GFX1011-NEXT:    s_setpc_b64 s[30:31]
392  call void @llvm.set.rounding(i32 2)
393  ret void
394}
395
; Constant argument 3: the expected immediate is 10 (s_setreg_imm32_b32 before
; gfx10, s_round_mode 0xa on gfx10/gfx11).
396define void @s_set_rounding_3() {
397; GFX678-LABEL: s_set_rounding_3:
398; GFX678:       ; %bb.0:
399; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
400; GFX678-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 10
401; GFX678-NEXT:    s_setpc_b64 s[30:31]
402;
403; GFX9-LABEL: s_set_rounding_3:
404; GFX9:       ; %bb.0:
405; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
406; GFX9-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 10
407; GFX9-NEXT:    s_setpc_b64 s[30:31]
408;
409; GFX1011-LABEL: s_set_rounding_3:
410; GFX1011:       ; %bb.0:
411; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
412; GFX1011-NEXT:    s_round_mode 0xa
413; GFX1011-NEXT:    s_setpc_b64 s[30:31]
414  call void @llvm.set.rounding(i32 3)
415  ret void
416}
417
418; Unsupported mode (NearestTiesToAway = 4).
; Constant argument 4: the expected immediate is 15, the same value the checks
; for @s_set_rounding_0 expect.
419define void @s_set_rounding_4() {
420; GFX678-LABEL: s_set_rounding_4:
421; GFX678:       ; %bb.0:
422; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
423; GFX678-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 15
424; GFX678-NEXT:    s_setpc_b64 s[30:31]
425;
426; GFX9-LABEL: s_set_rounding_4:
427; GFX9:       ; %bb.0:
428; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
429; GFX9-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 15
430; GFX9-NEXT:    s_setpc_b64 s[30:31]
431;
432; GFX1011-LABEL: s_set_rounding_4:
433; GFX1011:       ; %bb.0:
434; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
435; GFX1011-NEXT:    s_round_mode 0xf
436; GFX1011-NEXT:    s_setpc_b64 s[30:31]
437  call void @llvm.set.rounding(i32 4)
438  ret void
439}
440
441; Undefined rounding mode value.
; Constant argument 5: the expected immediate is 0, the same value the checks
; for @s_set_rounding_1 expect.
442define void @s_set_rounding_5() {
443; GFX678-LABEL: s_set_rounding_5:
444; GFX678:       ; %bb.0:
445; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
446; GFX678-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0
447; GFX678-NEXT:    s_setpc_b64 s[30:31]
448;
449; GFX9-LABEL: s_set_rounding_5:
450; GFX9:       ; %bb.0:
451; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
452; GFX9-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0
453; GFX9-NEXT:    s_setpc_b64 s[30:31]
454;
455; GFX1011-LABEL: s_set_rounding_5:
456; GFX1011:       ; %bb.0:
457; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
458; GFX1011-NEXT:    s_round_mode 0x0
459; GFX1011-NEXT:    s_setpc_b64 s[30:31]
460  call void @llvm.set.rounding(i32 5)
461  ret void
462}
463
464; Undefined rounding mode value.
; Constant argument 6: the expected immediate is 5, the same value the checks
; for @s_set_rounding_2 expect.
465define void @s_set_rounding_6() {
466; GFX678-LABEL: s_set_rounding_6:
467; GFX678:       ; %bb.0:
468; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
469; GFX678-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 5
470; GFX678-NEXT:    s_setpc_b64 s[30:31]
471;
472; GFX9-LABEL: s_set_rounding_6:
473; GFX9:       ; %bb.0:
474; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
475; GFX9-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 5
476; GFX9-NEXT:    s_setpc_b64 s[30:31]
477;
478; GFX1011-LABEL: s_set_rounding_6:
479; GFX1011:       ; %bb.0:
480; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
481; GFX1011-NEXT:    s_round_mode 0x5
482; GFX1011-NEXT:    s_setpc_b64 s[30:31]
483  call void @llvm.set.rounding(i32 6)
484  ret void
485}
486
487; "Dynamic"
; Constant argument 7: the expected immediate is 10, the same value the checks
; for @s_set_rounding_3 expect.
488define void @s_set_rounding_7() {
489; GFX678-LABEL: s_set_rounding_7:
490; GFX678:       ; %bb.0:
491; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
492; GFX678-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 10
493; GFX678-NEXT:    s_setpc_b64 s[30:31]
494;
495; GFX9-LABEL: s_set_rounding_7:
496; GFX9:       ; %bb.0:
497; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
498; GFX9-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 10
499; GFX9-NEXT:    s_setpc_b64 s[30:31]
500;
501; GFX1011-LABEL: s_set_rounding_7:
502; GFX1011:       ; %bb.0:
503; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
504; GFX1011-NEXT:    s_round_mode 0xa
505; GFX1011-NEXT:    s_setpc_b64 s[30:31]
506  call void @llvm.set.rounding(i32 7)
507  ret void
508}
509
510; Invalid
; Constant argument -1: the expected immediate is 11 (s_round_mode 0xb on
; gfx10/gfx11), the same value the checks for the out-of-range constants 20
; and 65535 expect.
511define void @s_set_rounding_neg1() {
512; GFX678-LABEL: s_set_rounding_neg1:
513; GFX678:       ; %bb.0:
514; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
515; GFX678-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11
516; GFX678-NEXT:    s_setpc_b64 s[30:31]
517;
518; GFX9-LABEL: s_set_rounding_neg1:
519; GFX9:       ; %bb.0:
520; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
521; GFX9-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11
522; GFX9-NEXT:    s_setpc_b64 s[30:31]
523;
524; GFX1011-LABEL: s_set_rounding_neg1:
525; GFX1011:       ; %bb.0:
526; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
527; GFX1011-NEXT:    s_round_mode 0xb
528; GFX1011-NEXT:    s_setpc_b64 s[30:31]
529  call void @llvm.set.rounding(i32 -1)
530  ret void
531}
532
533; --------------------------------------------------------------------
534; Test extended values
535; --------------------------------------------------------------------
536
537; NearestTiesToEvenF32_TowardPositiveF64 = 8
; Constant argument 8: the expected immediate is 4 (s_setreg_imm32_b32 before
; gfx10, s_round_mode 0x4 on gfx10/gfx11).
538define void @s_set_rounding_8() {
539; GFX678-LABEL: s_set_rounding_8:
540; GFX678:       ; %bb.0:
541; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
542; GFX678-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 4
543; GFX678-NEXT:    s_setpc_b64 s[30:31]
544;
545; GFX9-LABEL: s_set_rounding_8:
546; GFX9:       ; %bb.0:
547; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
548; GFX9-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 4
549; GFX9-NEXT:    s_setpc_b64 s[30:31]
550;
551; GFX1011-LABEL: s_set_rounding_8:
552; GFX1011:       ; %bb.0:
553; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
554; GFX1011-NEXT:    s_round_mode 0x4
555; GFX1011-NEXT:    s_setpc_b64 s[30:31]
556  call void @llvm.set.rounding(i32 8)
557  ret void
558}
559
560; NearestTiesToEvenF32_TowardNegativeF64 = 9
; Constant argument 9: the expected immediate is 8 (s_setreg_imm32_b32 before
; gfx10, s_round_mode 0x8 on gfx10/gfx11).
561define void @s_set_rounding_9() {
562; GFX678-LABEL: s_set_rounding_9:
563; GFX678:       ; %bb.0:
564; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
565; GFX678-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 8
566; GFX678-NEXT:    s_setpc_b64 s[30:31]
567;
568; GFX9-LABEL: s_set_rounding_9:
569; GFX9:       ; %bb.0:
570; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
571; GFX9-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 8
572; GFX9-NEXT:    s_setpc_b64 s[30:31]
573;
574; GFX1011-LABEL: s_set_rounding_9:
575; GFX1011:       ; %bb.0:
576; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
577; GFX1011-NEXT:    s_round_mode 0x8
578; GFX1011-NEXT:    s_setpc_b64 s[30:31]
579  call void @llvm.set.rounding(i32 9)
580  ret void
581}
582
583; NearestTiesToEvenF32_TowardZeroF64 = 10
; Constant argument 10: the expected immediate is 12 (s_setreg_imm32_b32
; before gfx10, s_round_mode 0xc on gfx10/gfx11).
584define void @s_set_rounding_10() {
585; GFX678-LABEL: s_set_rounding_10:
586; GFX678:       ; %bb.0:
587; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
588; GFX678-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 12
589; GFX678-NEXT:    s_setpc_b64 s[30:31]
590;
591; GFX9-LABEL: s_set_rounding_10:
592; GFX9:       ; %bb.0:
593; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
594; GFX9-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 12
595; GFX9-NEXT:    s_setpc_b64 s[30:31]
596;
597; GFX1011-LABEL: s_set_rounding_10:
598; GFX1011:       ; %bb.0:
599; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
600; GFX1011-NEXT:    s_round_mode 0xc
601; GFX1011-NEXT:    s_setpc_b64 s[30:31]
602  call void @llvm.set.rounding(i32 10)
603  ret void
604}
605
606; TowardPositiveF32_NearestTiesToEvenF64 = 11
; Constant argument 11: the expected immediate is 1 (s_setreg_imm32_b32 before
; gfx10, s_round_mode 0x1 on gfx10/gfx11).
607define void @s_set_rounding_11() {
608; GFX678-LABEL: s_set_rounding_11:
609; GFX678:       ; %bb.0:
610; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
611; GFX678-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 1
612; GFX678-NEXT:    s_setpc_b64 s[30:31]
613;
614; GFX9-LABEL: s_set_rounding_11:
615; GFX9:       ; %bb.0:
616; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
617; GFX9-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 1
618; GFX9-NEXT:    s_setpc_b64 s[30:31]
619;
620; GFX1011-LABEL: s_set_rounding_11:
621; GFX1011:       ; %bb.0:
622; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
623; GFX1011-NEXT:    s_round_mode 0x1
624; GFX1011-NEXT:    s_setpc_b64 s[30:31]
625  call void @llvm.set.rounding(i32 11)
626  ret void
627}
628
629; TowardPositiveF32_TowardNegativeF64 = 12
; Constant argument 12: the expected immediate is 9 (s_setreg_imm32_b32 before
; gfx10, s_round_mode 0x9 on gfx10/gfx11).
630define void @s_set_rounding_12() {
631; GFX678-LABEL: s_set_rounding_12:
632; GFX678:       ; %bb.0:
633; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
634; GFX678-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 9
635; GFX678-NEXT:    s_setpc_b64 s[30:31]
636;
637; GFX9-LABEL: s_set_rounding_12:
638; GFX9:       ; %bb.0:
639; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
640; GFX9-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 9
641; GFX9-NEXT:    s_setpc_b64 s[30:31]
642;
643; GFX1011-LABEL: s_set_rounding_12:
644; GFX1011:       ; %bb.0:
645; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
646; GFX1011-NEXT:    s_round_mode 0x9
647; GFX1011-NEXT:    s_setpc_b64 s[30:31]
648  call void @llvm.set.rounding(i32 12)
649  ret void
650}
651
652; TowardPositiveF32_TowardZeroF64 = 13
; Constant argument 13: the expected immediate is 13 (s_setreg_imm32_b32
; before gfx10, s_round_mode 0xd on gfx10/gfx11).
653define void @s_set_rounding_13() {
654; GFX678-LABEL: s_set_rounding_13:
655; GFX678:       ; %bb.0:
656; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
657; GFX678-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 13
658; GFX678-NEXT:    s_setpc_b64 s[30:31]
659;
660; GFX9-LABEL: s_set_rounding_13:
661; GFX9:       ; %bb.0:
662; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
663; GFX9-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 13
664; GFX9-NEXT:    s_setpc_b64 s[30:31]
665;
666; GFX1011-LABEL: s_set_rounding_13:
667; GFX1011:       ; %bb.0:
668; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
669; GFX1011-NEXT:    s_round_mode 0xd
670; GFX1011-NEXT:    s_setpc_b64 s[30:31]
671  call void @llvm.set.rounding(i32 13)
672  ret void
673}
674
675; TowardNegativeF32_NearestTiesToEvenF64 = 14
; Constant argument 14: the expected immediate is 2 (s_setreg_imm32_b32 before
; gfx10, s_round_mode 0x2 on gfx10/gfx11).
676define void @s_set_rounding_14() {
677; GFX678-LABEL: s_set_rounding_14:
678; GFX678:       ; %bb.0:
679; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
680; GFX678-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 2
681; GFX678-NEXT:    s_setpc_b64 s[30:31]
682;
683; GFX9-LABEL: s_set_rounding_14:
684; GFX9:       ; %bb.0:
685; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
686; GFX9-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 2
687; GFX9-NEXT:    s_setpc_b64 s[30:31]
688;
689; GFX1011-LABEL: s_set_rounding_14:
690; GFX1011:       ; %bb.0:
691; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
692; GFX1011-NEXT:    s_round_mode 0x2
693; GFX1011-NEXT:    s_setpc_b64 s[30:31]
694  call void @llvm.set.rounding(i32 14)
695  ret void
696}
697
698; TowardNegativeF32_TowardPositiveF64 = 15
; Constant argument 15: the expected immediate is 6 (s_setreg_imm32_b32 before
; gfx10, s_round_mode 0x6 on gfx10/gfx11).
699define void @s_set_rounding_15() {
700; GFX678-LABEL: s_set_rounding_15:
701; GFX678:       ; %bb.0:
702; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
703; GFX678-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 6
704; GFX678-NEXT:    s_setpc_b64 s[30:31]
705;
706; GFX9-LABEL: s_set_rounding_15:
707; GFX9:       ; %bb.0:
708; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
709; GFX9-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 6
710; GFX9-NEXT:    s_setpc_b64 s[30:31]
711;
712; GFX1011-LABEL: s_set_rounding_15:
713; GFX1011:       ; %bb.0:
714; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
715; GFX1011-NEXT:    s_round_mode 0x6
716; GFX1011-NEXT:    s_setpc_b64 s[30:31]
717  call void @llvm.set.rounding(i32 15)
718  ret void
719}
720
721
722; TowardNegativeF32_TowardZeroF64 = 16
; Constant argument 16: the expected immediate is 14 (s_setreg_imm32_b32
; before gfx10, s_round_mode 0xe on gfx10/gfx11).
723define void @s_set_rounding_16() {
724; GFX678-LABEL: s_set_rounding_16:
725; GFX678:       ; %bb.0:
726; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
727; GFX678-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 14
728; GFX678-NEXT:    s_setpc_b64 s[30:31]
729;
730; GFX9-LABEL: s_set_rounding_16:
731; GFX9:       ; %bb.0:
732; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
733; GFX9-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 14
734; GFX9-NEXT:    s_setpc_b64 s[30:31]
735;
736; GFX1011-LABEL: s_set_rounding_16:
737; GFX1011:       ; %bb.0:
738; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
739; GFX1011-NEXT:    s_round_mode 0xe
740; GFX1011-NEXT:    s_setpc_b64 s[30:31]
741  call void @llvm.set.rounding(i32 16)
742  ret void
743}
744
745; TowardZeroF32_NearestTiesToEvenF64 = 17
; Constant argument 17: the expected immediate is 3 (s_setreg_imm32_b32 before
; gfx10, s_round_mode 0x3 on gfx10/gfx11).
746define void @s_set_rounding_17() {
747; GFX678-LABEL: s_set_rounding_17:
748; GFX678:       ; %bb.0:
749; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
750; GFX678-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 3
751; GFX678-NEXT:    s_setpc_b64 s[30:31]
752;
753; GFX9-LABEL: s_set_rounding_17:
754; GFX9:       ; %bb.0:
755; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
756; GFX9-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 3
757; GFX9-NEXT:    s_setpc_b64 s[30:31]
758;
759; GFX1011-LABEL: s_set_rounding_17:
760; GFX1011:       ; %bb.0:
761; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
762; GFX1011-NEXT:    s_round_mode 0x3
763; GFX1011-NEXT:    s_setpc_b64 s[30:31]
764  call void @llvm.set.rounding(i32 17)
765  ret void
766}
767
768; TowardZeroF32_TowardPositiveF64 = 18
; Constant argument 18: the expected immediate is 7 (s_setreg_imm32_b32 before
; gfx10, s_round_mode 0x7 on gfx10/gfx11).
769define void @s_set_rounding_18() {
770; GFX678-LABEL: s_set_rounding_18:
771; GFX678:       ; %bb.0:
772; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
773; GFX678-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 7
774; GFX678-NEXT:    s_setpc_b64 s[30:31]
775;
776; GFX9-LABEL: s_set_rounding_18:
777; GFX9:       ; %bb.0:
778; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
779; GFX9-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 7
780; GFX9-NEXT:    s_setpc_b64 s[30:31]
781;
782; GFX1011-LABEL: s_set_rounding_18:
783; GFX1011:       ; %bb.0:
784; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
785; GFX1011-NEXT:    s_round_mode 0x7
786; GFX1011-NEXT:    s_setpc_b64 s[30:31]
787  call void @llvm.set.rounding(i32 18)
788  ret void
789}
790
791; TowardZeroF32_TowardNegativeF64 = 19
; Constant argument 19: the expected immediate is 11 (s_setreg_imm32_b32
; before gfx10, s_round_mode 0xb on gfx10/gfx11).
792define void @s_set_rounding_19() {
793; GFX678-LABEL: s_set_rounding_19:
794; GFX678:       ; %bb.0:
795; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
796; GFX678-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11
797; GFX678-NEXT:    s_setpc_b64 s[30:31]
798;
799; GFX9-LABEL: s_set_rounding_19:
800; GFX9:       ; %bb.0:
801; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
802; GFX9-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11
803; GFX9-NEXT:    s_setpc_b64 s[30:31]
804;
805; GFX1011-LABEL: s_set_rounding_19:
806; GFX1011:       ; %bb.0:
807; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
808; GFX1011-NEXT:    s_round_mode 0xb
809; GFX1011-NEXT:    s_setpc_b64 s[30:31]
810  call void @llvm.set.rounding(i32 19)
811  ret void
812}
813
814; Invalid, out of bounds
; Constant argument 20, one past the last extended value tested above: the
; expected immediate is 11, the same value the checks for @s_set_rounding_19
; expect.
815define void @s_set_rounding_20() {
816; GFX678-LABEL: s_set_rounding_20:
817; GFX678:       ; %bb.0:
818; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
819; GFX678-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11
820; GFX678-NEXT:    s_setpc_b64 s[30:31]
821;
822; GFX9-LABEL: s_set_rounding_20:
823; GFX9:       ; %bb.0:
824; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
825; GFX9-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11
826; GFX9-NEXT:    s_setpc_b64 s[30:31]
827;
828; GFX1011-LABEL: s_set_rounding_20:
829; GFX1011:       ; %bb.0:
830; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
831; GFX1011-NEXT:    s_round_mode 0xb
832; GFX1011-NEXT:    s_setpc_b64 s[30:31]
833  call void @llvm.set.rounding(i32 20)
834  ret void
835}
836
; Constant argument 65535 (0xffff), far outside the tested range of extended
; values: the expected immediate is 11, the same value the checks for
; @s_set_rounding_20 expect.
837define void @s_set_rounding_0xffff() {
838; GFX678-LABEL: s_set_rounding_0xffff:
839; GFX678:       ; %bb.0:
840; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
841; GFX678-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11
842; GFX678-NEXT:    s_setpc_b64 s[30:31]
843;
844; GFX9-LABEL: s_set_rounding_0xffff:
845; GFX9:       ; %bb.0:
846; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
847; GFX9-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11
848; GFX9-NEXT:    s_setpc_b64 s[30:31]
849;
850; GFX1011-LABEL: s_set_rounding_0xffff:
851; GFX1011:       ; %bb.0:
852; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
853; GFX1011-NEXT:    s_round_mode 0xb
854; GFX1011-NEXT:    s_setpc_b64 s[30:31]
855  call void @llvm.set.rounding(i32 65535)
856  ret void
857}
858
859; --------------------------------------------------------------------
860; Test optimization knowing the value can only be in the standard
861; range
862; --------------------------------------------------------------------
863
; A zero-extended i2 can only hold 0..3, so the checks expect a 32-bit right
; shift of the constant 0xa50f by 4 * %rounding rather than a 64-bit shift.
; The fiji and newer outputs additionally mask the incoming SGPR with 0xffff.
864define amdgpu_gfx void @s_set_rounding_i2_zeroext(i2 zeroext inreg %rounding) {
865; GFX6-LABEL: s_set_rounding_i2_zeroext:
866; GFX6:       ; %bb.0:
867; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
868; GFX6-NEXT:    s_lshl_b32 s34, s4, 2
869; GFX6-NEXT:    s_lshr_b32 s34, 0xa50f, s34
870; GFX6-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
871; GFX6-NEXT:    s_setpc_b64 s[30:31]
872;
873; GFX7-LABEL: s_set_rounding_i2_zeroext:
874; GFX7:       ; %bb.0:
875; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
876; GFX7-NEXT:    s_lshl_b32 s34, s4, 2
877; GFX7-NEXT:    s_lshr_b32 s34, 0xa50f, s34
878; GFX7-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
879; GFX7-NEXT:    s_setpc_b64 s[30:31]
880;
881; GFX8-LABEL: s_set_rounding_i2_zeroext:
882; GFX8:       ; %bb.0:
883; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
884; GFX8-NEXT:    s_and_b32 s34, 0xffff, s4
885; GFX8-NEXT:    s_lshl_b32 s34, s34, 2
886; GFX8-NEXT:    s_lshr_b32 s34, 0xa50f, s34
887; GFX8-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
888; GFX8-NEXT:    s_setpc_b64 s[30:31]
889;
890; GFX9-LABEL: s_set_rounding_i2_zeroext:
891; GFX9:       ; %bb.0:
892; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
893; GFX9-NEXT:    s_and_b32 s34, 0xffff, s4
894; GFX9-NEXT:    s_lshl_b32 s34, s34, 2
895; GFX9-NEXT:    s_lshr_b32 s34, 0xa50f, s34
896; GFX9-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
897; GFX9-NEXT:    s_setpc_b64 s[30:31]
898;
899; GFX10-LABEL: s_set_rounding_i2_zeroext:
900; GFX10:       ; %bb.0:
901; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
902; GFX10-NEXT:    s_and_b32 s34, 0xffff, s4
903; GFX10-NEXT:    s_lshl_b32 s34, s34, 2
904; GFX10-NEXT:    s_lshr_b32 s34, 0xa50f, s34
905; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
906; GFX10-NEXT:    s_setpc_b64 s[30:31]
907;
908; GFX11-LABEL: s_set_rounding_i2_zeroext:
909; GFX11:       ; %bb.0:
910; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
911; GFX11-NEXT:    s_and_b32 s0, 0xffff, s4
912; GFX11-NEXT:    s_lshl_b32 s0, s0, 2
913; GFX11-NEXT:    s_lshr_b32 s0, 0xa50f, s0
914; GFX11-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
915; GFX11-NEXT:    s_setpc_b64 s[30:31]
916  %zext.rounding = zext i2 %rounding to i32
917  call void @llvm.set.rounding(i32 %zext.rounding)
918  ret void
919}
920
; A sign-extended i2 ranges over -2..1, so the input may be negative. The
; checks expect the general sequence: add -4, unsigned min against the original
; value, scale by 4, then a 64-bit right shift of 0xb73e62d9:0x1c84a50f.
921define amdgpu_gfx void @s_set_rounding_i2_signext(i2 signext inreg %rounding) {
922; GFX6-LABEL: s_set_rounding_i2_signext:
923; GFX6:       ; %bb.0:
924; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
925; GFX6-NEXT:    s_add_i32 s34, s4, -4
926; GFX6-NEXT:    s_min_u32 s34, s4, s34
927; GFX6-NEXT:    s_lshl_b32 s36, s34, 2
928; GFX6-NEXT:    s_mov_b32 s34, 0x1c84a50f
929; GFX6-NEXT:    s_mov_b32 s35, 0xb73e62d9
930; GFX6-NEXT:    s_lshr_b64 s[34:35], s[34:35], s36
931; GFX6-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
932; GFX6-NEXT:    s_setpc_b64 s[30:31]
933;
934; GFX7-LABEL: s_set_rounding_i2_signext:
935; GFX7:       ; %bb.0:
936; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
937; GFX7-NEXT:    s_add_i32 s34, s4, -4
938; GFX7-NEXT:    s_min_u32 s34, s4, s34
939; GFX7-NEXT:    s_lshl_b32 s36, s34, 2
940; GFX7-NEXT:    s_mov_b32 s34, 0x1c84a50f
941; GFX7-NEXT:    s_mov_b32 s35, 0xb73e62d9
942; GFX7-NEXT:    s_lshr_b64 s[34:35], s[34:35], s36
943; GFX7-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
944; GFX7-NEXT:    s_setpc_b64 s[30:31]
945;
946; GFX8-LABEL: s_set_rounding_i2_signext:
947; GFX8:       ; %bb.0:
948; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
949; GFX8-NEXT:    s_sext_i32_i16 s34, s4
950; GFX8-NEXT:    s_add_i32 s35, s34, -4
951; GFX8-NEXT:    s_min_u32 s34, s34, s35
952; GFX8-NEXT:    s_lshl_b32 s36, s34, 2
953; GFX8-NEXT:    s_mov_b32 s34, 0x1c84a50f
954; GFX8-NEXT:    s_mov_b32 s35, 0xb73e62d9
955; GFX8-NEXT:    s_lshr_b64 s[34:35], s[34:35], s36
956; GFX8-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
957; GFX8-NEXT:    s_setpc_b64 s[30:31]
958;
959; GFX9-LABEL: s_set_rounding_i2_signext:
960; GFX9:       ; %bb.0:
961; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
962; GFX9-NEXT:    s_sext_i32_i16 s34, s4
963; GFX9-NEXT:    s_add_i32 s35, s34, -4
964; GFX9-NEXT:    s_min_u32 s34, s34, s35
965; GFX9-NEXT:    s_lshl_b32 s36, s34, 2
966; GFX9-NEXT:    s_mov_b32 s34, 0x1c84a50f
967; GFX9-NEXT:    s_mov_b32 s35, 0xb73e62d9
968; GFX9-NEXT:    s_lshr_b64 s[34:35], s[34:35], s36
969; GFX9-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
970; GFX9-NEXT:    s_setpc_b64 s[30:31]
971;
972; GFX10-LABEL: s_set_rounding_i2_signext:
973; GFX10:       ; %bb.0:
974; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
975; GFX10-NEXT:    s_sext_i32_i16 s34, s4
976; GFX10-NEXT:    s_add_i32 s35, s34, -4
977; GFX10-NEXT:    s_min_u32 s36, s34, s35
978; GFX10-NEXT:    s_mov_b32 s34, 0x1c84a50f
979; GFX10-NEXT:    s_mov_b32 s35, 0xb73e62d9
980; GFX10-NEXT:    s_lshl_b32 s36, s36, 2
981; GFX10-NEXT:    s_lshr_b64 s[34:35], s[34:35], s36
982; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
983; GFX10-NEXT:    s_setpc_b64 s[30:31]
984;
985; GFX11-LABEL: s_set_rounding_i2_signext:
986; GFX11:       ; %bb.0:
987; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
988; GFX11-NEXT:    s_sext_i32_i16 s0, s4
989; GFX11-NEXT:    s_add_i32 s1, s0, -4
990; GFX11-NEXT:    s_min_u32 s2, s0, s1
991; GFX11-NEXT:    s_mov_b32 s0, 0x1c84a50f
992; GFX11-NEXT:    s_mov_b32 s1, 0xb73e62d9
993; GFX11-NEXT:    s_lshl_b32 s2, s2, 2
994; GFX11-NEXT:    s_lshr_b64 s[0:1], s[0:1], s2
995; GFX11-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
996; GFX11-NEXT:    s_setpc_b64 s[30:31]
997  %sext.rounding = sext i2 %rounding to i32
998  call void @llvm.set.rounding(i32 %sext.rounding)
999  ret void
1000}
1001
; A sign-extended i3 ranges over -4..3, which is not confined to 0..3. The
; checks expect the general sequence (add -4, unsigned min, scale by 4, 64-bit
; right shift of 0xb73e62d9:0x1c84a50f).
1002define amdgpu_gfx void @s_set_rounding_i3_signext(i3 signext inreg %rounding) {
1003; GFX6-LABEL: s_set_rounding_i3_signext:
1004; GFX6:       ; %bb.0:
1005; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1006; GFX6-NEXT:    s_add_i32 s34, s4, -4
1007; GFX6-NEXT:    s_min_u32 s34, s4, s34
1008; GFX6-NEXT:    s_lshl_b32 s36, s34, 2
1009; GFX6-NEXT:    s_mov_b32 s34, 0x1c84a50f
1010; GFX6-NEXT:    s_mov_b32 s35, 0xb73e62d9
1011; GFX6-NEXT:    s_lshr_b64 s[34:35], s[34:35], s36
1012; GFX6-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
1013; GFX6-NEXT:    s_setpc_b64 s[30:31]
1014;
1015; GFX7-LABEL: s_set_rounding_i3_signext:
1016; GFX7:       ; %bb.0:
1017; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1018; GFX7-NEXT:    s_add_i32 s34, s4, -4
1019; GFX7-NEXT:    s_min_u32 s34, s4, s34
1020; GFX7-NEXT:    s_lshl_b32 s36, s34, 2
1021; GFX7-NEXT:    s_mov_b32 s34, 0x1c84a50f
1022; GFX7-NEXT:    s_mov_b32 s35, 0xb73e62d9
1023; GFX7-NEXT:    s_lshr_b64 s[34:35], s[34:35], s36
1024; GFX7-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
1025; GFX7-NEXT:    s_setpc_b64 s[30:31]
1026;
1027; GFX8-LABEL: s_set_rounding_i3_signext:
1028; GFX8:       ; %bb.0:
1029; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1030; GFX8-NEXT:    s_sext_i32_i16 s34, s4
1031; GFX8-NEXT:    s_add_i32 s35, s34, -4
1032; GFX8-NEXT:    s_min_u32 s34, s34, s35
1033; GFX8-NEXT:    s_lshl_b32 s36, s34, 2
1034; GFX8-NEXT:    s_mov_b32 s34, 0x1c84a50f
1035; GFX8-NEXT:    s_mov_b32 s35, 0xb73e62d9
1036; GFX8-NEXT:    s_lshr_b64 s[34:35], s[34:35], s36
1037; GFX8-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
1038; GFX8-NEXT:    s_setpc_b64 s[30:31]
1039;
1040; GFX9-LABEL: s_set_rounding_i3_signext:
1041; GFX9:       ; %bb.0:
1042; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1043; GFX9-NEXT:    s_sext_i32_i16 s34, s4
1044; GFX9-NEXT:    s_add_i32 s35, s34, -4
1045; GFX9-NEXT:    s_min_u32 s34, s34, s35
1046; GFX9-NEXT:    s_lshl_b32 s36, s34, 2
1047; GFX9-NEXT:    s_mov_b32 s34, 0x1c84a50f
1048; GFX9-NEXT:    s_mov_b32 s35, 0xb73e62d9
1049; GFX9-NEXT:    s_lshr_b64 s[34:35], s[34:35], s36
1050; GFX9-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
1051; GFX9-NEXT:    s_setpc_b64 s[30:31]
1052;
1053; GFX10-LABEL: s_set_rounding_i3_signext:
1054; GFX10:       ; %bb.0:
1055; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1056; GFX10-NEXT:    s_sext_i32_i16 s34, s4
1057; GFX10-NEXT:    s_add_i32 s35, s34, -4
1058; GFX10-NEXT:    s_min_u32 s36, s34, s35
1059; GFX10-NEXT:    s_mov_b32 s34, 0x1c84a50f
1060; GFX10-NEXT:    s_mov_b32 s35, 0xb73e62d9
1061; GFX10-NEXT:    s_lshl_b32 s36, s36, 2
1062; GFX10-NEXT:    s_lshr_b64 s[34:35], s[34:35], s36
1063; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
1064; GFX10-NEXT:    s_setpc_b64 s[30:31]
1065;
1066; GFX11-LABEL: s_set_rounding_i3_signext:
1067; GFX11:       ; %bb.0:
1068; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1069; GFX11-NEXT:    s_sext_i32_i16 s0, s4
1070; GFX11-NEXT:    s_add_i32 s1, s0, -4
1071; GFX11-NEXT:    s_min_u32 s2, s0, s1
1072; GFX11-NEXT:    s_mov_b32 s0, 0x1c84a50f
1073; GFX11-NEXT:    s_mov_b32 s1, 0xb73e62d9
1074; GFX11-NEXT:    s_lshl_b32 s2, s2, 2
1075; GFX11-NEXT:    s_lshr_b64 s[0:1], s[0:1], s2
1076; GFX11-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
1077; GFX11-NEXT:    s_setpc_b64 s[30:31]
1078  %sext.rounding = sext i3 %rounding to i32
1079  call void @llvm.set.rounding(i32 %sext.rounding)
1080  ret void
1081}
1082
; A zero-extended i3 ranges over 0..7, which exceeds 0..3. The checks expect
; the general sequence (add -4, unsigned min, scale by 4, 64-bit right shift of
; 0xb73e62d9:0x1c84a50f) instead of the 32-bit shift used for the i2 zeroext
; case.
1083define amdgpu_gfx void @s_set_rounding_i3_zeroext(i3 zeroext inreg %rounding) {
1084; GFX6-LABEL: s_set_rounding_i3_zeroext:
1085; GFX6:       ; %bb.0:
1086; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1087; GFX6-NEXT:    s_add_i32 s34, s4, -4
1088; GFX6-NEXT:    s_min_u32 s34, s4, s34
1089; GFX6-NEXT:    s_lshl_b32 s36, s34, 2
1090; GFX6-NEXT:    s_mov_b32 s34, 0x1c84a50f
1091; GFX6-NEXT:    s_mov_b32 s35, 0xb73e62d9
1092; GFX6-NEXT:    s_lshr_b64 s[34:35], s[34:35], s36
1093; GFX6-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
1094; GFX6-NEXT:    s_setpc_b64 s[30:31]
1095;
1096; GFX7-LABEL: s_set_rounding_i3_zeroext:
1097; GFX7:       ; %bb.0:
1098; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1099; GFX7-NEXT:    s_add_i32 s34, s4, -4
1100; GFX7-NEXT:    s_min_u32 s34, s4, s34
1101; GFX7-NEXT:    s_lshl_b32 s36, s34, 2
1102; GFX7-NEXT:    s_mov_b32 s34, 0x1c84a50f
1103; GFX7-NEXT:    s_mov_b32 s35, 0xb73e62d9
1104; GFX7-NEXT:    s_lshr_b64 s[34:35], s[34:35], s36
1105; GFX7-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
1106; GFX7-NEXT:    s_setpc_b64 s[30:31]
1107;
1108; GFX8-LABEL: s_set_rounding_i3_zeroext:
1109; GFX8:       ; %bb.0:
1110; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1111; GFX8-NEXT:    s_and_b32 s34, 0xffff, s4
1112; GFX8-NEXT:    s_add_i32 s35, s34, -4
1113; GFX8-NEXT:    s_min_u32 s34, s34, s35
1114; GFX8-NEXT:    s_lshl_b32 s36, s34, 2
1115; GFX8-NEXT:    s_mov_b32 s34, 0x1c84a50f
1116; GFX8-NEXT:    s_mov_b32 s35, 0xb73e62d9
1117; GFX8-NEXT:    s_lshr_b64 s[34:35], s[34:35], s36
1118; GFX8-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
1119; GFX8-NEXT:    s_setpc_b64 s[30:31]
1120;
1121; GFX9-LABEL: s_set_rounding_i3_zeroext:
1122; GFX9:       ; %bb.0:
1123; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1124; GFX9-NEXT:    s_and_b32 s34, 0xffff, s4
1125; GFX9-NEXT:    s_add_i32 s35, s34, -4
1126; GFX9-NEXT:    s_min_u32 s34, s34, s35
1127; GFX9-NEXT:    s_lshl_b32 s36, s34, 2
1128; GFX9-NEXT:    s_mov_b32 s34, 0x1c84a50f
1129; GFX9-NEXT:    s_mov_b32 s35, 0xb73e62d9
1130; GFX9-NEXT:    s_lshr_b64 s[34:35], s[34:35], s36
1131; GFX9-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
1132; GFX9-NEXT:    s_setpc_b64 s[30:31]
1133;
1134; GFX10-LABEL: s_set_rounding_i3_zeroext:
1135; GFX10:       ; %bb.0:
1136; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1137; GFX10-NEXT:    s_and_b32 s34, 0xffff, s4
1138; GFX10-NEXT:    s_add_i32 s35, s34, -4
1139; GFX10-NEXT:    s_min_u32 s36, s34, s35
1140; GFX10-NEXT:    s_mov_b32 s34, 0x1c84a50f
1141; GFX10-NEXT:    s_mov_b32 s35, 0xb73e62d9
1142; GFX10-NEXT:    s_lshl_b32 s36, s36, 2
1143; GFX10-NEXT:    s_lshr_b64 s[34:35], s[34:35], s36
1144; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
1145; GFX10-NEXT:    s_setpc_b64 s[30:31]
1146;
1147; GFX11-LABEL: s_set_rounding_i3_zeroext:
1148; GFX11:       ; %bb.0:
1149; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1150; GFX11-NEXT:    s_and_b32 s0, 0xffff, s4
1151; GFX11-NEXT:    s_add_i32 s1, s0, -4
1152; GFX11-NEXT:    s_min_u32 s2, s0, s1
1153; GFX11-NEXT:    s_mov_b32 s0, 0x1c84a50f
1154; GFX11-NEXT:    s_mov_b32 s1, 0xb73e62d9
1155; GFX11-NEXT:    s_lshl_b32 s2, s2, 2
1156; GFX11-NEXT:    s_lshr_b64 s[0:1], s[0:1], s2
1157; GFX11-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
1158; GFX11-NEXT:    s_setpc_b64 s[30:31]
1159  %zext.rounding = zext i3 %rounding to i32
1160  call void @llvm.set.rounding(i32 %zext.rounding)
1161  ret void
1162}
1163
; %rounding is a select between the constants 0 and 1. The checks expect the
; 0/1 value to be materialized with v_cndmask, scaled by 4, and used as a
; 32-bit right-shift amount on 0xa50f, with v_readfirstlane moving the result
; back to an SGPR for s_setreg_b32.
1164define amdgpu_gfx void @s_set_rounding_select_0_1(i32 inreg %cond) {
1165; GFX6-LABEL: s_set_rounding_select_0_1:
1166; GFX6:       ; %bb.0:
1167; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1168; GFX6-NEXT:    s_cmp_lg_u32 s4, 0
1169; GFX6-NEXT:    s_cselect_b64 s[34:35], -1, 0
1170; GFX6-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[34:35]
1171; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1172; GFX6-NEXT:    v_lshr_b32_e32 v0, 0xa50f, v0
1173; GFX6-NEXT:    v_readfirstlane_b32 s34, v0
1174; GFX6-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
1175; GFX6-NEXT:    s_setpc_b64 s[30:31]
1176;
1177; GFX7-LABEL: s_set_rounding_select_0_1:
1178; GFX7:       ; %bb.0:
1179; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1180; GFX7-NEXT:    s_cmp_lg_u32 s4, 0
1181; GFX7-NEXT:    s_cselect_b64 s[34:35], -1, 0
1182; GFX7-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[34:35]
1183; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1184; GFX7-NEXT:    v_lshr_b32_e32 v0, 0xa50f, v0
1185; GFX7-NEXT:    v_readfirstlane_b32 s34, v0
1186; GFX7-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
1187; GFX7-NEXT:    s_setpc_b64 s[30:31]
1188;
1189; GFX8-LABEL: s_set_rounding_select_0_1:
1190; GFX8:       ; %bb.0:
1191; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1192; GFX8-NEXT:    s_cmp_lg_u32 s4, 0
1193; GFX8-NEXT:    s_cselect_b64 s[34:35], -1, 0
1194; GFX8-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[34:35]
1195; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1196; GFX8-NEXT:    s_mov_b32 s34, 0xa50f
1197; GFX8-NEXT:    v_lshrrev_b32_e64 v0, v0, s34
1198; GFX8-NEXT:    v_readfirstlane_b32 s34, v0
1199; GFX8-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
1200; GFX8-NEXT:    s_setpc_b64 s[30:31]
1201;
1202; GFX9-LABEL: s_set_rounding_select_0_1:
1203; GFX9:       ; %bb.0:
1204; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1205; GFX9-NEXT:    s_cmp_lg_u32 s4, 0
1206; GFX9-NEXT:    s_cselect_b64 s[34:35], -1, 0
1207; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[34:35]
1208; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1209; GFX9-NEXT:    s_mov_b32 s34, 0xa50f
1210; GFX9-NEXT:    v_lshrrev_b32_e64 v0, v0, s34
1211; GFX9-NEXT:    v_readfirstlane_b32 s34, v0
1212; GFX9-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
1213; GFX9-NEXT:    s_setpc_b64 s[30:31]
1214;
1215; GFX10-LABEL: s_set_rounding_select_0_1:
1216; GFX10:       ; %bb.0:
1217; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1218; GFX10-NEXT:    s_cmp_lg_u32 s4, 0
1219; GFX10-NEXT:    s_cselect_b32 s34, -1, 0
1220; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s34
1221; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1222; GFX10-NEXT:    v_lshrrev_b32_e64 v0, v0, 0xa50f
1223; GFX10-NEXT:    v_readfirstlane_b32 s34, v0
1224; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
1225; GFX10-NEXT:    s_setpc_b64 s[30:31]
1226;
1227; GFX11-LABEL: s_set_rounding_select_0_1:
1228; GFX11:       ; %bb.0:
1229; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1230; GFX11-NEXT:    s_cmp_lg_u32 s4, 0
1231; GFX11-NEXT:    s_cselect_b32 s0, -1, 0
1232; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
1233; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1234; GFX11-NEXT:    v_lshrrev_b32_e64 v0, v0, 0xa50f
1235; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
1236; GFX11-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
1237; GFX11-NEXT:    s_setpc_b64 s[30:31]
1238  %cmp = icmp eq i32 %cond, 0
1239  %rounding = select i1 %cmp, i32 0, i32 1
1240  call void @llvm.set.rounding(i32 %rounding)
1241  ret void
1242}
1243
; %rounding is a select between 1 and 3. The checks expect no shift at all:
; the scalar select picks directly between 0xa50 (0xa50f >> 4) and 10
; (0xa50f >> 12), and that value feeds s_setreg_b32.
1244define amdgpu_gfx void @s_set_rounding_select_1_3(i32 inreg %cond) {
1245; GFX678-LABEL: s_set_rounding_select_1_3:
1246; GFX678:       ; %bb.0:
1247; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1248; GFX678-NEXT:    s_cmp_eq_u32 s4, 0
1249; GFX678-NEXT:    s_cselect_b32 s34, 0xa50, 10
1250; GFX678-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
1251; GFX678-NEXT:    s_setpc_b64 s[30:31]
1252;
1253; GFX9-LABEL: s_set_rounding_select_1_3:
1254; GFX9:       ; %bb.0:
1255; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1256; GFX9-NEXT:    s_cmp_eq_u32 s4, 0
1257; GFX9-NEXT:    s_cselect_b32 s34, 0xa50, 10
1258; GFX9-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
1259; GFX9-NEXT:    s_setpc_b64 s[30:31]
1260;
1261; GFX10-LABEL: s_set_rounding_select_1_3:
1262; GFX10:       ; %bb.0:
1263; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1264; GFX10-NEXT:    s_cmp_eq_u32 s4, 0
1265; GFX10-NEXT:    s_cselect_b32 s34, 0xa50, 10
1266; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
1267; GFX10-NEXT:    s_setpc_b64 s[30:31]
1268;
1269; GFX11-LABEL: s_set_rounding_select_1_3:
1270; GFX11:       ; %bb.0:
1271; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1272; GFX11-NEXT:    s_cmp_eq_u32 s4, 0
1273; GFX11-NEXT:    s_cselect_b32 s0, 0xa50, 10
1274; GFX11-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
1275; GFX11-NEXT:    s_setpc_b64 s[30:31]
1276  %cmp = icmp eq i32 %cond, 0
1277  %rounding = select i1 %cmp, i32 1, i32 3
1278  call void @llvm.set.rounding(i32 %rounding)
1279  ret void
1280}
1281
; Same select between 1 and 3, but %cond is not marked inreg and arrives in v0.
; The checks expect a v_cndmask between the pre-shifted constants 10 and 0xa50,
; then v_readfirstlane to get an SGPR operand for s_setreg_b32.
1282define void @v_set_rounding_select_1_3(i32 %cond) {
1283; GFX678-LABEL: v_set_rounding_select_1_3:
1284; GFX678:       ; %bb.0:
1285; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1286; GFX678-NEXT:    v_mov_b32_e32 v1, 0xa50
1287; GFX678-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
1288; GFX678-NEXT:    v_cndmask_b32_e32 v0, 10, v1, vcc
1289; GFX678-NEXT:    v_readfirstlane_b32 s4, v0
1290; GFX678-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
1291; GFX678-NEXT:    s_setpc_b64 s[30:31]
1292;
1293; GFX9-LABEL: v_set_rounding_select_1_3:
1294; GFX9:       ; %bb.0:
1295; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1296; GFX9-NEXT:    v_mov_b32_e32 v1, 0xa50
1297; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
1298; GFX9-NEXT:    v_cndmask_b32_e32 v0, 10, v1, vcc
1299; GFX9-NEXT:    v_readfirstlane_b32 s4, v0
1300; GFX9-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
1301; GFX9-NEXT:    s_setpc_b64 s[30:31]
1302;
1303; GFX10-LABEL: v_set_rounding_select_1_3:
1304; GFX10:       ; %bb.0:
1305; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1306; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
1307; GFX10-NEXT:    v_cndmask_b32_e64 v0, 10, 0xa50, vcc_lo
1308; GFX10-NEXT:    v_readfirstlane_b32 s4, v0
1309; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
1310; GFX10-NEXT:    s_setpc_b64 s[30:31]
1311;
1312; GFX11-LABEL: v_set_rounding_select_1_3:
1313; GFX11:       ; %bb.0:
1314; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1315; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
1316; GFX11-NEXT:    v_cndmask_b32_e64 v0, 10, 0xa50, vcc_lo
1317; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
1318; GFX11-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
1319; GFX11-NEXT:    s_setpc_b64 s[30:31]
1320  %cmp = icmp eq i32 %cond, 0
1321  %rounding = select i1 %cmp, i32 1, i32 3
1322  call void @llvm.set.rounding(i32 %rounding)
1323  ret void
1324}
1325
; %rounding is a select between 2 and 0. The checks expect the shift amount to
; be built as a 0/1 v_cndmask result shifted left by 3 (so 0 or 8), applied as
; a 32-bit right shift of 0xa50f, with v_readfirstlane feeding s_setreg_b32.
1326define amdgpu_gfx void @s_set_rounding_select_2_0(i32 inreg %cond) {
1327; GFX6-LABEL: s_set_rounding_select_2_0:
1328; GFX6:       ; %bb.0:
1329; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1330; GFX6-NEXT:    s_cmp_eq_u32 s4, 0
1331; GFX6-NEXT:    s_cselect_b64 s[34:35], -1, 0
1332; GFX6-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[34:35]
1333; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
1334; GFX6-NEXT:    v_lshr_b32_e32 v0, 0xa50f, v0
1335; GFX6-NEXT:    v_readfirstlane_b32 s34, v0
1336; GFX6-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
1337; GFX6-NEXT:    s_setpc_b64 s[30:31]
1338;
1339; GFX7-LABEL: s_set_rounding_select_2_0:
1340; GFX7:       ; %bb.0:
1341; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1342; GFX7-NEXT:    s_cmp_eq_u32 s4, 0
1343; GFX7-NEXT:    s_cselect_b64 s[34:35], -1, 0
1344; GFX7-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[34:35]
1345; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
1346; GFX7-NEXT:    v_lshr_b32_e32 v0, 0xa50f, v0
1347; GFX7-NEXT:    v_readfirstlane_b32 s34, v0
1348; GFX7-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
1349; GFX7-NEXT:    s_setpc_b64 s[30:31]
1350;
1351; GFX8-LABEL: s_set_rounding_select_2_0:
1352; GFX8:       ; %bb.0:
1353; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1354; GFX8-NEXT:    s_cmp_eq_u32 s4, 0
1355; GFX8-NEXT:    s_cselect_b64 s[34:35], -1, 0
1356; GFX8-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[34:35]
1357; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
1358; GFX8-NEXT:    s_mov_b32 s34, 0xa50f
1359; GFX8-NEXT:    v_lshrrev_b32_e64 v0, v0, s34
1360; GFX8-NEXT:    v_readfirstlane_b32 s34, v0
1361; GFX8-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
1362; GFX8-NEXT:    s_setpc_b64 s[30:31]
1363;
1364; GFX9-LABEL: s_set_rounding_select_2_0:
1365; GFX9:       ; %bb.0:
1366; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1367; GFX9-NEXT:    s_cmp_eq_u32 s4, 0
1368; GFX9-NEXT:    s_cselect_b64 s[34:35], -1, 0
1369; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[34:35]
1370; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
1371; GFX9-NEXT:    s_mov_b32 s34, 0xa50f
1372; GFX9-NEXT:    v_lshrrev_b32_e64 v0, v0, s34
1373; GFX9-NEXT:    v_readfirstlane_b32 s34, v0
1374; GFX9-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
1375; GFX9-NEXT:    s_setpc_b64 s[30:31]
1376;
1377; GFX10-LABEL: s_set_rounding_select_2_0:
1378; GFX10:       ; %bb.0:
1379; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1380; GFX10-NEXT:    s_cmp_eq_u32 s4, 0
1381; GFX10-NEXT:    s_cselect_b32 s34, -1, 0
1382; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s34
1383; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
1384; GFX10-NEXT:    v_lshrrev_b32_e64 v0, v0, 0xa50f
1385; GFX10-NEXT:    v_readfirstlane_b32 s34, v0
1386; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
1387; GFX10-NEXT:    s_setpc_b64 s[30:31]
1388;
1389; GFX11-LABEL: s_set_rounding_select_2_0:
1390; GFX11:       ; %bb.0:
1391; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1392; GFX11-NEXT:    s_cmp_eq_u32 s4, 0
1393; GFX11-NEXT:    s_cselect_b32 s0, -1, 0
1394; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
1395; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
1396; GFX11-NEXT:    v_lshrrev_b32_e64 v0, v0, 0xa50f
1397; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
1398; GFX11-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
1399; GFX11-NEXT:    s_setpc_b64 s[30:31]
1400  %cmp = icmp eq i32 %cond, 0
1401  %rounding = select i1 %cmp, i32 2, i32 0
1402  call void @llvm.set.rounding(i32 %rounding)
1403  ret void
1404}
1405
; %rounding is a select between 2 and 1. The checks expect a scalar select
; between the pre-shifted constants 0xa5 (0xa50f >> 8) and 0xa50 (0xa50f >> 4),
; with no shift instruction.
1406define amdgpu_gfx void @s_set_rounding_select_2_1(i32 inreg %cond) {
1407; GFX678-LABEL: s_set_rounding_select_2_1:
1408; GFX678:       ; %bb.0:
1409; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1410; GFX678-NEXT:    s_cmp_eq_u32 s4, 0
1411; GFX678-NEXT:    s_movk_i32 s34, 0xa5
1412; GFX678-NEXT:    s_cselect_b32 s34, s34, 0xa50
1413; GFX678-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
1414; GFX678-NEXT:    s_setpc_b64 s[30:31]
1415;
1416; GFX9-LABEL: s_set_rounding_select_2_1:
1417; GFX9:       ; %bb.0:
1418; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1419; GFX9-NEXT:    s_cmp_eq_u32 s4, 0
1420; GFX9-NEXT:    s_movk_i32 s34, 0xa5
1421; GFX9-NEXT:    s_cselect_b32 s34, s34, 0xa50
1422; GFX9-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
1423; GFX9-NEXT:    s_setpc_b64 s[30:31]
1424;
1425; GFX10-LABEL: s_set_rounding_select_2_1:
1426; GFX10:       ; %bb.0:
1427; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1428; GFX10-NEXT:    s_cmp_eq_u32 s4, 0
1429; GFX10-NEXT:    s_movk_i32 s34, 0xa5
1430; GFX10-NEXT:    s_cselect_b32 s34, s34, 0xa50
1431; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
1432; GFX10-NEXT:    s_setpc_b64 s[30:31]
1433;
1434; GFX11-LABEL: s_set_rounding_select_2_1:
1435; GFX11:       ; %bb.0:
1436; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1437; GFX11-NEXT:    s_cmp_eq_u32 s4, 0
1438; GFX11-NEXT:    s_movk_i32 s0, 0xa5
1439; GFX11-NEXT:    s_cselect_b32 s0, s0, 0xa50
1440; GFX11-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
1441; GFX11-NEXT:    s_setpc_b64 s[30:31]
1442  %cmp = icmp eq i32 %cond, 0
1443  %rounding = select i1 %cmp, i32 2, i32 1
1444  call void @llvm.set.rounding(i32 %rounding)
1445  ret void
1446}
1447
; %rounding is a select between 1 and 2 (operands swapped relative to
; s_set_rounding_select_2_1). The checks expect a scalar select between 0xa50
; (0xa50f >> 4) and 0xa5 (0xa50f >> 8), with no shift instruction.
1448define amdgpu_gfx void @s_set_rounding_select_1_2(i32 inreg %cond) {
1449; GFX678-LABEL: s_set_rounding_select_1_2:
1450; GFX678:       ; %bb.0:
1451; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1452; GFX678-NEXT:    s_cmp_eq_u32 s4, 0
1453; GFX678-NEXT:    s_movk_i32 s34, 0xa50
1454; GFX678-NEXT:    s_cselect_b32 s34, s34, 0xa5
1455; GFX678-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
1456; GFX678-NEXT:    s_setpc_b64 s[30:31]
1457;
1458; GFX9-LABEL: s_set_rounding_select_1_2:
1459; GFX9:       ; %bb.0:
1460; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1461; GFX9-NEXT:    s_cmp_eq_u32 s4, 0
1462; GFX9-NEXT:    s_movk_i32 s34, 0xa50
1463; GFX9-NEXT:    s_cselect_b32 s34, s34, 0xa5
1464; GFX9-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
1465; GFX9-NEXT:    s_setpc_b64 s[30:31]
1466;
1467; GFX10-LABEL: s_set_rounding_select_1_2:
1468; GFX10:       ; %bb.0:
1469; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1470; GFX10-NEXT:    s_cmp_eq_u32 s4, 0
1471; GFX10-NEXT:    s_movk_i32 s34, 0xa50
1472; GFX10-NEXT:    s_cselect_b32 s34, s34, 0xa5
1473; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
1474; GFX10-NEXT:    s_setpc_b64 s[30:31]
1475;
1476; GFX11-LABEL: s_set_rounding_select_1_2:
1477; GFX11:       ; %bb.0:
1478; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1479; GFX11-NEXT:    s_cmp_eq_u32 s4, 0
1480; GFX11-NEXT:    s_movk_i32 s0, 0xa50
1481; GFX11-NEXT:    s_cselect_b32 s0, s0, 0xa5
1482; GFX11-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
1483; GFX11-NEXT:    s_setpc_b64 s[30:31]
1484  %cmp = icmp eq i32 %cond, 0
1485  %rounding = select i1 %cmp, i32 1, i32 2
1486  call void @llvm.set.rounding(i32 %rounding)
1487  ret void
1488}
1489
; %rounding is a select between 3 and 0. The checks expect a scalar select
; between 10 (0xa50f >> 12) and the unshifted 0xa50f, with no shift
; instruction.
1490define amdgpu_gfx void @s_set_rounding_select_3_0(i32 inreg %cond) {
1491; GFX678-LABEL: s_set_rounding_select_3_0:
1492; GFX678:       ; %bb.0:
1493; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1494; GFX678-NEXT:    s_cmp_eq_u32 s4, 0
1495; GFX678-NEXT:    s_cselect_b32 s34, 10, 0xa50f
1496; GFX678-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
1497; GFX678-NEXT:    s_setpc_b64 s[30:31]
1498;
1499; GFX9-LABEL: s_set_rounding_select_3_0:
1500; GFX9:       ; %bb.0:
1501; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1502; GFX9-NEXT:    s_cmp_eq_u32 s4, 0
1503; GFX9-NEXT:    s_cselect_b32 s34, 10, 0xa50f
1504; GFX9-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
1505; GFX9-NEXT:    s_setpc_b64 s[30:31]
1506;
1507; GFX10-LABEL: s_set_rounding_select_3_0:
1508; GFX10:       ; %bb.0:
1509; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1510; GFX10-NEXT:    s_cmp_eq_u32 s4, 0
1511; GFX10-NEXT:    s_cselect_b32 s34, 10, 0xa50f
1512; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
1513; GFX10-NEXT:    s_setpc_b64 s[30:31]
1514;
1515; GFX11-LABEL: s_set_rounding_select_3_0:
1516; GFX11:       ; %bb.0:
1517; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1518; GFX11-NEXT:    s_cmp_eq_u32 s4, 0
1519; GFX11-NEXT:    s_cselect_b32 s0, 10, 0xa50f
1520; GFX11-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
1521; GFX11-NEXT:    s_setpc_b64 s[30:31]
1522  %cmp = icmp eq i32 %cond, 0
1523  %rounding = select i1 %cmp, i32 3, i32 0
1524  call void @llvm.set.rounding(i32 %rounding)
1525  ret void
1526}
1527
; %rounding is a select between 4 and 0; 4 lies outside 0..3, so the checks
; expect the general 64-bit shift sequence. The selected value is built as a
; 0/1 v_cndmask result, read back with v_readfirstlane and shifted left by 2
; (giving 0 or 4) before the add -4 / unsigned min / shift steps.
1528define amdgpu_gfx void @s_set_rounding_select_4_0(i32 inreg %cond) {
1529; GFX678-LABEL: s_set_rounding_select_4_0:
1530; GFX678:       ; %bb.0:
1531; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1532; GFX678-NEXT:    s_cmp_eq_u32 s4, 0
1533; GFX678-NEXT:    s_cselect_b64 s[34:35], -1, 0
1534; GFX678-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[34:35]
1535; GFX678-NEXT:    v_readfirstlane_b32 s34, v0
1536; GFX678-NEXT:    s_lshl_b32 s34, s34, 2
1537; GFX678-NEXT:    s_add_i32 s35, s34, -4
1538; GFX678-NEXT:    s_min_u32 s34, s34, s35
1539; GFX678-NEXT:    s_lshl_b32 s36, s34, 2
1540; GFX678-NEXT:    s_mov_b32 s34, 0x1c84a50f
1541; GFX678-NEXT:    s_mov_b32 s35, 0xb73e62d9
1542; GFX678-NEXT:    s_lshr_b64 s[34:35], s[34:35], s36
1543; GFX678-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
1544; GFX678-NEXT:    s_setpc_b64 s[30:31]
1545;
1546; GFX9-LABEL: s_set_rounding_select_4_0:
1547; GFX9:       ; %bb.0:
1548; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1549; GFX9-NEXT:    s_cmp_eq_u32 s4, 0
1550; GFX9-NEXT:    s_cselect_b64 s[34:35], -1, 0
1551; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[34:35]
1552; GFX9-NEXT:    v_readfirstlane_b32 s34, v0
1553; GFX9-NEXT:    s_lshl_b32 s34, s34, 2
1554; GFX9-NEXT:    s_add_i32 s35, s34, -4
1555; GFX9-NEXT:    s_min_u32 s34, s34, s35
1556; GFX9-NEXT:    s_lshl_b32 s36, s34, 2
1557; GFX9-NEXT:    s_mov_b32 s34, 0x1c84a50f
1558; GFX9-NEXT:    s_mov_b32 s35, 0xb73e62d9
1559; GFX9-NEXT:    s_lshr_b64 s[34:35], s[34:35], s36
1560; GFX9-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
1561; GFX9-NEXT:    s_setpc_b64 s[30:31]
1562;
1563; GFX10-LABEL: s_set_rounding_select_4_0:
1564; GFX10:       ; %bb.0:
1565; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1566; GFX10-NEXT:    s_cmp_eq_u32 s4, 0
1567; GFX10-NEXT:    s_cselect_b32 s34, -1, 0
1568; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s34
1569; GFX10-NEXT:    v_readfirstlane_b32 s34, v0
1570; GFX10-NEXT:    s_lshl_b32 s34, s34, 2
1571; GFX10-NEXT:    s_add_i32 s35, s34, -4
1572; GFX10-NEXT:    s_min_u32 s36, s34, s35
1573; GFX10-NEXT:    s_mov_b32 s34, 0x1c84a50f
1574; GFX10-NEXT:    s_mov_b32 s35, 0xb73e62d9
1575; GFX10-NEXT:    s_lshl_b32 s36, s36, 2
1576; GFX10-NEXT:    s_lshr_b64 s[34:35], s[34:35], s36
1577; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
1578; GFX10-NEXT:    s_setpc_b64 s[30:31]
1579;
1580; GFX11-LABEL: s_set_rounding_select_4_0:
1581; GFX11:       ; %bb.0:
1582; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1583; GFX11-NEXT:    s_cmp_eq_u32 s4, 0
1584; GFX11-NEXT:    s_cselect_b32 s0, -1, 0
1585; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
1586; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
1587; GFX11-NEXT:    s_lshl_b32 s0, s0, 2
1588; GFX11-NEXT:    s_add_i32 s1, s0, -4
1589; GFX11-NEXT:    s_min_u32 s2, s0, s1
1590; GFX11-NEXT:    s_mov_b32 s0, 0x1c84a50f
1591; GFX11-NEXT:    s_mov_b32 s1, 0xb73e62d9
1592; GFX11-NEXT:    s_lshl_b32 s2, s2, 2
1593; GFX11-NEXT:    s_lshr_b64 s[0:1], s[0:1], s2
1594; GFX11-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
1595; GFX11-NEXT:    s_setpc_b64 s[30:31]
1596  %cmp = icmp eq i32 %cond, 0
1597  %rounding = select i1 %cmp, i32 4, i32 0
1598  call void @llvm.set.rounding(i32 %rounding)
1599  ret void
1600}
1601
; Test: llvm.set.rounding fed by a select between the constants 3 and 5
; (3 when the inreg argument %cond equals 0, otherwise 5).
; For every run line the expected code materializes the choice with one
; s_cselect_b32 ..., 3, 5 and then uses the same s_add_i32 / s_min_u32 /
; s_lshl_b32 / s_lshr_b64 sequence on the two 32-bit table constants before
; writing the result with s_setreg_b32 to hwreg(HW_REG_MODE, 0, 4).
1602define amdgpu_gfx void @s_set_rounding_select_3_5(i32 inreg %cond) {
1603; GFX678-LABEL: s_set_rounding_select_3_5:
1604; GFX678:       ; %bb.0:
1605; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1606; GFX678-NEXT:    s_cmp_eq_u32 s4, 0
1607; GFX678-NEXT:    s_cselect_b32 s34, 3, 5
1608; GFX678-NEXT:    s_add_i32 s35, s34, -4
1609; GFX678-NEXT:    s_min_u32 s34, s34, s35
1610; GFX678-NEXT:    s_lshl_b32 s36, s34, 2
1611; GFX678-NEXT:    s_mov_b32 s34, 0x1c84a50f
1612; GFX678-NEXT:    s_mov_b32 s35, 0xb73e62d9
1613; GFX678-NEXT:    s_lshr_b64 s[34:35], s[34:35], s36
1614; GFX678-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
1615; GFX678-NEXT:    s_setpc_b64 s[30:31]
1616;
1617; GFX9-LABEL: s_set_rounding_select_3_5:
1618; GFX9:       ; %bb.0:
1619; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1620; GFX9-NEXT:    s_cmp_eq_u32 s4, 0
1621; GFX9-NEXT:    s_cselect_b32 s34, 3, 5
1622; GFX9-NEXT:    s_add_i32 s35, s34, -4
1623; GFX9-NEXT:    s_min_u32 s34, s34, s35
1624; GFX9-NEXT:    s_lshl_b32 s36, s34, 2
1625; GFX9-NEXT:    s_mov_b32 s34, 0x1c84a50f
1626; GFX9-NEXT:    s_mov_b32 s35, 0xb73e62d9
1627; GFX9-NEXT:    s_lshr_b64 s[34:35], s[34:35], s36
1628; GFX9-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
1629; GFX9-NEXT:    s_setpc_b64 s[30:31]
1630;
1631; GFX10-LABEL: s_set_rounding_select_3_5:
1632; GFX10:       ; %bb.0:
1633; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1634; GFX10-NEXT:    s_cmp_eq_u32 s4, 0
1635; GFX10-NEXT:    s_cselect_b32 s34, 3, 5
1636; GFX10-NEXT:    s_add_i32 s35, s34, -4
1637; GFX10-NEXT:    s_min_u32 s36, s34, s35
1638; GFX10-NEXT:    s_mov_b32 s34, 0x1c84a50f
1639; GFX10-NEXT:    s_mov_b32 s35, 0xb73e62d9
1640; GFX10-NEXT:    s_lshl_b32 s36, s36, 2
1641; GFX10-NEXT:    s_lshr_b64 s[34:35], s[34:35], s36
1642; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
1643; GFX10-NEXT:    s_setpc_b64 s[30:31]
1644;
1645; GFX11-LABEL: s_set_rounding_select_3_5:
1646; GFX11:       ; %bb.0:
1647; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1648; GFX11-NEXT:    s_cmp_eq_u32 s4, 0
1649; GFX11-NEXT:    s_cselect_b32 s0, 3, 5
1650; GFX11-NEXT:    s_add_i32 s1, s0, -4
1651; GFX11-NEXT:    s_min_u32 s2, s0, s1
1652; GFX11-NEXT:    s_mov_b32 s0, 0x1c84a50f
1653; GFX11-NEXT:    s_mov_b32 s1, 0xb73e62d9
1654; GFX11-NEXT:    s_lshl_b32 s2, s2, 2
1655; GFX11-NEXT:    s_lshr_b64 s[0:1], s[0:1], s2
1656; GFX11-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
1657; GFX11-NEXT:    s_setpc_b64 s[30:31]
  ; Pick rounding value 3 when %cond is zero, otherwise 5, then apply it.
1658  %cmp = icmp eq i32 %cond, 0
1659  %rounding = select i1 %cmp, i32 3, i32 5
1660  call void @llvm.set.rounding(i32 %rounding)
1661  ret void
1662}
1663
; Test: llvm.get.rounding called immediately after llvm.set.rounding(i32 1)
; in a kernel, with the result stored through a volatile store to
; ptr addrspace(1) null.
; The expected code keeps both operations: the constant set is emitted as
; s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0 (tahiti, hawaii, fiji,
; gfx900 runs) or s_round_mode 0x0 (gfx1030, gfx1100 runs), and the mode is
; then read back with s_getreg_b32 and converted through the shift / and /
; compare / select sequence rather than being replaced by a constant.
1664define amdgpu_kernel void @get_rounding_after_set_rounding_1() {
1665; GFX6-LABEL: get_rounding_after_set_rounding_1:
1666; GFX6:       ; %bb.0:
1667; GFX6-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0
1668; GFX6-NEXT:    s_mov_b32 s3, 0xf000
1669; GFX6-NEXT:    s_nop 0
1670; GFX6-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4)
1671; GFX6-NEXT:    s_lshl_b32 s2, s0, 2
1672; GFX6-NEXT:    s_mov_b32 s0, 0xeb24da71
1673; GFX6-NEXT:    s_mov_b32 s1, 0xc96f385
1674; GFX6-NEXT:    s_lshr_b64 s[0:1], s[0:1], s2
1675; GFX6-NEXT:    s_and_b32 s0, s0, 15
1676; GFX6-NEXT:    s_add_i32 s1, s0, 4
1677; GFX6-NEXT:    s_cmp_lt_u32 s0, 4
1678; GFX6-NEXT:    s_cselect_b32 s4, s0, s1
1679; GFX6-NEXT:    s_mov_b32 s0, 0
1680; GFX6-NEXT:    s_mov_b32 s2, -1
1681; GFX6-NEXT:    s_mov_b32 s1, s0
1682; GFX6-NEXT:    v_mov_b32_e32 v0, s4
1683; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
1684; GFX6-NEXT:    s_waitcnt vmcnt(0)
1685; GFX6-NEXT:    s_endpgm
1686;
1687; GFX7-LABEL: get_rounding_after_set_rounding_1:
1688; GFX7:       ; %bb.0:
1689; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0
1690; GFX7-NEXT:    s_mov_b32 s3, 0xf000
1691; GFX7-NEXT:    s_nop 0
1692; GFX7-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4)
1693; GFX7-NEXT:    s_lshl_b32 s2, s0, 2
1694; GFX7-NEXT:    s_mov_b32 s0, 0xeb24da71
1695; GFX7-NEXT:    s_mov_b32 s1, 0xc96f385
1696; GFX7-NEXT:    s_lshr_b64 s[0:1], s[0:1], s2
1697; GFX7-NEXT:    s_and_b32 s0, s0, 15
1698; GFX7-NEXT:    s_add_i32 s1, s0, 4
1699; GFX7-NEXT:    s_cmp_lt_u32 s0, 4
1700; GFX7-NEXT:    s_cselect_b32 s4, s0, s1
1701; GFX7-NEXT:    s_mov_b32 s0, 0
1702; GFX7-NEXT:    s_mov_b32 s2, -1
1703; GFX7-NEXT:    s_mov_b32 s1, s0
1704; GFX7-NEXT:    v_mov_b32_e32 v0, s4
1705; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
1706; GFX7-NEXT:    s_waitcnt vmcnt(0)
1707; GFX7-NEXT:    s_endpgm
1708;
1709; GFX8-LABEL: get_rounding_after_set_rounding_1:
1710; GFX8:       ; %bb.0:
1711; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0
1712; GFX8-NEXT:    v_mov_b32_e32 v0, 0
1713; GFX8-NEXT:    v_mov_b32_e32 v1, 0
1714; GFX8-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4)
1715; GFX8-NEXT:    s_lshl_b32 s2, s0, 2
1716; GFX8-NEXT:    s_mov_b32 s0, 0xeb24da71
1717; GFX8-NEXT:    s_mov_b32 s1, 0xc96f385
1718; GFX8-NEXT:    s_lshr_b64 s[0:1], s[0:1], s2
1719; GFX8-NEXT:    s_and_b32 s0, s0, 15
1720; GFX8-NEXT:    s_add_i32 s1, s0, 4
1721; GFX8-NEXT:    s_cmp_lt_u32 s0, 4
1722; GFX8-NEXT:    s_cselect_b32 s0, s0, s1
1723; GFX8-NEXT:    v_mov_b32_e32 v2, s0
1724; GFX8-NEXT:    flat_store_dword v[0:1], v2
1725; GFX8-NEXT:    s_waitcnt vmcnt(0)
1726; GFX8-NEXT:    s_endpgm
1727;
1728; GFX9-LABEL: get_rounding_after_set_rounding_1:
1729; GFX9:       ; %bb.0:
1730; GFX9-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0
1731; GFX9-NEXT:    v_mov_b32_e32 v0, 0
1732; GFX9-NEXT:    v_mov_b32_e32 v1, 0
1733; GFX9-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4)
1734; GFX9-NEXT:    s_lshl_b32 s2, s0, 2
1735; GFX9-NEXT:    s_mov_b32 s0, 0xeb24da71
1736; GFX9-NEXT:    s_mov_b32 s1, 0xc96f385
1737; GFX9-NEXT:    s_lshr_b64 s[0:1], s[0:1], s2
1738; GFX9-NEXT:    s_and_b32 s0, s0, 15
1739; GFX9-NEXT:    s_add_i32 s1, s0, 4
1740; GFX9-NEXT:    s_cmp_lt_u32 s0, 4
1741; GFX9-NEXT:    s_cselect_b32 s0, s0, s1
1742; GFX9-NEXT:    v_mov_b32_e32 v2, s0
1743; GFX9-NEXT:    global_store_dword v[0:1], v2, off
1744; GFX9-NEXT:    s_waitcnt vmcnt(0)
1745; GFX9-NEXT:    s_endpgm
1746;
1747; GFX10-LABEL: get_rounding_after_set_rounding_1:
1748; GFX10:       ; %bb.0:
1749; GFX10-NEXT:    s_round_mode 0x0
1750; GFX10-NEXT:    s_mov_b32 s0, 0xeb24da71
1751; GFX10-NEXT:    s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 4)
1752; GFX10-NEXT:    s_mov_b32 s1, 0xc96f385
1753; GFX10-NEXT:    s_lshl_b32 s2, s2, 2
1754; GFX10-NEXT:    v_mov_b32_e32 v0, 0
1755; GFX10-NEXT:    s_lshr_b64 s[0:1], s[0:1], s2
1756; GFX10-NEXT:    v_mov_b32_e32 v1, 0
1757; GFX10-NEXT:    s_and_b32 s0, s0, 15
1758; GFX10-NEXT:    s_add_i32 s1, s0, 4
1759; GFX10-NEXT:    s_cmp_lt_u32 s0, 4
1760; GFX10-NEXT:    s_cselect_b32 s0, s0, s1
1761; GFX10-NEXT:    v_mov_b32_e32 v2, s0
1762; GFX10-NEXT:    global_store_dword v[0:1], v2, off
1763; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1764; GFX10-NEXT:    s_endpgm
1765;
1766; GFX11-LABEL: get_rounding_after_set_rounding_1:
1767; GFX11:       ; %bb.0:
1768; GFX11-NEXT:    s_round_mode 0x0
1769; GFX11-NEXT:    s_mov_b32 s0, 0xeb24da71
1770; GFX11-NEXT:    s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 4)
1771; GFX11-NEXT:    s_mov_b32 s1, 0xc96f385
1772; GFX11-NEXT:    s_lshl_b32 s2, s2, 2
1773; GFX11-NEXT:    v_mov_b32_e32 v0, 0
1774; GFX11-NEXT:    s_lshr_b64 s[0:1], s[0:1], s2
1775; GFX11-NEXT:    s_and_b32 s0, s0, 15
1776; GFX11-NEXT:    s_add_i32 s1, s0, 4
1777; GFX11-NEXT:    s_cmp_lt_u32 s0, 4
1778; GFX11-NEXT:    s_cselect_b32 s0, s0, s1
1779; GFX11-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s0
1780; GFX11-NEXT:    global_store_b32 v[0:1], v2, off dlc
1781; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1782; GFX11-NEXT:    s_endpgm
  ; Set the rounding value to 1, read it back, and store the value read with
  ; a volatile store so the read-back result has an observable use.
1783  tail call void @llvm.set.rounding(i32 1)
1784  %set.mode = tail call i32 @llvm.get.rounding()
1785  store volatile i32 %set.mode, ptr addrspace(1) null
1786  ret void
1787}
1788
1789;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
1790; GCN: {{.*}}
1791