xref: /llvm-project/llvm/test/CodeGen/AMDGPU/lround.ll (revision e78156a0e225673e592920410c8cadc94f19aa66)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub --version 5
2; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9-SDAG %s
3; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9-GISEL %s
4; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10-SDAG %s
5; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10-GISEL %s
6; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11-SDAG %s
7; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11-GISEL %s
8
; Codegen check for llvm.lround.i32.f32: takes a float argument, returns i32.
; Expected sequence for every RUN configuration: truncate the input
; (v_trunc_f32), subtract to get the remainder, compare its magnitude against
; 0.5, select 0 or 1.0, merge in the input's sign bit (v_bfi_b32 with a
; 0x7fffffff mask under SDAG, v_and_or_b32 with 0x80000000 under GlobalISel),
; add that to the truncated value and convert with v_cvt_i32_f32.
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
9define i32 @intrinsic_lround_i32_f32(float %arg) {
10; GFX9-SDAG-LABEL: intrinsic_lround_i32_f32:
11; GFX9-SDAG:       ; %bb.0: ; %entry
12; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13; GFX9-SDAG-NEXT:    v_trunc_f32_e32 v1, v0
14; GFX9-SDAG-NEXT:    v_sub_f32_e32 v2, v0, v1
15; GFX9-SDAG-NEXT:    v_cmp_ge_f32_e64 s[4:5], |v2|, 0.5
16; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1.0, s[4:5]
17; GFX9-SDAG-NEXT:    s_brev_b32 s4, -2
18; GFX9-SDAG-NEXT:    v_bfi_b32 v0, s4, v2, v0
19; GFX9-SDAG-NEXT:    v_add_f32_e32 v0, v1, v0
20; GFX9-SDAG-NEXT:    v_cvt_i32_f32_e32 v0, v0
21; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
22;
23; GFX9-GISEL-LABEL: intrinsic_lround_i32_f32:
24; GFX9-GISEL:       ; %bb.0: ; %entry
25; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
26; GFX9-GISEL-NEXT:    v_trunc_f32_e32 v1, v0
27; GFX9-GISEL-NEXT:    v_sub_f32_e32 v2, v0, v1
28; GFX9-GISEL-NEXT:    v_cmp_ge_f32_e64 s[4:5], |v2|, 0.5
29; GFX9-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1.0, s[4:5]
30; GFX9-GISEL-NEXT:    v_bfrev_b32_e32 v3, 1
31; GFX9-GISEL-NEXT:    v_and_or_b32 v0, v0, v3, v2
32; GFX9-GISEL-NEXT:    v_add_f32_e32 v0, v1, v0
33; GFX9-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
34; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
35;
36; GFX10-SDAG-LABEL: intrinsic_lround_i32_f32:
37; GFX10-SDAG:       ; %bb.0: ; %entry
38; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
39; GFX10-SDAG-NEXT:    v_trunc_f32_e32 v1, v0
40; GFX10-SDAG-NEXT:    v_sub_f32_e32 v2, v0, v1
41; GFX10-SDAG-NEXT:    v_cmp_ge_f32_e64 s4, |v2|, 0.5
42; GFX10-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1.0, s4
43; GFX10-SDAG-NEXT:    v_bfi_b32 v0, 0x7fffffff, v2, v0
44; GFX10-SDAG-NEXT:    v_add_f32_e32 v0, v1, v0
45; GFX10-SDAG-NEXT:    v_cvt_i32_f32_e32 v0, v0
46; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
47;
48; GFX10-GISEL-LABEL: intrinsic_lround_i32_f32:
49; GFX10-GISEL:       ; %bb.0: ; %entry
50; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
51; GFX10-GISEL-NEXT:    v_trunc_f32_e32 v1, v0
52; GFX10-GISEL-NEXT:    v_sub_f32_e32 v2, v0, v1
53; GFX10-GISEL-NEXT:    v_cmp_ge_f32_e64 s4, |v2|, 0.5
54; GFX10-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1.0, s4
55; GFX10-GISEL-NEXT:    v_and_or_b32 v0, 0x80000000, v0, v2
56; GFX10-GISEL-NEXT:    v_add_f32_e32 v0, v1, v0
57; GFX10-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
58; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
59;
60; GFX11-SDAG-LABEL: intrinsic_lround_i32_f32:
61; GFX11-SDAG:       ; %bb.0: ; %entry
62; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
63; GFX11-SDAG-NEXT:    v_trunc_f32_e32 v1, v0
64; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
65; GFX11-SDAG-NEXT:    v_sub_f32_e32 v2, v0, v1
66; GFX11-SDAG-NEXT:    v_cmp_ge_f32_e64 s0, |v2|, 0.5
67; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
68; GFX11-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1.0, s0
69; GFX11-SDAG-NEXT:    v_bfi_b32 v0, 0x7fffffff, v2, v0
70; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
71; GFX11-SDAG-NEXT:    v_add_f32_e32 v0, v1, v0
72; GFX11-SDAG-NEXT:    v_cvt_i32_f32_e32 v0, v0
73; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
74;
75; GFX11-GISEL-LABEL: intrinsic_lround_i32_f32:
76; GFX11-GISEL:       ; %bb.0: ; %entry
77; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
78; GFX11-GISEL-NEXT:    v_trunc_f32_e32 v1, v0
79; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
80; GFX11-GISEL-NEXT:    v_sub_f32_e32 v2, v0, v1
81; GFX11-GISEL-NEXT:    v_cmp_ge_f32_e64 s0, |v2|, 0.5
82; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
83; GFX11-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1.0, s0
84; GFX11-GISEL-NEXT:    v_and_or_b32 v0, 0x80000000, v0, v2
85; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
86; GFX11-GISEL-NEXT:    v_add_f32_e32 v0, v1, v0
87; GFX11-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
88; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
89entry:
90  %res = tail call i32 @llvm.lround.i32.f32(float %arg)
91  ret i32 %res
92}
93
; Codegen check for llvm.lround.i32.f64: takes a double argument, returns i32.
; Expected sequence: v_trunc_f64, subtract via v_add_f64 with a negated operand,
; compare the remainder's magnitude against 0.5, select 0 or 0x3ff00000 (the
; high dword of the double 1.0), merge the sign bit of the input's high dword
; (v1) into it, pair it with a zero low dword, add to the truncated value and
; convert with v_cvt_i32_f64.
; Under GlobalISel the zero low dword is currently produced by
; "v_and_or_b32 v0, v0, 0, 0"; under SDAG by "v_mov_b32 v0, 0".
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
94define i32 @intrinsic_lround_i32_f64(double %arg) {
95; GFX9-SDAG-LABEL: intrinsic_lround_i32_f64:
96; GFX9-SDAG:       ; %bb.0: ; %entry
97; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
98; GFX9-SDAG-NEXT:    v_trunc_f64_e32 v[2:3], v[0:1]
99; GFX9-SDAG-NEXT:    s_brev_b32 s4, -2
100; GFX9-SDAG-NEXT:    v_add_f64 v[4:5], v[0:1], -v[2:3]
101; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, 0x3ff00000
102; GFX9-SDAG-NEXT:    v_cmp_ge_f64_e64 vcc, |v[4:5]|, 0.5
103; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
104; GFX9-SDAG-NEXT:    v_bfi_b32 v1, s4, v0, v1
105; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, 0
106; GFX9-SDAG-NEXT:    v_add_f64 v[0:1], v[2:3], v[0:1]
107; GFX9-SDAG-NEXT:    v_cvt_i32_f64_e32 v0, v[0:1]
108; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
109;
110; GFX9-GISEL-LABEL: intrinsic_lround_i32_f64:
111; GFX9-GISEL:       ; %bb.0: ; %entry
112; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
113; GFX9-GISEL-NEXT:    v_trunc_f64_e32 v[2:3], v[0:1]
114; GFX9-GISEL-NEXT:    v_mov_b32_e32 v6, 0x3ff00000
115; GFX9-GISEL-NEXT:    s_brev_b32 s4, 1
116; GFX9-GISEL-NEXT:    v_add_f64 v[4:5], v[0:1], -v[2:3]
117; GFX9-GISEL-NEXT:    v_and_or_b32 v0, v0, 0, 0
118; GFX9-GISEL-NEXT:    v_cmp_ge_f64_e64 vcc, |v[4:5]|, 0.5
119; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v6, vcc
120; GFX9-GISEL-NEXT:    v_and_or_b32 v1, v1, s4, v4
121; GFX9-GISEL-NEXT:    v_add_f64 v[0:1], v[2:3], v[0:1]
122; GFX9-GISEL-NEXT:    v_cvt_i32_f64_e32 v0, v[0:1]
123; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
124;
125; GFX10-SDAG-LABEL: intrinsic_lround_i32_f64:
126; GFX10-SDAG:       ; %bb.0: ; %entry
127; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
128; GFX10-SDAG-NEXT:    v_trunc_f64_e32 v[2:3], v[0:1]
129; GFX10-SDAG-NEXT:    v_add_f64 v[4:5], v[0:1], -v[2:3]
130; GFX10-SDAG-NEXT:    v_cmp_ge_f64_e64 s4, |v[4:5]|, 0.5
131; GFX10-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 0x3ff00000, s4
132; GFX10-SDAG-NEXT:    v_bfi_b32 v1, 0x7fffffff, v0, v1
133; GFX10-SDAG-NEXT:    v_mov_b32_e32 v0, 0
134; GFX10-SDAG-NEXT:    v_add_f64 v[0:1], v[2:3], v[0:1]
135; GFX10-SDAG-NEXT:    v_cvt_i32_f64_e32 v0, v[0:1]
136; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
137;
138; GFX10-GISEL-LABEL: intrinsic_lround_i32_f64:
139; GFX10-GISEL:       ; %bb.0: ; %entry
140; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
141; GFX10-GISEL-NEXT:    v_trunc_f64_e32 v[2:3], v[0:1]
142; GFX10-GISEL-NEXT:    v_add_f64 v[4:5], v[0:1], -v[2:3]
143; GFX10-GISEL-NEXT:    v_and_or_b32 v0, v0, 0, 0
144; GFX10-GISEL-NEXT:    v_cmp_ge_f64_e64 s4, |v[4:5]|, 0.5
145; GFX10-GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 0x3ff00000, s4
146; GFX10-GISEL-NEXT:    v_and_or_b32 v1, 0x80000000, v1, v4
147; GFX10-GISEL-NEXT:    v_add_f64 v[0:1], v[2:3], v[0:1]
148; GFX10-GISEL-NEXT:    v_cvt_i32_f64_e32 v0, v[0:1]
149; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
150;
151; GFX11-SDAG-LABEL: intrinsic_lround_i32_f64:
152; GFX11-SDAG:       ; %bb.0: ; %entry
153; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
154; GFX11-SDAG-NEXT:    v_trunc_f64_e32 v[2:3], v[0:1]
155; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
156; GFX11-SDAG-NEXT:    v_add_f64 v[4:5], v[0:1], -v[2:3]
157; GFX11-SDAG-NEXT:    v_cmp_ge_f64_e64 s0, |v[4:5]|, 0.5
158; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
159; GFX11-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 0x3ff00000, s0
160; GFX11-SDAG-NEXT:    v_bfi_b32 v1, 0x7fffffff, v0, v1
161; GFX11-SDAG-NEXT:    v_mov_b32_e32 v0, 0
162; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
163; GFX11-SDAG-NEXT:    v_add_f64 v[0:1], v[2:3], v[0:1]
164; GFX11-SDAG-NEXT:    v_cvt_i32_f64_e32 v0, v[0:1]
165; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
166;
167; GFX11-GISEL-LABEL: intrinsic_lround_i32_f64:
168; GFX11-GISEL:       ; %bb.0: ; %entry
169; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
170; GFX11-GISEL-NEXT:    v_trunc_f64_e32 v[2:3], v[0:1]
171; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
172; GFX11-GISEL-NEXT:    v_add_f64 v[4:5], v[0:1], -v[2:3]
173; GFX11-GISEL-NEXT:    v_and_or_b32 v0, v0, 0, 0
174; GFX11-GISEL-NEXT:    v_cmp_ge_f64_e64 s0, |v[4:5]|, 0.5
175; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
176; GFX11-GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 0x3ff00000, s0
177; GFX11-GISEL-NEXT:    v_and_or_b32 v1, 0x80000000, v1, v4
178; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
179; GFX11-GISEL-NEXT:    v_add_f64 v[0:1], v[2:3], v[0:1]
180; GFX11-GISEL-NEXT:    v_cvt_i32_f64_e32 v0, v[0:1]
181; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
182entry:
183  %res = tail call i32 @llvm.lround.i32.f64(double %arg)
184  ret i32 %res
185}
186
; Codegen check for llvm.lround.i64.f32: takes a float argument, returns i64.
; The first half of each expected sequence rounds the float (v_trunc_f32,
; remainder compared against 0.5, 0/1.0 with the input's sign merged in, then
; v_add_f32). The second half is a float-to-i64 conversion built from 32-bit
; pieces: truncate, multiply the magnitude by 0x2f800000 (2^-32 as a float) and
; floor it for the high dword, v_fma_f32 with 0xcf800000 (-2^32) for the low
; dword, v_cvt_u32_f32 on both, then apply the sign by xor-ing with the
; arithmetic-shift-by-31 mask and doing a 64-bit subtract with carry.
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
187define i64 @intrinsic_lround_i64_f32(float %arg) {
188; GFX9-SDAG-LABEL: intrinsic_lround_i64_f32:
189; GFX9-SDAG:       ; %bb.0: ; %entry
190; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
191; GFX9-SDAG-NEXT:    v_trunc_f32_e32 v1, v0
192; GFX9-SDAG-NEXT:    v_sub_f32_e32 v2, v0, v1
193; GFX9-SDAG-NEXT:    v_cmp_ge_f32_e64 s[4:5], |v2|, 0.5
194; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1.0, s[4:5]
195; GFX9-SDAG-NEXT:    s_brev_b32 s4, -2
196; GFX9-SDAG-NEXT:    v_bfi_b32 v0, s4, v2, v0
197; GFX9-SDAG-NEXT:    v_add_f32_e32 v0, v1, v0
198; GFX9-SDAG-NEXT:    v_trunc_f32_e32 v0, v0
199; GFX9-SDAG-NEXT:    s_mov_b32 s4, 0x2f800000
200; GFX9-SDAG-NEXT:    v_mul_f32_e64 v1, |v0|, s4
201; GFX9-SDAG-NEXT:    v_floor_f32_e32 v1, v1
202; GFX9-SDAG-NEXT:    s_mov_b32 s4, 0xcf800000
203; GFX9-SDAG-NEXT:    v_fma_f32 v2, v1, s4, |v0|
204; GFX9-SDAG-NEXT:    v_cvt_u32_f32_e32 v2, v2
205; GFX9-SDAG-NEXT:    v_cvt_u32_f32_e32 v1, v1
206; GFX9-SDAG-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
207; GFX9-SDAG-NEXT:    v_xor_b32_e32 v0, v2, v3
208; GFX9-SDAG-NEXT:    v_xor_b32_e32 v1, v1, v3
209; GFX9-SDAG-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v3
210; GFX9-SDAG-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
211; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
212;
213; GFX9-GISEL-LABEL: intrinsic_lround_i64_f32:
214; GFX9-GISEL:       ; %bb.0: ; %entry
215; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
216; GFX9-GISEL-NEXT:    v_trunc_f32_e32 v1, v0
217; GFX9-GISEL-NEXT:    v_sub_f32_e32 v2, v0, v1
218; GFX9-GISEL-NEXT:    v_cmp_ge_f32_e64 s[4:5], |v2|, 0.5
219; GFX9-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1.0, s[4:5]
220; GFX9-GISEL-NEXT:    v_bfrev_b32_e32 v3, 1
221; GFX9-GISEL-NEXT:    v_and_or_b32 v0, v0, v3, v2
222; GFX9-GISEL-NEXT:    v_add_f32_e32 v0, v1, v0
223; GFX9-GISEL-NEXT:    v_trunc_f32_e32 v1, v0
224; GFX9-GISEL-NEXT:    v_mov_b32_e32 v2, 0x2f800000
225; GFX9-GISEL-NEXT:    v_mul_f32_e64 v2, |v1|, v2
226; GFX9-GISEL-NEXT:    v_floor_f32_e32 v2, v2
227; GFX9-GISEL-NEXT:    v_mov_b32_e32 v3, 0xcf800000
228; GFX9-GISEL-NEXT:    v_fma_f32 v1, v2, v3, |v1|
229; GFX9-GISEL-NEXT:    v_cvt_u32_f32_e32 v1, v1
230; GFX9-GISEL-NEXT:    v_cvt_u32_f32_e32 v2, v2
231; GFX9-GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
232; GFX9-GISEL-NEXT:    v_xor_b32_e32 v0, v1, v3
233; GFX9-GISEL-NEXT:    v_xor_b32_e32 v1, v2, v3
234; GFX9-GISEL-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v3
235; GFX9-GISEL-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
236; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
237;
238; GFX10-SDAG-LABEL: intrinsic_lround_i64_f32:
239; GFX10-SDAG:       ; %bb.0: ; %entry
240; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
241; GFX10-SDAG-NEXT:    v_trunc_f32_e32 v1, v0
242; GFX10-SDAG-NEXT:    v_sub_f32_e32 v2, v0, v1
243; GFX10-SDAG-NEXT:    v_cmp_ge_f32_e64 s4, |v2|, 0.5
244; GFX10-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1.0, s4
245; GFX10-SDAG-NEXT:    v_bfi_b32 v0, 0x7fffffff, v2, v0
246; GFX10-SDAG-NEXT:    v_add_f32_e32 v0, v1, v0
247; GFX10-SDAG-NEXT:    v_trunc_f32_e32 v0, v0
248; GFX10-SDAG-NEXT:    v_mul_f32_e64 v1, 0x2f800000, |v0|
249; GFX10-SDAG-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
250; GFX10-SDAG-NEXT:    v_floor_f32_e32 v1, v1
251; GFX10-SDAG-NEXT:    v_fma_f32 v2, 0xcf800000, v1, |v0|
252; GFX10-SDAG-NEXT:    v_cvt_u32_f32_e32 v1, v1
253; GFX10-SDAG-NEXT:    v_cvt_u32_f32_e32 v0, v2
254; GFX10-SDAG-NEXT:    v_xor_b32_e32 v1, v1, v3
255; GFX10-SDAG-NEXT:    v_xor_b32_e32 v0, v0, v3
256; GFX10-SDAG-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v3
257; GFX10-SDAG-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
258; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
259;
260; GFX10-GISEL-LABEL: intrinsic_lround_i64_f32:
261; GFX10-GISEL:       ; %bb.0: ; %entry
262; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
263; GFX10-GISEL-NEXT:    v_trunc_f32_e32 v1, v0
264; GFX10-GISEL-NEXT:    v_sub_f32_e32 v2, v0, v1
265; GFX10-GISEL-NEXT:    v_cmp_ge_f32_e64 s4, |v2|, 0.5
266; GFX10-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1.0, s4
267; GFX10-GISEL-NEXT:    v_and_or_b32 v0, 0x80000000, v0, v2
268; GFX10-GISEL-NEXT:    v_add_f32_e32 v0, v1, v0
269; GFX10-GISEL-NEXT:    v_trunc_f32_e32 v1, v0
270; GFX10-GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
271; GFX10-GISEL-NEXT:    v_mul_f32_e64 v2, 0x2f800000, |v1|
272; GFX10-GISEL-NEXT:    v_floor_f32_e32 v2, v2
273; GFX10-GISEL-NEXT:    v_fma_f32 v1, 0xcf800000, v2, |v1|
274; GFX10-GISEL-NEXT:    v_cvt_u32_f32_e32 v0, v1
275; GFX10-GISEL-NEXT:    v_cvt_u32_f32_e32 v1, v2
276; GFX10-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v3
277; GFX10-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
278; GFX10-GISEL-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v3
279; GFX10-GISEL-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
280; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
281;
282; GFX11-SDAG-LABEL: intrinsic_lround_i64_f32:
283; GFX11-SDAG:       ; %bb.0: ; %entry
284; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
285; GFX11-SDAG-NEXT:    v_trunc_f32_e32 v1, v0
286; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
287; GFX11-SDAG-NEXT:    v_sub_f32_e32 v2, v0, v1
288; GFX11-SDAG-NEXT:    v_cmp_ge_f32_e64 s0, |v2|, 0.5
289; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
290; GFX11-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1.0, s0
291; GFX11-SDAG-NEXT:    v_bfi_b32 v0, 0x7fffffff, v2, v0
292; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
293; GFX11-SDAG-NEXT:    v_add_f32_e32 v0, v1, v0
294; GFX11-SDAG-NEXT:    v_trunc_f32_e32 v0, v0
295; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
296; GFX11-SDAG-NEXT:    v_mul_f32_e64 v1, 0x2f800000, |v0|
297; GFX11-SDAG-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
298; GFX11-SDAG-NEXT:    v_floor_f32_e32 v1, v1
299; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
300; GFX11-SDAG-NEXT:    v_fma_f32 v2, 0xcf800000, v1, |v0|
301; GFX11-SDAG-NEXT:    v_cvt_u32_f32_e32 v1, v1
302; GFX11-SDAG-NEXT:    v_cvt_u32_f32_e32 v0, v2
303; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
304; GFX11-SDAG-NEXT:    v_xor_b32_e32 v1, v1, v3
305; GFX11-SDAG-NEXT:    v_xor_b32_e32 v0, v0, v3
306; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
307; GFX11-SDAG-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v3
308; GFX11-SDAG-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
309; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
310;
311; GFX11-GISEL-LABEL: intrinsic_lround_i64_f32:
312; GFX11-GISEL:       ; %bb.0: ; %entry
313; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
314; GFX11-GISEL-NEXT:    v_trunc_f32_e32 v1, v0
315; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
316; GFX11-GISEL-NEXT:    v_sub_f32_e32 v2, v0, v1
317; GFX11-GISEL-NEXT:    v_cmp_ge_f32_e64 s0, |v2|, 0.5
318; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
319; GFX11-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1.0, s0
320; GFX11-GISEL-NEXT:    v_and_or_b32 v0, 0x80000000, v0, v2
321; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
322; GFX11-GISEL-NEXT:    v_add_f32_e32 v0, v1, v0
323; GFX11-GISEL-NEXT:    v_trunc_f32_e32 v1, v0
324; GFX11-GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
325; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
326; GFX11-GISEL-NEXT:    v_mul_f32_e64 v2, 0x2f800000, |v1|
327; GFX11-GISEL-NEXT:    v_floor_f32_e32 v2, v2
328; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
329; GFX11-GISEL-NEXT:    v_fma_f32 v1, 0xcf800000, v2, |v1|
330; GFX11-GISEL-NEXT:    v_cvt_u32_f32_e32 v0, v1
331; GFX11-GISEL-NEXT:    v_cvt_u32_f32_e32 v1, v2
332; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
333; GFX11-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v3
334; GFX11-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
335; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
336; GFX11-GISEL-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v3
337; GFX11-GISEL-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
338; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
339entry:
340  %res = tail call i64 @llvm.lround.i64.f32(float %arg)
341  ret i64 %res
342}
343
; Codegen check for llvm.lround.i64.f64: takes a double argument, returns i64.
; The first half of each expected sequence rounds the double (v_trunc_f64,
; remainder compared against 0.5, 0 or 0x3ff00000 with the sign of the input's
; high dword merged in, zero low dword, then v_add_f64). The second half is a
; double-to-i64 conversion: truncate, scale by 2^-32 (v_ldexp_f64 with -32
; under SDAG, v_mul_f64 by a constant with high dword 0x3df00000 under
; GlobalISel), floor for the high part, v_fma_f64 with a constant whose high
; dword is 0xc1f00000 (-2^32) for the low part, then v_cvt_u32_f64 for the low
; dword and v_cvt_i32_f64 for the high dword.
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
344define i64 @intrinsic_lround_i64_f64(double %arg) {
345; GFX9-SDAG-LABEL: intrinsic_lround_i64_f64:
346; GFX9-SDAG:       ; %bb.0: ; %entry
347; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
348; GFX9-SDAG-NEXT:    v_trunc_f64_e32 v[2:3], v[0:1]
349; GFX9-SDAG-NEXT:    v_mov_b32_e32 v6, 0x3ff00000
350; GFX9-SDAG-NEXT:    s_brev_b32 s4, -2
351; GFX9-SDAG-NEXT:    v_add_f64 v[4:5], v[0:1], -v[2:3]
352; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, 0
353; GFX9-SDAG-NEXT:    v_cmp_ge_f64_e64 vcc, |v[4:5]|, 0.5
354; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v6, vcc
355; GFX9-SDAG-NEXT:    v_bfi_b32 v1, s4, v4, v1
356; GFX9-SDAG-NEXT:    v_add_f64 v[0:1], v[2:3], v[0:1]
357; GFX9-SDAG-NEXT:    s_movk_i32 s4, 0xffe0
358; GFX9-SDAG-NEXT:    v_trunc_f64_e32 v[0:1], v[0:1]
359; GFX9-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[0:1], s4
360; GFX9-SDAG-NEXT:    s_mov_b32 s4, 0
361; GFX9-SDAG-NEXT:    s_mov_b32 s5, 0xc1f00000
362; GFX9-SDAG-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
363; GFX9-SDAG-NEXT:    v_fma_f64 v[0:1], v[2:3], s[4:5], v[0:1]
364; GFX9-SDAG-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
365; GFX9-SDAG-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
366; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
367;
368; GFX9-GISEL-LABEL: intrinsic_lround_i64_f64:
369; GFX9-GISEL:       ; %bb.0: ; %entry
370; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
371; GFX9-GISEL-NEXT:    v_trunc_f64_e32 v[2:3], v[0:1]
372; GFX9-GISEL-NEXT:    v_mov_b32_e32 v6, 0x3ff00000
373; GFX9-GISEL-NEXT:    s_brev_b32 s4, 1
374; GFX9-GISEL-NEXT:    v_add_f64 v[4:5], v[0:1], -v[2:3]
375; GFX9-GISEL-NEXT:    v_and_or_b32 v0, v0, 0, 0
376; GFX9-GISEL-NEXT:    v_cmp_ge_f64_e64 vcc, |v[4:5]|, 0.5
377; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v6, vcc
378; GFX9-GISEL-NEXT:    v_and_or_b32 v1, v1, s4, v4
379; GFX9-GISEL-NEXT:    v_add_f64 v[0:1], v[2:3], v[0:1]
380; GFX9-GISEL-NEXT:    v_mov_b32_e32 v2, 0
381; GFX9-GISEL-NEXT:    v_mov_b32_e32 v3, 0x3df00000
382; GFX9-GISEL-NEXT:    v_mov_b32_e32 v4, 0
383; GFX9-GISEL-NEXT:    v_mov_b32_e32 v5, 0xc1f00000
384; GFX9-GISEL-NEXT:    v_trunc_f64_e32 v[0:1], v[0:1]
385; GFX9-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
386; GFX9-GISEL-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
387; GFX9-GISEL-NEXT:    v_fma_f64 v[0:1], v[2:3], v[4:5], v[0:1]
388; GFX9-GISEL-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
389; GFX9-GISEL-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
390; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
391;
392; GFX10-SDAG-LABEL: intrinsic_lround_i64_f64:
393; GFX10-SDAG:       ; %bb.0: ; %entry
394; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
395; GFX10-SDAG-NEXT:    v_trunc_f64_e32 v[2:3], v[0:1]
396; GFX10-SDAG-NEXT:    v_add_f64 v[4:5], v[0:1], -v[2:3]
397; GFX10-SDAG-NEXT:    v_mov_b32_e32 v0, 0
398; GFX10-SDAG-NEXT:    v_cmp_ge_f64_e64 s4, |v[4:5]|, 0.5
399; GFX10-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, 0x3ff00000, s4
400; GFX10-SDAG-NEXT:    v_bfi_b32 v1, 0x7fffffff, v4, v1
401; GFX10-SDAG-NEXT:    v_add_f64 v[0:1], v[2:3], v[0:1]
402; GFX10-SDAG-NEXT:    v_trunc_f64_e32 v[0:1], v[0:1]
403; GFX10-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[0:1], 0xffffffe0
404; GFX10-SDAG-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
405; GFX10-SDAG-NEXT:    v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
406; GFX10-SDAG-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
407; GFX10-SDAG-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
408; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
409;
410; GFX10-GISEL-LABEL: intrinsic_lround_i64_f64:
411; GFX10-GISEL:       ; %bb.0: ; %entry
412; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
413; GFX10-GISEL-NEXT:    v_trunc_f64_e32 v[2:3], v[0:1]
414; GFX10-GISEL-NEXT:    v_add_f64 v[4:5], v[0:1], -v[2:3]
415; GFX10-GISEL-NEXT:    v_and_or_b32 v0, v0, 0, 0
416; GFX10-GISEL-NEXT:    v_cmp_ge_f64_e64 s4, |v[4:5]|, 0.5
417; GFX10-GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 0x3ff00000, s4
418; GFX10-GISEL-NEXT:    v_and_or_b32 v1, 0x80000000, v1, v4
419; GFX10-GISEL-NEXT:    v_add_f64 v[0:1], v[2:3], v[0:1]
420; GFX10-GISEL-NEXT:    v_trunc_f64_e32 v[0:1], v[0:1]
421; GFX10-GISEL-NEXT:    v_mul_f64 v[2:3], 0x3df00000, v[0:1]
422; GFX10-GISEL-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
423; GFX10-GISEL-NEXT:    v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
424; GFX10-GISEL-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
425; GFX10-GISEL-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
426; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
427;
428; GFX11-SDAG-LABEL: intrinsic_lround_i64_f64:
429; GFX11-SDAG:       ; %bb.0: ; %entry
430; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
431; GFX11-SDAG-NEXT:    v_trunc_f64_e32 v[2:3], v[0:1]
432; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
433; GFX11-SDAG-NEXT:    v_add_f64 v[4:5], v[0:1], -v[2:3]
434; GFX11-SDAG-NEXT:    v_mov_b32_e32 v0, 0
435; GFX11-SDAG-NEXT:    v_cmp_ge_f64_e64 s0, |v[4:5]|, 0.5
436; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
437; GFX11-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, 0x3ff00000, s0
438; GFX11-SDAG-NEXT:    v_bfi_b32 v1, 0x7fffffff, v4, v1
439; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
440; GFX11-SDAG-NEXT:    v_add_f64 v[0:1], v[2:3], v[0:1]
441; GFX11-SDAG-NEXT:    v_trunc_f64_e32 v[0:1], v[0:1]
442; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
443; GFX11-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[0:1], 0xffffffe0
444; GFX11-SDAG-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
445; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
446; GFX11-SDAG-NEXT:    v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
447; GFX11-SDAG-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
448; GFX11-SDAG-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
449; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
450;
451; GFX11-GISEL-LABEL: intrinsic_lround_i64_f64:
452; GFX11-GISEL:       ; %bb.0: ; %entry
453; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
454; GFX11-GISEL-NEXT:    v_trunc_f64_e32 v[2:3], v[0:1]
455; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
456; GFX11-GISEL-NEXT:    v_add_f64 v[4:5], v[0:1], -v[2:3]
457; GFX11-GISEL-NEXT:    v_and_or_b32 v0, v0, 0, 0
458; GFX11-GISEL-NEXT:    v_cmp_ge_f64_e64 s0, |v[4:5]|, 0.5
459; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
460; GFX11-GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 0x3ff00000, s0
461; GFX11-GISEL-NEXT:    v_and_or_b32 v1, 0x80000000, v1, v4
462; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
463; GFX11-GISEL-NEXT:    v_add_f64 v[0:1], v[2:3], v[0:1]
464; GFX11-GISEL-NEXT:    v_trunc_f64_e32 v[0:1], v[0:1]
465; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
466; GFX11-GISEL-NEXT:    v_mul_f64 v[2:3], 0x3df00000, v[0:1]
467; GFX11-GISEL-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
468; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
469; GFX11-GISEL-NEXT:    v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
470; GFX11-GISEL-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
471; GFX11-GISEL-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
472; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
473entry:
474  %res = tail call i64 @llvm.lround.i64.f64(double %arg)
475  ret i64 %res
476}
477
478define i64 @intrinsic_llround_i64_f32(float %arg) {
479; GFX9-SDAG-LABEL: intrinsic_llround_i64_f32:
480; GFX9-SDAG:       ; %bb.0: ; %entry
481; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
482; GFX9-SDAG-NEXT:    v_trunc_f32_e32 v1, v0
483; GFX9-SDAG-NEXT:    v_sub_f32_e32 v2, v0, v1
484; GFX9-SDAG-NEXT:    v_cmp_ge_f32_e64 s[4:5], |v2|, 0.5
485; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1.0, s[4:5]
486; GFX9-SDAG-NEXT:    s_brev_b32 s4, -2
487; GFX9-SDAG-NEXT:    v_bfi_b32 v0, s4, v2, v0
488; GFX9-SDAG-NEXT:    v_add_f32_e32 v0, v1, v0
489; GFX9-SDAG-NEXT:    v_trunc_f32_e32 v0, v0
490; GFX9-SDAG-NEXT:    s_mov_b32 s4, 0x2f800000
491; GFX9-SDAG-NEXT:    v_mul_f32_e64 v1, |v0|, s4
492; GFX9-SDAG-NEXT:    v_floor_f32_e32 v1, v1
493; GFX9-SDAG-NEXT:    s_mov_b32 s4, 0xcf800000
494; GFX9-SDAG-NEXT:    v_fma_f32 v2, v1, s4, |v0|
495; GFX9-SDAG-NEXT:    v_cvt_u32_f32_e32 v2, v2
496; GFX9-SDAG-NEXT:    v_cvt_u32_f32_e32 v1, v1
497; GFX9-SDAG-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
498; GFX9-SDAG-NEXT:    v_xor_b32_e32 v0, v2, v3
499; GFX9-SDAG-NEXT:    v_xor_b32_e32 v1, v1, v3
500; GFX9-SDAG-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v3
501; GFX9-SDAG-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
502; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
503;
504; GFX9-GISEL-LABEL: intrinsic_llround_i64_f32:
505; GFX9-GISEL:       ; %bb.0: ; %entry
506; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
507; GFX9-GISEL-NEXT:    v_trunc_f32_e32 v1, v0
508; GFX9-GISEL-NEXT:    v_sub_f32_e32 v2, v0, v1
509; GFX9-GISEL-NEXT:    v_cmp_ge_f32_e64 s[4:5], |v2|, 0.5
510; GFX9-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1.0, s[4:5]
511; GFX9-GISEL-NEXT:    v_bfrev_b32_e32 v3, 1
512; GFX9-GISEL-NEXT:    v_and_or_b32 v0, v0, v3, v2
513; GFX9-GISEL-NEXT:    v_add_f32_e32 v0, v1, v0
514; GFX9-GISEL-NEXT:    v_trunc_f32_e32 v1, v0
515; GFX9-GISEL-NEXT:    v_mov_b32_e32 v2, 0x2f800000
516; GFX9-GISEL-NEXT:    v_mul_f32_e64 v2, |v1|, v2
517; GFX9-GISEL-NEXT:    v_floor_f32_e32 v2, v2
518; GFX9-GISEL-NEXT:    v_mov_b32_e32 v3, 0xcf800000
519; GFX9-GISEL-NEXT:    v_fma_f32 v1, v2, v3, |v1|
520; GFX9-GISEL-NEXT:    v_cvt_u32_f32_e32 v1, v1
521; GFX9-GISEL-NEXT:    v_cvt_u32_f32_e32 v2, v2
522; GFX9-GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
523; GFX9-GISEL-NEXT:    v_xor_b32_e32 v0, v1, v3
524; GFX9-GISEL-NEXT:    v_xor_b32_e32 v1, v2, v3
525; GFX9-GISEL-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v3
526; GFX9-GISEL-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
527; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
528;
529; GFX10-SDAG-LABEL: intrinsic_llround_i64_f32:
530; GFX10-SDAG:       ; %bb.0: ; %entry
531; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
532; GFX10-SDAG-NEXT:    v_trunc_f32_e32 v1, v0
533; GFX10-SDAG-NEXT:    v_sub_f32_e32 v2, v0, v1
534; GFX10-SDAG-NEXT:    v_cmp_ge_f32_e64 s4, |v2|, 0.5
535; GFX10-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1.0, s4
536; GFX10-SDAG-NEXT:    v_bfi_b32 v0, 0x7fffffff, v2, v0
537; GFX10-SDAG-NEXT:    v_add_f32_e32 v0, v1, v0
538; GFX10-SDAG-NEXT:    v_trunc_f32_e32 v0, v0
539; GFX10-SDAG-NEXT:    v_mul_f32_e64 v1, 0x2f800000, |v0|
540; GFX10-SDAG-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
541; GFX10-SDAG-NEXT:    v_floor_f32_e32 v1, v1
542; GFX10-SDAG-NEXT:    v_fma_f32 v2, 0xcf800000, v1, |v0|
543; GFX10-SDAG-NEXT:    v_cvt_u32_f32_e32 v1, v1
544; GFX10-SDAG-NEXT:    v_cvt_u32_f32_e32 v0, v2
545; GFX10-SDAG-NEXT:    v_xor_b32_e32 v1, v1, v3
546; GFX10-SDAG-NEXT:    v_xor_b32_e32 v0, v0, v3
547; GFX10-SDAG-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v3
548; GFX10-SDAG-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
549; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
550;
551; GFX10-GISEL-LABEL: intrinsic_llround_i64_f32:
552; GFX10-GISEL:       ; %bb.0: ; %entry
553; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
554; GFX10-GISEL-NEXT:    v_trunc_f32_e32 v1, v0
555; GFX10-GISEL-NEXT:    v_sub_f32_e32 v2, v0, v1
556; GFX10-GISEL-NEXT:    v_cmp_ge_f32_e64 s4, |v2|, 0.5
557; GFX10-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1.0, s4
558; GFX10-GISEL-NEXT:    v_and_or_b32 v0, 0x80000000, v0, v2
559; GFX10-GISEL-NEXT:    v_add_f32_e32 v0, v1, v0
560; GFX10-GISEL-NEXT:    v_trunc_f32_e32 v1, v0
561; GFX10-GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
562; GFX10-GISEL-NEXT:    v_mul_f32_e64 v2, 0x2f800000, |v1|
563; GFX10-GISEL-NEXT:    v_floor_f32_e32 v2, v2
564; GFX10-GISEL-NEXT:    v_fma_f32 v1, 0xcf800000, v2, |v1|
565; GFX10-GISEL-NEXT:    v_cvt_u32_f32_e32 v0, v1
566; GFX10-GISEL-NEXT:    v_cvt_u32_f32_e32 v1, v2
567; GFX10-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v3
568; GFX10-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
569; GFX10-GISEL-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v3
570; GFX10-GISEL-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
571; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
572;
573; GFX11-SDAG-LABEL: intrinsic_llround_i64_f32:
574; GFX11-SDAG:       ; %bb.0: ; %entry
575; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
576; GFX11-SDAG-NEXT:    v_trunc_f32_e32 v1, v0
577; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
578; GFX11-SDAG-NEXT:    v_sub_f32_e32 v2, v0, v1
579; GFX11-SDAG-NEXT:    v_cmp_ge_f32_e64 s0, |v2|, 0.5
580; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
581; GFX11-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1.0, s0
582; GFX11-SDAG-NEXT:    v_bfi_b32 v0, 0x7fffffff, v2, v0
583; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
584; GFX11-SDAG-NEXT:    v_add_f32_e32 v0, v1, v0
585; GFX11-SDAG-NEXT:    v_trunc_f32_e32 v0, v0
586; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
587; GFX11-SDAG-NEXT:    v_mul_f32_e64 v1, 0x2f800000, |v0|
588; GFX11-SDAG-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
589; GFX11-SDAG-NEXT:    v_floor_f32_e32 v1, v1
590; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
591; GFX11-SDAG-NEXT:    v_fma_f32 v2, 0xcf800000, v1, |v0|
592; GFX11-SDAG-NEXT:    v_cvt_u32_f32_e32 v1, v1
593; GFX11-SDAG-NEXT:    v_cvt_u32_f32_e32 v0, v2
594; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
595; GFX11-SDAG-NEXT:    v_xor_b32_e32 v1, v1, v3
596; GFX11-SDAG-NEXT:    v_xor_b32_e32 v0, v0, v3
597; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
598; GFX11-SDAG-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v3
599; GFX11-SDAG-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
600; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
601;
602; GFX11-GISEL-LABEL: intrinsic_llround_i64_f32:
603; GFX11-GISEL:       ; %bb.0: ; %entry
604; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
605; GFX11-GISEL-NEXT:    v_trunc_f32_e32 v1, v0
606; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
607; GFX11-GISEL-NEXT:    v_sub_f32_e32 v2, v0, v1
608; GFX11-GISEL-NEXT:    v_cmp_ge_f32_e64 s0, |v2|, 0.5
609; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
610; GFX11-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1.0, s0
611; GFX11-GISEL-NEXT:    v_and_or_b32 v0, 0x80000000, v0, v2
612; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
613; GFX11-GISEL-NEXT:    v_add_f32_e32 v0, v1, v0
614; GFX11-GISEL-NEXT:    v_trunc_f32_e32 v1, v0
615; GFX11-GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
616; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
617; GFX11-GISEL-NEXT:    v_mul_f32_e64 v2, 0x2f800000, |v1|
618; GFX11-GISEL-NEXT:    v_floor_f32_e32 v2, v2
619; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
620; GFX11-GISEL-NEXT:    v_fma_f32 v1, 0xcf800000, v2, |v1|
621; GFX11-GISEL-NEXT:    v_cvt_u32_f32_e32 v0, v1
622; GFX11-GISEL-NEXT:    v_cvt_u32_f32_e32 v1, v2
623; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
624; GFX11-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v3
625; GFX11-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
626; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
627; GFX11-GISEL-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v3
628; GFX11-GISEL-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
629; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
630entry:
631  %res = tail call i64 @llvm.llround.i64.f32(float %arg)
632  ret i64 %res
633}
634
; Test case: llvm.llround.i64.f64 applied to a scalar double, returning i64.
; The assertions in this function are autogenerated (see the NOTE on the first
; line of the file); regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand.
; In every configuration the expected code first rounds in f64 (v_trunc_f64,
; a |fraction| >= 0.5 compare, then an add of a sign-adjusted constant) and
; finally produces the result in v0/v1 via v_cvt_u32_f64 and v_cvt_i32_f64.
635define i64 @intrinsic_llround_i64_f64(double %arg) {
636; GFX9-SDAG-LABEL: intrinsic_llround_i64_f64:
637; GFX9-SDAG:       ; %bb.0: ; %entry
638; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
639; GFX9-SDAG-NEXT:    v_trunc_f64_e32 v[2:3], v[0:1]
640; GFX9-SDAG-NEXT:    v_mov_b32_e32 v6, 0x3ff00000
641; GFX9-SDAG-NEXT:    s_brev_b32 s4, -2
642; GFX9-SDAG-NEXT:    v_add_f64 v[4:5], v[0:1], -v[2:3]
643; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, 0
644; GFX9-SDAG-NEXT:    v_cmp_ge_f64_e64 vcc, |v[4:5]|, 0.5
645; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v6, vcc
646; GFX9-SDAG-NEXT:    v_bfi_b32 v1, s4, v4, v1
647; GFX9-SDAG-NEXT:    v_add_f64 v[0:1], v[2:3], v[0:1]
648; GFX9-SDAG-NEXT:    s_movk_i32 s4, 0xffe0
649; GFX9-SDAG-NEXT:    v_trunc_f64_e32 v[0:1], v[0:1]
650; GFX9-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[0:1], s4
651; GFX9-SDAG-NEXT:    s_mov_b32 s4, 0
652; GFX9-SDAG-NEXT:    s_mov_b32 s5, 0xc1f00000
653; GFX9-SDAG-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
654; GFX9-SDAG-NEXT:    v_fma_f64 v[0:1], v[2:3], s[4:5], v[0:1]
655; GFX9-SDAG-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
656; GFX9-SDAG-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
657; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
658;
659; GFX9-GISEL-LABEL: intrinsic_llround_i64_f64:
660; GFX9-GISEL:       ; %bb.0: ; %entry
661; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
662; GFX9-GISEL-NEXT:    v_trunc_f64_e32 v[2:3], v[0:1]
663; GFX9-GISEL-NEXT:    v_mov_b32_e32 v6, 0x3ff00000
664; GFX9-GISEL-NEXT:    s_brev_b32 s4, 1
665; GFX9-GISEL-NEXT:    v_add_f64 v[4:5], v[0:1], -v[2:3]
666; GFX9-GISEL-NEXT:    v_and_or_b32 v0, v0, 0, 0
667; GFX9-GISEL-NEXT:    v_cmp_ge_f64_e64 vcc, |v[4:5]|, 0.5
668; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v6, vcc
669; GFX9-GISEL-NEXT:    v_and_or_b32 v1, v1, s4, v4
670; GFX9-GISEL-NEXT:    v_add_f64 v[0:1], v[2:3], v[0:1]
671; GFX9-GISEL-NEXT:    v_mov_b32_e32 v2, 0
672; GFX9-GISEL-NEXT:    v_mov_b32_e32 v3, 0x3df00000
673; GFX9-GISEL-NEXT:    v_mov_b32_e32 v4, 0
674; GFX9-GISEL-NEXT:    v_mov_b32_e32 v5, 0xc1f00000
675; GFX9-GISEL-NEXT:    v_trunc_f64_e32 v[0:1], v[0:1]
676; GFX9-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
677; GFX9-GISEL-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
678; GFX9-GISEL-NEXT:    v_fma_f64 v[0:1], v[2:3], v[4:5], v[0:1]
679; GFX9-GISEL-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
680; GFX9-GISEL-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
681; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
682;
683; GFX10-SDAG-LABEL: intrinsic_llround_i64_f64:
684; GFX10-SDAG:       ; %bb.0: ; %entry
685; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
686; GFX10-SDAG-NEXT:    v_trunc_f64_e32 v[2:3], v[0:1]
687; GFX10-SDAG-NEXT:    v_add_f64 v[4:5], v[0:1], -v[2:3]
688; GFX10-SDAG-NEXT:    v_mov_b32_e32 v0, 0
689; GFX10-SDAG-NEXT:    v_cmp_ge_f64_e64 s4, |v[4:5]|, 0.5
690; GFX10-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, 0x3ff00000, s4
691; GFX10-SDAG-NEXT:    v_bfi_b32 v1, 0x7fffffff, v4, v1
692; GFX10-SDAG-NEXT:    v_add_f64 v[0:1], v[2:3], v[0:1]
693; GFX10-SDAG-NEXT:    v_trunc_f64_e32 v[0:1], v[0:1]
694; GFX10-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[0:1], 0xffffffe0
695; GFX10-SDAG-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
696; GFX10-SDAG-NEXT:    v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
697; GFX10-SDAG-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
698; GFX10-SDAG-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
699; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
700;
701; GFX10-GISEL-LABEL: intrinsic_llround_i64_f64:
702; GFX10-GISEL:       ; %bb.0: ; %entry
703; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
704; GFX10-GISEL-NEXT:    v_trunc_f64_e32 v[2:3], v[0:1]
705; GFX10-GISEL-NEXT:    v_add_f64 v[4:5], v[0:1], -v[2:3]
706; GFX10-GISEL-NEXT:    v_and_or_b32 v0, v0, 0, 0
707; GFX10-GISEL-NEXT:    v_cmp_ge_f64_e64 s4, |v[4:5]|, 0.5
708; GFX10-GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 0x3ff00000, s4
709; GFX10-GISEL-NEXT:    v_and_or_b32 v1, 0x80000000, v1, v4
710; GFX10-GISEL-NEXT:    v_add_f64 v[0:1], v[2:3], v[0:1]
711; GFX10-GISEL-NEXT:    v_trunc_f64_e32 v[0:1], v[0:1]
712; GFX10-GISEL-NEXT:    v_mul_f64 v[2:3], 0x3df00000, v[0:1]
713; GFX10-GISEL-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
714; GFX10-GISEL-NEXT:    v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
715; GFX10-GISEL-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
716; GFX10-GISEL-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
717; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
718;
719; GFX11-SDAG-LABEL: intrinsic_llround_i64_f64:
720; GFX11-SDAG:       ; %bb.0: ; %entry
721; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
722; GFX11-SDAG-NEXT:    v_trunc_f64_e32 v[2:3], v[0:1]
723; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
724; GFX11-SDAG-NEXT:    v_add_f64 v[4:5], v[0:1], -v[2:3]
725; GFX11-SDAG-NEXT:    v_mov_b32_e32 v0, 0
726; GFX11-SDAG-NEXT:    v_cmp_ge_f64_e64 s0, |v[4:5]|, 0.5
727; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
728; GFX11-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, 0x3ff00000, s0
729; GFX11-SDAG-NEXT:    v_bfi_b32 v1, 0x7fffffff, v4, v1
730; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
731; GFX11-SDAG-NEXT:    v_add_f64 v[0:1], v[2:3], v[0:1]
732; GFX11-SDAG-NEXT:    v_trunc_f64_e32 v[0:1], v[0:1]
733; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
734; GFX11-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[0:1], 0xffffffe0
735; GFX11-SDAG-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
736; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
737; GFX11-SDAG-NEXT:    v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
738; GFX11-SDAG-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
739; GFX11-SDAG-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
740; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
741;
742; GFX11-GISEL-LABEL: intrinsic_llround_i64_f64:
743; GFX11-GISEL:       ; %bb.0: ; %entry
744; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
745; GFX11-GISEL-NEXT:    v_trunc_f64_e32 v[2:3], v[0:1]
746; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
747; GFX11-GISEL-NEXT:    v_add_f64 v[4:5], v[0:1], -v[2:3]
748; GFX11-GISEL-NEXT:    v_and_or_b32 v0, v0, 0, 0
749; GFX11-GISEL-NEXT:    v_cmp_ge_f64_e64 s0, |v[4:5]|, 0.5
750; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
751; GFX11-GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 0x3ff00000, s0
752; GFX11-GISEL-NEXT:    v_and_or_b32 v1, 0x80000000, v1, v4
753; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
754; GFX11-GISEL-NEXT:    v_add_f64 v[0:1], v[2:3], v[0:1]
755; GFX11-GISEL-NEXT:    v_trunc_f64_e32 v[0:1], v[0:1]
756; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
757; GFX11-GISEL-NEXT:    v_mul_f64 v[2:3], 0x3df00000, v[0:1]
758; GFX11-GISEL-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
759; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
760; GFX11-GISEL-NEXT:    v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
761; GFX11-GISEL-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
762; GFX11-GISEL-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
763; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
764entry:
765  %res = tail call i64 @llvm.llround.i64.f64(double %arg)
766  ret i64 %res
767}
768
; Test case: llvm.round.f16 applied to a scalar half, returning half.
; Unlike the lround/llround cases in this file, the IR performs no conversion
; to an integer type, so the expected code in every configuration ends with
; the final v_add_f16 followed directly by the return.
; The assertions are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
769define half @intrinsic_fround_half(half %arg) {
770; GFX9-SDAG-LABEL: intrinsic_fround_half:
771; GFX9-SDAG:       ; %bb.0: ; %entry
772; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
773; GFX9-SDAG-NEXT:    v_trunc_f16_e32 v1, v0
774; GFX9-SDAG-NEXT:    v_sub_f16_e32 v2, v0, v1
775; GFX9-SDAG-NEXT:    v_mov_b32_e32 v3, 0x3c00
776; GFX9-SDAG-NEXT:    v_cmp_ge_f16_e64 vcc, |v2|, 0.5
777; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v3, vcc
778; GFX9-SDAG-NEXT:    s_movk_i32 s4, 0x7fff
779; GFX9-SDAG-NEXT:    v_bfi_b32 v0, s4, v2, v0
780; GFX9-SDAG-NEXT:    v_add_f16_e32 v0, v1, v0
781; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
782;
783; GFX9-GISEL-LABEL: intrinsic_fround_half:
784; GFX9-GISEL:       ; %bb.0: ; %entry
785; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
786; GFX9-GISEL-NEXT:    v_trunc_f16_e32 v1, v0
787; GFX9-GISEL-NEXT:    v_sub_f16_e32 v2, v0, v1
788; GFX9-GISEL-NEXT:    v_mov_b32_e32 v3, 0x3c00
789; GFX9-GISEL-NEXT:    v_cmp_ge_f16_e64 vcc, |v2|, 0.5
790; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v2, 0, v3, vcc
791; GFX9-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff8000, v0
792; GFX9-GISEL-NEXT:    v_or_b32_e32 v0, v2, v0
793; GFX9-GISEL-NEXT:    v_add_f16_e32 v0, v1, v0
794; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
795;
796; GFX10-SDAG-LABEL: intrinsic_fround_half:
797; GFX10-SDAG:       ; %bb.0: ; %entry
798; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
799; GFX10-SDAG-NEXT:    v_trunc_f16_e32 v1, v0
800; GFX10-SDAG-NEXT:    v_sub_f16_e32 v2, v0, v1
801; GFX10-SDAG-NEXT:    v_cmp_ge_f16_e64 s4, |v2|, 0.5
802; GFX10-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 0x3c00, s4
803; GFX10-SDAG-NEXT:    v_bfi_b32 v0, 0x7fff, v2, v0
804; GFX10-SDAG-NEXT:    v_add_f16_e32 v0, v1, v0
805; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
806;
807; GFX10-GISEL-LABEL: intrinsic_fround_half:
808; GFX10-GISEL:       ; %bb.0: ; %entry
809; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
810; GFX10-GISEL-NEXT:    v_trunc_f16_e32 v1, v0
811; GFX10-GISEL-NEXT:    v_sub_f16_e32 v2, v0, v1
812; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff8000, v0
813; GFX10-GISEL-NEXT:    v_cmp_ge_f16_e64 s4, |v2|, 0.5
814; GFX10-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 0x3c00, s4
815; GFX10-GISEL-NEXT:    v_or_b32_e32 v0, v2, v0
816; GFX10-GISEL-NEXT:    v_add_f16_e32 v0, v1, v0
817; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
818;
819; GFX11-SDAG-LABEL: intrinsic_fround_half:
820; GFX11-SDAG:       ; %bb.0: ; %entry
821; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
822; GFX11-SDAG-NEXT:    v_trunc_f16_e32 v1, v0
823; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
824; GFX11-SDAG-NEXT:    v_sub_f16_e32 v2, v0, v1
825; GFX11-SDAG-NEXT:    v_cmp_ge_f16_e64 s0, |v2|, 0.5
826; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
827; GFX11-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 0x3c00, s0
828; GFX11-SDAG-NEXT:    v_bfi_b32 v0, 0x7fff, v2, v0
829; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
830; GFX11-SDAG-NEXT:    v_add_f16_e32 v0, v1, v0
831; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
832;
833; GFX11-GISEL-LABEL: intrinsic_fround_half:
834; GFX11-GISEL:       ; %bb.0: ; %entry
835; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
836; GFX11-GISEL-NEXT:    v_trunc_f16_e32 v1, v0
837; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
838; GFX11-GISEL-NEXT:    v_sub_f16_e32 v2, v0, v1
839; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff8000, v0
840; GFX11-GISEL-NEXT:    v_cmp_ge_f16_e64 s0, |v2|, 0.5
841; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
842; GFX11-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 0x3c00, s0
843; GFX11-GISEL-NEXT:    v_or_b32_e32 v0, v2, v0
844; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
845; GFX11-GISEL-NEXT:    v_add_f16_e32 v0, v1, v0
846; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
847entry:
848  %res = tail call half @llvm.round.f16(half %arg)
849  ret half %res
850}
851
; Test case: llvm.lround.i32.f16 applied to a scalar half, returning i32.
; In every configuration the expected code does the rounding in f16 and then
; converts through f32: v_cvt_f32_f16 followed by v_cvt_i32_f32.
; The assertions are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
852define i32 @intrinsic_lround_i32_f16(half %arg) {
853; GFX9-SDAG-LABEL: intrinsic_lround_i32_f16:
854; GFX9-SDAG:       ; %bb.0: ; %entry
855; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
856; GFX9-SDAG-NEXT:    v_trunc_f16_e32 v1, v0
857; GFX9-SDAG-NEXT:    v_sub_f16_e32 v2, v0, v1
858; GFX9-SDAG-NEXT:    v_mov_b32_e32 v3, 0x3c00
859; GFX9-SDAG-NEXT:    v_cmp_ge_f16_e64 vcc, |v2|, 0.5
860; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v3, vcc
861; GFX9-SDAG-NEXT:    s_movk_i32 s4, 0x7fff
862; GFX9-SDAG-NEXT:    v_bfi_b32 v0, s4, v2, v0
863; GFX9-SDAG-NEXT:    v_add_f16_e32 v0, v1, v0
864; GFX9-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
865; GFX9-SDAG-NEXT:    v_cvt_i32_f32_e32 v0, v0
866; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
867;
868; GFX9-GISEL-LABEL: intrinsic_lround_i32_f16:
869; GFX9-GISEL:       ; %bb.0: ; %entry
870; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
871; GFX9-GISEL-NEXT:    v_trunc_f16_e32 v1, v0
872; GFX9-GISEL-NEXT:    v_sub_f16_e32 v2, v0, v1
873; GFX9-GISEL-NEXT:    v_mov_b32_e32 v3, 0x3c00
874; GFX9-GISEL-NEXT:    v_cmp_ge_f16_e64 vcc, |v2|, 0.5
875; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v2, 0, v3, vcc
876; GFX9-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff8000, v0
877; GFX9-GISEL-NEXT:    v_or_b32_e32 v0, v2, v0
878; GFX9-GISEL-NEXT:    v_add_f16_e32 v0, v1, v0
879; GFX9-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v0
880; GFX9-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
881; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
882;
883; GFX10-SDAG-LABEL: intrinsic_lround_i32_f16:
884; GFX10-SDAG:       ; %bb.0: ; %entry
885; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
886; GFX10-SDAG-NEXT:    v_trunc_f16_e32 v1, v0
887; GFX10-SDAG-NEXT:    v_sub_f16_e32 v2, v0, v1
888; GFX10-SDAG-NEXT:    v_cmp_ge_f16_e64 s4, |v2|, 0.5
889; GFX10-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 0x3c00, s4
890; GFX10-SDAG-NEXT:    v_bfi_b32 v0, 0x7fff, v2, v0
891; GFX10-SDAG-NEXT:    v_add_f16_e32 v0, v1, v0
892; GFX10-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
893; GFX10-SDAG-NEXT:    v_cvt_i32_f32_e32 v0, v0
894; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
895;
896; GFX10-GISEL-LABEL: intrinsic_lround_i32_f16:
897; GFX10-GISEL:       ; %bb.0: ; %entry
898; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
899; GFX10-GISEL-NEXT:    v_trunc_f16_e32 v1, v0
900; GFX10-GISEL-NEXT:    v_sub_f16_e32 v2, v0, v1
901; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff8000, v0
902; GFX10-GISEL-NEXT:    v_cmp_ge_f16_e64 s4, |v2|, 0.5
903; GFX10-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 0x3c00, s4
904; GFX10-GISEL-NEXT:    v_or_b32_e32 v0, v2, v0
905; GFX10-GISEL-NEXT:    v_add_f16_e32 v0, v1, v0
906; GFX10-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v0
907; GFX10-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
908; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
909;
910; GFX11-SDAG-LABEL: intrinsic_lround_i32_f16:
911; GFX11-SDAG:       ; %bb.0: ; %entry
912; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
913; GFX11-SDAG-NEXT:    v_trunc_f16_e32 v1, v0
914; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
915; GFX11-SDAG-NEXT:    v_sub_f16_e32 v2, v0, v1
916; GFX11-SDAG-NEXT:    v_cmp_ge_f16_e64 s0, |v2|, 0.5
917; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
918; GFX11-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 0x3c00, s0
919; GFX11-SDAG-NEXT:    v_bfi_b32 v0, 0x7fff, v2, v0
920; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
921; GFX11-SDAG-NEXT:    v_add_f16_e32 v0, v1, v0
922; GFX11-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
923; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
924; GFX11-SDAG-NEXT:    v_cvt_i32_f32_e32 v0, v0
925; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
926;
927; GFX11-GISEL-LABEL: intrinsic_lround_i32_f16:
928; GFX11-GISEL:       ; %bb.0: ; %entry
929; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
930; GFX11-GISEL-NEXT:    v_trunc_f16_e32 v1, v0
931; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
932; GFX11-GISEL-NEXT:    v_sub_f16_e32 v2, v0, v1
933; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff8000, v0
934; GFX11-GISEL-NEXT:    v_cmp_ge_f16_e64 s0, |v2|, 0.5
935; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
936; GFX11-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 0x3c00, s0
937; GFX11-GISEL-NEXT:    v_or_b32_e32 v0, v2, v0
938; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
939; GFX11-GISEL-NEXT:    v_add_f16_e32 v0, v1, v0
940; GFX11-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v0
941; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
942; GFX11-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
943; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
944entry:
945  %res = tail call i32 @llvm.lround.i32.f16(half %arg)
946  ret i32 %res
947}
948
; Test case: llvm.lround.v2i32.v2f32 applied to a <2 x float> vector,
; returning <2 x i32>.
; The expected code handles the two elements (v0 and v1) with separate
; instruction sequences, each ending in its own v_cvt_i32_f32. On the gfx1100
; runs the per-element subtract and add are expected as paired
; v_dual_sub_f32 / v_dual_add_f32 instructions.
; The assertions are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
949define <2 x i32> @intrinsic_lround_v2i32_v2f32(<2 x float> %arg) {
950; GFX9-SDAG-LABEL: intrinsic_lround_v2i32_v2f32:
951; GFX9-SDAG:       ; %bb.0: ; %entry
952; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
953; GFX9-SDAG-NEXT:    v_trunc_f32_e32 v2, v0
954; GFX9-SDAG-NEXT:    v_sub_f32_e32 v3, v0, v2
955; GFX9-SDAG-NEXT:    v_cmp_ge_f32_e64 s[4:5], |v3|, 0.5
956; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v3, 0, 1.0, s[4:5]
957; GFX9-SDAG-NEXT:    s_brev_b32 s6, -2
958; GFX9-SDAG-NEXT:    v_bfi_b32 v0, s6, v3, v0
959; GFX9-SDAG-NEXT:    v_add_f32_e32 v0, v2, v0
960; GFX9-SDAG-NEXT:    v_trunc_f32_e32 v2, v1
961; GFX9-SDAG-NEXT:    v_sub_f32_e32 v3, v1, v2
962; GFX9-SDAG-NEXT:    v_cmp_ge_f32_e64 s[4:5], |v3|, 0.5
963; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v3, 0, 1.0, s[4:5]
964; GFX9-SDAG-NEXT:    v_bfi_b32 v1, s6, v3, v1
965; GFX9-SDAG-NEXT:    v_add_f32_e32 v1, v2, v1
966; GFX9-SDAG-NEXT:    v_cvt_i32_f32_e32 v0, v0
967; GFX9-SDAG-NEXT:    v_cvt_i32_f32_e32 v1, v1
968; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
969;
970; GFX9-GISEL-LABEL: intrinsic_lround_v2i32_v2f32:
971; GFX9-GISEL:       ; %bb.0: ; %entry
972; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
973; GFX9-GISEL-NEXT:    v_trunc_f32_e32 v2, v0
974; GFX9-GISEL-NEXT:    v_sub_f32_e32 v3, v0, v2
975; GFX9-GISEL-NEXT:    v_cmp_ge_f32_e64 s[4:5], |v3|, 0.5
976; GFX9-GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, 1.0, s[4:5]
977; GFX9-GISEL-NEXT:    v_bfrev_b32_e32 v4, 1
978; GFX9-GISEL-NEXT:    v_and_or_b32 v0, v0, v4, v3
979; GFX9-GISEL-NEXT:    v_add_f32_e32 v0, v2, v0
980; GFX9-GISEL-NEXT:    v_trunc_f32_e32 v2, v1
981; GFX9-GISEL-NEXT:    v_sub_f32_e32 v3, v1, v2
982; GFX9-GISEL-NEXT:    v_cmp_ge_f32_e64 s[4:5], |v3|, 0.5
983; GFX9-GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, 1.0, s[4:5]
984; GFX9-GISEL-NEXT:    v_and_or_b32 v1, v1, v4, v3
985; GFX9-GISEL-NEXT:    v_add_f32_e32 v1, v2, v1
986; GFX9-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
987; GFX9-GISEL-NEXT:    v_cvt_i32_f32_e32 v1, v1
988; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
989;
990; GFX10-SDAG-LABEL: intrinsic_lround_v2i32_v2f32:
991; GFX10-SDAG:       ; %bb.0: ; %entry
992; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
993; GFX10-SDAG-NEXT:    v_trunc_f32_e32 v2, v0
994; GFX10-SDAG-NEXT:    v_trunc_f32_e32 v3, v1
995; GFX10-SDAG-NEXT:    v_sub_f32_e32 v4, v0, v2
996; GFX10-SDAG-NEXT:    v_sub_f32_e32 v5, v1, v3
997; GFX10-SDAG-NEXT:    v_cmp_ge_f32_e64 s4, |v4|, 0.5
998; GFX10-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, 1.0, s4
999; GFX10-SDAG-NEXT:    v_cmp_ge_f32_e64 s4, |v5|, 0.5
1000; GFX10-SDAG-NEXT:    v_bfi_b32 v0, 0x7fffffff, v4, v0
1001; GFX10-SDAG-NEXT:    v_cndmask_b32_e64 v5, 0, 1.0, s4
1002; GFX10-SDAG-NEXT:    v_add_f32_e32 v0, v2, v0
1003; GFX10-SDAG-NEXT:    v_bfi_b32 v1, 0x7fffffff, v5, v1
1004; GFX10-SDAG-NEXT:    v_cvt_i32_f32_e32 v0, v0
1005; GFX10-SDAG-NEXT:    v_add_f32_e32 v1, v3, v1
1006; GFX10-SDAG-NEXT:    v_cvt_i32_f32_e32 v1, v1
1007; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
1008;
1009; GFX10-GISEL-LABEL: intrinsic_lround_v2i32_v2f32:
1010; GFX10-GISEL:       ; %bb.0: ; %entry
1011; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1012; GFX10-GISEL-NEXT:    v_trunc_f32_e32 v2, v0
1013; GFX10-GISEL-NEXT:    v_trunc_f32_e32 v3, v1
1014; GFX10-GISEL-NEXT:    v_sub_f32_e32 v4, v0, v2
1015; GFX10-GISEL-NEXT:    v_sub_f32_e32 v5, v1, v3
1016; GFX10-GISEL-NEXT:    v_cmp_ge_f32_e64 s4, |v4|, 0.5
1017; GFX10-GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1.0, s4
1018; GFX10-GISEL-NEXT:    v_cmp_ge_f32_e64 s4, |v5|, 0.5
1019; GFX10-GISEL-NEXT:    v_and_or_b32 v0, 0x80000000, v0, v4
1020; GFX10-GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1.0, s4
1021; GFX10-GISEL-NEXT:    v_add_f32_e32 v0, v2, v0
1022; GFX10-GISEL-NEXT:    v_and_or_b32 v1, 0x80000000, v1, v5
1023; GFX10-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
1024; GFX10-GISEL-NEXT:    v_add_f32_e32 v1, v3, v1
1025; GFX10-GISEL-NEXT:    v_cvt_i32_f32_e32 v1, v1
1026; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
1027;
1028; GFX11-SDAG-LABEL: intrinsic_lround_v2i32_v2f32:
1029; GFX11-SDAG:       ; %bb.0: ; %entry
1030; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1031; GFX11-SDAG-NEXT:    v_trunc_f32_e32 v2, v0
1032; GFX11-SDAG-NEXT:    v_trunc_f32_e32 v3, v1
1033; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1034; GFX11-SDAG-NEXT:    v_dual_sub_f32 v4, v0, v2 :: v_dual_sub_f32 v5, v1, v3
1035; GFX11-SDAG-NEXT:    v_cmp_ge_f32_e64 s0, |v4|, 0.5
1036; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
1037; GFX11-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, 1.0, s0
1038; GFX11-SDAG-NEXT:    v_cmp_ge_f32_e64 s0, |v5|, 0.5
1039; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1040; GFX11-SDAG-NEXT:    v_bfi_b32 v0, 0x7fffffff, v4, v0
1041; GFX11-SDAG-NEXT:    v_cndmask_b32_e64 v5, 0, 1.0, s0
1042; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1043; GFX11-SDAG-NEXT:    v_bfi_b32 v1, 0x7fffffff, v5, v1
1044; GFX11-SDAG-NEXT:    v_dual_add_f32 v0, v2, v0 :: v_dual_add_f32 v1, v3, v1
1045; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
1046; GFX11-SDAG-NEXT:    v_cvt_i32_f32_e32 v0, v0
1047; GFX11-SDAG-NEXT:    v_cvt_i32_f32_e32 v1, v1
1048; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
1049;
1050; GFX11-GISEL-LABEL: intrinsic_lround_v2i32_v2f32:
1051; GFX11-GISEL:       ; %bb.0: ; %entry
1052; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1053; GFX11-GISEL-NEXT:    v_trunc_f32_e32 v2, v0
1054; GFX11-GISEL-NEXT:    v_trunc_f32_e32 v3, v1
1055; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1056; GFX11-GISEL-NEXT:    v_dual_sub_f32 v4, v0, v2 :: v_dual_sub_f32 v5, v1, v3
1057; GFX11-GISEL-NEXT:    v_cmp_ge_f32_e64 s0, |v4|, 0.5
1058; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
1059; GFX11-GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1.0, s0
1060; GFX11-GISEL-NEXT:    v_cmp_ge_f32_e64 s0, |v5|, 0.5
1061; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1062; GFX11-GISEL-NEXT:    v_and_or_b32 v0, 0x80000000, v0, v4
1063; GFX11-GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1.0, s0
1064; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1065; GFX11-GISEL-NEXT:    v_and_or_b32 v1, 0x80000000, v1, v5
1066; GFX11-GISEL-NEXT:    v_dual_add_f32 v0, v2, v0 :: v_dual_add_f32 v1, v3, v1
1067; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
1068; GFX11-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
1069; GFX11-GISEL-NEXT:    v_cvt_i32_f32_e32 v1, v1
1070; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
1071entry:
1072  %res = tail call <2 x i32> @llvm.lround.v2i32.v2f32(<2 x float> %arg)
1073  ret <2 x i32> %res
1074}
1075
1076define <2 x i64> @intrinsic_lround_v2i64_v2f32(<2 x float> %arg) {
1077; GFX9-SDAG-LABEL: intrinsic_lround_v2i64_v2f32:
1078; GFX9-SDAG:       ; %bb.0: ; %entry
1079; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1080; GFX9-SDAG-NEXT:    v_trunc_f32_e32 v2, v0
1081; GFX9-SDAG-NEXT:    v_sub_f32_e32 v3, v0, v2
1082; GFX9-SDAG-NEXT:    v_cmp_ge_f32_e64 s[4:5], |v3|, 0.5
1083; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v3, 0, 1.0, s[4:5]
1084; GFX9-SDAG-NEXT:    s_brev_b32 s6, -2
1085; GFX9-SDAG-NEXT:    v_bfi_b32 v0, s6, v3, v0
1086; GFX9-SDAG-NEXT:    v_add_f32_e32 v0, v2, v0
1087; GFX9-SDAG-NEXT:    v_trunc_f32_e32 v0, v0
1088; GFX9-SDAG-NEXT:    s_mov_b32 s7, 0x2f800000
1089; GFX9-SDAG-NEXT:    v_mul_f32_e64 v2, |v0|, s7
1090; GFX9-SDAG-NEXT:    v_floor_f32_e32 v2, v2
1091; GFX9-SDAG-NEXT:    s_mov_b32 s8, 0xcf800000
1092; GFX9-SDAG-NEXT:    v_fma_f32 v3, v2, s8, |v0|
1093; GFX9-SDAG-NEXT:    v_cvt_u32_f32_e32 v3, v3
1094; GFX9-SDAG-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
1095; GFX9-SDAG-NEXT:    v_cvt_u32_f32_e32 v2, v2
1096; GFX9-SDAG-NEXT:    v_xor_b32_e32 v0, v3, v4
1097; GFX9-SDAG-NEXT:    v_trunc_f32_e32 v3, v1
1098; GFX9-SDAG-NEXT:    v_sub_f32_e32 v5, v1, v3
1099; GFX9-SDAG-NEXT:    v_cmp_ge_f32_e64 s[4:5], |v5|, 0.5
1100; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v5, 0, 1.0, s[4:5]
1101; GFX9-SDAG-NEXT:    v_bfi_b32 v1, s6, v5, v1
1102; GFX9-SDAG-NEXT:    v_add_f32_e32 v1, v3, v1
1103; GFX9-SDAG-NEXT:    v_trunc_f32_e32 v3, v1
1104; GFX9-SDAG-NEXT:    v_mul_f32_e64 v1, |v3|, s7
1105; GFX9-SDAG-NEXT:    v_floor_f32_e32 v1, v1
1106; GFX9-SDAG-NEXT:    v_fma_f32 v5, v1, s8, |v3|
1107; GFX9-SDAG-NEXT:    v_cvt_u32_f32_e32 v5, v5
1108; GFX9-SDAG-NEXT:    v_cvt_u32_f32_e32 v6, v1
1109; GFX9-SDAG-NEXT:    v_xor_b32_e32 v2, v2, v4
1110; GFX9-SDAG-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v4
1111; GFX9-SDAG-NEXT:    v_ashrrev_i32_e32 v3, 31, v3
1112; GFX9-SDAG-NEXT:    v_subb_co_u32_e32 v1, vcc, v2, v4, vcc
1113; GFX9-SDAG-NEXT:    v_xor_b32_e32 v2, v5, v3
1114; GFX9-SDAG-NEXT:    v_xor_b32_e32 v4, v6, v3
1115; GFX9-SDAG-NEXT:    v_sub_co_u32_e32 v2, vcc, v2, v3
1116; GFX9-SDAG-NEXT:    v_subb_co_u32_e32 v3, vcc, v4, v3, vcc
1117; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
1118;
1119; GFX9-GISEL-LABEL: intrinsic_lround_v2i64_v2f32:
1120; GFX9-GISEL:       ; %bb.0: ; %entry
1121; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1122; GFX9-GISEL-NEXT:    v_trunc_f32_e32 v2, v0
1123; GFX9-GISEL-NEXT:    v_sub_f32_e32 v3, v0, v2
1124; GFX9-GISEL-NEXT:    v_cmp_ge_f32_e64 s[4:5], |v3|, 0.5
1125; GFX9-GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, 1.0, s[4:5]
1126; GFX9-GISEL-NEXT:    v_bfrev_b32_e32 v4, 1
1127; GFX9-GISEL-NEXT:    v_and_or_b32 v0, v0, v4, v3
1128; GFX9-GISEL-NEXT:    v_add_f32_e32 v0, v2, v0
1129; GFX9-GISEL-NEXT:    v_trunc_f32_e32 v2, v0
1130; GFX9-GISEL-NEXT:    v_mov_b32_e32 v3, 0x2f800000
1131; GFX9-GISEL-NEXT:    v_mul_f32_e64 v5, |v2|, v3
1132; GFX9-GISEL-NEXT:    v_floor_f32_e32 v5, v5
1133; GFX9-GISEL-NEXT:    v_mov_b32_e32 v6, 0xcf800000
1134; GFX9-GISEL-NEXT:    v_fma_f32 v2, v5, v6, |v2|
1135; GFX9-GISEL-NEXT:    v_cvt_u32_f32_e32 v2, v2
1136; GFX9-GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
1137; GFX9-GISEL-NEXT:    v_ashrrev_i32_e32 v7, 31, v0
1138; GFX9-GISEL-NEXT:    v_xor_b32_e32 v0, v2, v7
1139; GFX9-GISEL-NEXT:    v_xor_b32_e32 v2, v5, v7
1140; GFX9-GISEL-NEXT:    v_trunc_f32_e32 v5, v1
1141; GFX9-GISEL-NEXT:    v_sub_f32_e32 v8, v1, v5
1142; GFX9-GISEL-NEXT:    v_cmp_ge_f32_e64 s[4:5], |v8|, 0.5
1143; GFX9-GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1.0, s[4:5]
1144; GFX9-GISEL-NEXT:    v_and_or_b32 v1, v1, v4, v8
1145; GFX9-GISEL-NEXT:    v_add_f32_e32 v4, v5, v1
1146; GFX9-GISEL-NEXT:    v_trunc_f32_e32 v1, v4
1147; GFX9-GISEL-NEXT:    v_mul_f32_e64 v3, |v1|, v3
1148; GFX9-GISEL-NEXT:    v_floor_f32_e32 v3, v3
1149; GFX9-GISEL-NEXT:    v_fma_f32 v1, v3, v6, |v1|
1150; GFX9-GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v1
1151; GFX9-GISEL-NEXT:    v_cvt_u32_f32_e32 v3, v3
1152; GFX9-GISEL-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v7
1153; GFX9-GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v4
1154; GFX9-GISEL-NEXT:    v_subb_co_u32_e32 v1, vcc, v2, v7, vcc
1155; GFX9-GISEL-NEXT:    v_xor_b32_e32 v2, v5, v4
1156; GFX9-GISEL-NEXT:    v_xor_b32_e32 v3, v3, v4
1157; GFX9-GISEL-NEXT:    v_sub_co_u32_e32 v2, vcc, v2, v4
1158; GFX9-GISEL-NEXT:    v_subb_co_u32_e32 v3, vcc, v3, v4, vcc
1159; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
1160;
1161; GFX10-SDAG-LABEL: intrinsic_lround_v2i64_v2f32:
1162; GFX10-SDAG:       ; %bb.0: ; %entry
1163; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1164; GFX10-SDAG-NEXT:    v_trunc_f32_e32 v2, v0
1165; GFX10-SDAG-NEXT:    v_trunc_f32_e32 v3, v1
1166; GFX10-SDAG-NEXT:    v_sub_f32_e32 v4, v0, v2
1167; GFX10-SDAG-NEXT:    v_sub_f32_e32 v5, v1, v3
1168; GFX10-SDAG-NEXT:    v_cmp_ge_f32_e64 s4, |v4|, 0.5
1169; GFX10-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, 1.0, s4
1170; GFX10-SDAG-NEXT:    v_cmp_ge_f32_e64 s4, |v5|, 0.5
1171; GFX10-SDAG-NEXT:    v_bfi_b32 v0, 0x7fffffff, v4, v0
1172; GFX10-SDAG-NEXT:    v_cndmask_b32_e64 v5, 0, 1.0, s4
1173; GFX10-SDAG-NEXT:    v_add_f32_e32 v0, v2, v0
1174; GFX10-SDAG-NEXT:    v_bfi_b32 v1, 0x7fffffff, v5, v1
1175; GFX10-SDAG-NEXT:    v_trunc_f32_e32 v0, v0
1176; GFX10-SDAG-NEXT:    v_add_f32_e32 v1, v3, v1
1177; GFX10-SDAG-NEXT:    v_mul_f32_e64 v2, 0x2f800000, |v0|
1178; GFX10-SDAG-NEXT:    v_trunc_f32_e32 v1, v1
1179; GFX10-SDAG-NEXT:    v_ashrrev_i32_e32 v5, 31, v0
1180; GFX10-SDAG-NEXT:    v_floor_f32_e32 v2, v2
1181; GFX10-SDAG-NEXT:    v_mul_f32_e64 v3, 0x2f800000, |v1|
1182; GFX10-SDAG-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
1183; GFX10-SDAG-NEXT:    v_fma_f32 v4, 0xcf800000, v2, |v0|
1184; GFX10-SDAG-NEXT:    v_floor_f32_e32 v3, v3
1185; GFX10-SDAG-NEXT:    v_cvt_u32_f32_e32 v2, v2
1186; GFX10-SDAG-NEXT:    v_fma_f32 v0, 0xcf800000, v3, |v1|
1187; GFX10-SDAG-NEXT:    v_cvt_u32_f32_e32 v1, v4
1188; GFX10-SDAG-NEXT:    v_cvt_u32_f32_e32 v3, v3
1189; GFX10-SDAG-NEXT:    v_xor_b32_e32 v2, v2, v5
1190; GFX10-SDAG-NEXT:    v_cvt_u32_f32_e32 v0, v0
1191; GFX10-SDAG-NEXT:    v_xor_b32_e32 v1, v1, v5
1192; GFX10-SDAG-NEXT:    v_xor_b32_e32 v3, v3, v6
1193; GFX10-SDAG-NEXT:    v_xor_b32_e32 v4, v0, v6
1194; GFX10-SDAG-NEXT:    v_sub_co_u32 v0, vcc_lo, v1, v5
1195; GFX10-SDAG-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v2, v5, vcc_lo
1196; GFX10-SDAG-NEXT:    v_sub_co_u32 v2, vcc_lo, v4, v6
1197; GFX10-SDAG-NEXT:    v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v6, vcc_lo
1198; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
1199;
1200; GFX10-GISEL-LABEL: intrinsic_lround_v2i64_v2f32:
1201; GFX10-GISEL:       ; %bb.0: ; %entry
1202; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1203; GFX10-GISEL-NEXT:    v_trunc_f32_e32 v2, v0
1204; GFX10-GISEL-NEXT:    v_trunc_f32_e32 v3, v1
1205; GFX10-GISEL-NEXT:    v_sub_f32_e32 v4, v0, v2
1206; GFX10-GISEL-NEXT:    v_sub_f32_e32 v5, v1, v3
1207; GFX10-GISEL-NEXT:    v_cmp_ge_f32_e64 s4, |v4|, 0.5
1208; GFX10-GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1.0, s4
1209; GFX10-GISEL-NEXT:    v_cmp_ge_f32_e64 s4, |v5|, 0.5
1210; GFX10-GISEL-NEXT:    v_and_or_b32 v0, 0x80000000, v0, v4
1211; GFX10-GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1.0, s4
1212; GFX10-GISEL-NEXT:    v_add_f32_e32 v0, v2, v0
1213; GFX10-GISEL-NEXT:    v_and_or_b32 v1, 0x80000000, v1, v5
1214; GFX10-GISEL-NEXT:    v_trunc_f32_e32 v2, v0
1215; GFX10-GISEL-NEXT:    v_add_f32_e32 v1, v3, v1
1216; GFX10-GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v0
1217; GFX10-GISEL-NEXT:    v_mul_f32_e64 v4, 0x2f800000, |v2|
1218; GFX10-GISEL-NEXT:    v_trunc_f32_e32 v3, v1
1219; GFX10-GISEL-NEXT:    v_floor_f32_e32 v4, v4
1220; GFX10-GISEL-NEXT:    v_mul_f32_e64 v5, 0x2f800000, |v3|
1221; GFX10-GISEL-NEXT:    v_fma_f32 v2, 0xcf800000, v4, |v2|
1222; GFX10-GISEL-NEXT:    v_floor_f32_e32 v5, v5
1223; GFX10-GISEL-NEXT:    v_fma_f32 v0, 0xcf800000, v5, |v3|
1224; GFX10-GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
1225; GFX10-GISEL-NEXT:    v_cvt_u32_f32_e32 v1, v2
1226; GFX10-GISEL-NEXT:    v_cvt_u32_f32_e32 v2, v4
1227; GFX10-GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v5
1228; GFX10-GISEL-NEXT:    v_cvt_u32_f32_e32 v0, v0
1229; GFX10-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
1230; GFX10-GISEL-NEXT:    v_xor_b32_e32 v2, v2, v6
1231; GFX10-GISEL-NEXT:    v_xor_b32_e32 v4, v4, v3
1232; GFX10-GISEL-NEXT:    v_xor_b32_e32 v5, v0, v3
1233; GFX10-GISEL-NEXT:    v_sub_co_u32 v0, vcc_lo, v1, v6
1234; GFX10-GISEL-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v2, v6, vcc_lo
1235; GFX10-GISEL-NEXT:    v_sub_co_u32 v2, vcc_lo, v5, v3
1236; GFX10-GISEL-NEXT:    v_sub_co_ci_u32_e32 v3, vcc_lo, v4, v3, vcc_lo
1237; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
1238;
1239; GFX11-SDAG-LABEL: intrinsic_lround_v2i64_v2f32:
1240; GFX11-SDAG:       ; %bb.0: ; %entry
1241; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1242; GFX11-SDAG-NEXT:    v_trunc_f32_e32 v2, v0
1243; GFX11-SDAG-NEXT:    v_trunc_f32_e32 v3, v1
1244; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1245; GFX11-SDAG-NEXT:    v_dual_sub_f32 v4, v0, v2 :: v_dual_sub_f32 v5, v1, v3
1246; GFX11-SDAG-NEXT:    v_cmp_ge_f32_e64 s0, |v4|, 0.5
1247; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
1248; GFX11-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, 1.0, s0
1249; GFX11-SDAG-NEXT:    v_cmp_ge_f32_e64 s0, |v5|, 0.5
1250; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1251; GFX11-SDAG-NEXT:    v_bfi_b32 v0, 0x7fffffff, v4, v0
1252; GFX11-SDAG-NEXT:    v_cndmask_b32_e64 v5, 0, 1.0, s0
1253; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1254; GFX11-SDAG-NEXT:    v_bfi_b32 v1, 0x7fffffff, v5, v1
1255; GFX11-SDAG-NEXT:    v_dual_add_f32 v0, v2, v0 :: v_dual_add_f32 v1, v3, v1
1256; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
1257; GFX11-SDAG-NEXT:    v_trunc_f32_e32 v0, v0
1258; GFX11-SDAG-NEXT:    v_trunc_f32_e32 v1, v1
1259; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
1260; GFX11-SDAG-NEXT:    v_mul_f32_e64 v2, 0x2f800000, |v0|
1261; GFX11-SDAG-NEXT:    v_ashrrev_i32_e32 v5, 31, v0
1262; GFX11-SDAG-NEXT:    v_mul_f32_e64 v3, 0x2f800000, |v1|
1263; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
1264; GFX11-SDAG-NEXT:    v_floor_f32_e32 v2, v2
1265; GFX11-SDAG-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
1266; GFX11-SDAG-NEXT:    v_floor_f32_e32 v3, v3
1267; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
1268; GFX11-SDAG-NEXT:    v_fma_f32 v4, 0xcf800000, v2, |v0|
1269; GFX11-SDAG-NEXT:    v_cvt_u32_f32_e32 v2, v2
1270; GFX11-SDAG-NEXT:    v_fma_f32 v0, 0xcf800000, v3, |v1|
1271; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
1272; GFX11-SDAG-NEXT:    v_cvt_u32_f32_e32 v1, v4
1273; GFX11-SDAG-NEXT:    v_cvt_u32_f32_e32 v3, v3
1274; GFX11-SDAG-NEXT:    v_xor_b32_e32 v2, v2, v5
1275; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1276; GFX11-SDAG-NEXT:    v_cvt_u32_f32_e32 v0, v0
1277; GFX11-SDAG-NEXT:    v_xor_b32_e32 v1, v1, v5
1278; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
1279; GFX11-SDAG-NEXT:    v_xor_b32_e32 v3, v3, v6
1280; GFX11-SDAG-NEXT:    v_xor_b32_e32 v4, v0, v6
1281; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
1282; GFX11-SDAG-NEXT:    v_sub_co_u32 v0, vcc_lo, v1, v5
1283; GFX11-SDAG-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v2, v5, vcc_lo
1284; GFX11-SDAG-NEXT:    v_sub_co_u32 v2, vcc_lo, v4, v6
1285; GFX11-SDAG-NEXT:    v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v6, vcc_lo
1286; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
1287;
1288; GFX11-GISEL-LABEL: intrinsic_lround_v2i64_v2f32:
1289; GFX11-GISEL:       ; %bb.0: ; %entry
1290; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1291; GFX11-GISEL-NEXT:    v_trunc_f32_e32 v2, v0
1292; GFX11-GISEL-NEXT:    v_trunc_f32_e32 v3, v1
1293; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1294; GFX11-GISEL-NEXT:    v_dual_sub_f32 v4, v0, v2 :: v_dual_sub_f32 v5, v1, v3
1295; GFX11-GISEL-NEXT:    v_cmp_ge_f32_e64 s0, |v4|, 0.5
1296; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
1297; GFX11-GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1.0, s0
1298; GFX11-GISEL-NEXT:    v_cmp_ge_f32_e64 s0, |v5|, 0.5
1299; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1300; GFX11-GISEL-NEXT:    v_and_or_b32 v0, 0x80000000, v0, v4
1301; GFX11-GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1.0, s0
1302; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1303; GFX11-GISEL-NEXT:    v_and_or_b32 v1, 0x80000000, v1, v5
1304; GFX11-GISEL-NEXT:    v_dual_add_f32 v0, v2, v0 :: v_dual_add_f32 v1, v3, v1
1305; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
1306; GFX11-GISEL-NEXT:    v_trunc_f32_e32 v2, v0
1307; GFX11-GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v0
1308; GFX11-GISEL-NEXT:    v_trunc_f32_e32 v3, v1
1309; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
1310; GFX11-GISEL-NEXT:    v_mul_f32_e64 v4, 0x2f800000, |v2|
1311; GFX11-GISEL-NEXT:    v_mul_f32_e64 v5, 0x2f800000, |v3|
1312; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1313; GFX11-GISEL-NEXT:    v_floor_f32_e32 v4, v4
1314; GFX11-GISEL-NEXT:    v_floor_f32_e32 v5, v5
1315; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1316; GFX11-GISEL-NEXT:    v_fma_f32 v2, 0xcf800000, v4, |v2|
1317; GFX11-GISEL-NEXT:    v_fma_f32 v0, 0xcf800000, v5, |v3|
1318; GFX11-GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
1319; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_4)
1320; GFX11-GISEL-NEXT:    v_cvt_u32_f32_e32 v1, v2
1321; GFX11-GISEL-NEXT:    v_cvt_u32_f32_e32 v2, v4
1322; GFX11-GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v5
1323; GFX11-GISEL-NEXT:    v_cvt_u32_f32_e32 v0, v0
1324; GFX11-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
1325; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1326; GFX11-GISEL-NEXT:    v_xor_b32_e32 v2, v2, v6
1327; GFX11-GISEL-NEXT:    v_xor_b32_e32 v4, v4, v3
1328; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1329; GFX11-GISEL-NEXT:    v_xor_b32_e32 v5, v0, v3
1330; GFX11-GISEL-NEXT:    v_sub_co_u32 v0, vcc_lo, v1, v6
1331; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
1332; GFX11-GISEL-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v2, v6, vcc_lo
1333; GFX11-GISEL-NEXT:    v_sub_co_u32 v2, vcc_lo, v5, v3
1334; GFX11-GISEL-NEXT:    v_sub_co_ci_u32_e32 v3, vcc_lo, v4, v3, vcc_lo
1335; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
1336entry:
1337  %res = tail call <2 x i64> @llvm.lround.v2i64.v2f32(<2 x float> %arg)
1338  ret <2 x i64> %res
1339}
1340