xref: /llvm-project/llvm/test/CodeGen/AMDGPU/lrint.ll (revision 0ee32c45730c94be1b7d5fa60a0e8dff5751d014)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub --version 5
2
3; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-SDAG %s
4; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s
5; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX10,GFX10-SDAG %s
6; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX10,GFX10-GISEL %s
7; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-SDAG %s
8; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-GISEL %s
9
; Declarations of the rint/lrint/llrint intrinsic overloads called by the
; test functions in this file.
10declare float @llvm.rint.f32(float)
11declare i32 @llvm.lrint.i32.f32(float)
12declare i32 @llvm.lrint.i32.f64(double)
13declare i64 @llvm.lrint.i64.f32(float)
14declare i64 @llvm.lrint.i64.f64(double)
15declare i64 @llvm.llrint.i64.f32(float)
; intrinsic_llrint_i64_f64 calls this overload; declare it explicitly like
; every other intrinsic used here instead of relying on implicit declaration.
declare i64 @llvm.llrint.i64.f64(double)
16declare half @llvm.rint.f16(half)
17declare i32 @llvm.lrint.i32.f16(half %arg)
18declare <2 x float> @llvm.rint.v2f32.v2f32(<2 x float> %arg)
19declare <2 x i32> @llvm.lrint.v2i32.v2f32(<2 x float> %arg)
20declare <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float> %arg)
21
; llvm.rint on a float argument. The shared GCN prefix is used, so all six llc
; invocations above are expected to emit one v_rndne_f32 between the entry
; s_waitcnt and the s_setpc_b64 return.
22define float @intrinsic_frint(float %arg) {
23; GCN-LABEL: intrinsic_frint:
24; GCN:       ; %bb.0: ; %entry
25; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
26; GCN-NEXT:    v_rndne_f32_e32 v0, v0
27; GCN-NEXT:    s_setpc_b64 s[30:31]
28entry:
29  %res = tail call float @llvm.rint.f32(float %arg)
30  ret float %res
31}
32
; llvm.lrint from float to i32. Checks are per target (shared by both
; instruction selectors): v_rndne_f32 followed by v_cvt_i32_f32. The gfx1100
; checks additionally expect an s_delay_alu between the two instructions.
33define i32 @intrinsic_lrint_i32_f32(float %arg) {
34; GFX9-LABEL: intrinsic_lrint_i32_f32:
35; GFX9:       ; %bb.0: ; %entry
36; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
37; GFX9-NEXT:    v_rndne_f32_e32 v0, v0
38; GFX9-NEXT:    v_cvt_i32_f32_e32 v0, v0
39; GFX9-NEXT:    s_setpc_b64 s[30:31]
40;
41; GFX10-LABEL: intrinsic_lrint_i32_f32:
42; GFX10:       ; %bb.0: ; %entry
43; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
44; GFX10-NEXT:    v_rndne_f32_e32 v0, v0
45; GFX10-NEXT:    v_cvt_i32_f32_e32 v0, v0
46; GFX10-NEXT:    s_setpc_b64 s[30:31]
47;
48; GFX11-LABEL: intrinsic_lrint_i32_f32:
49; GFX11:       ; %bb.0: ; %entry
50; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
51; GFX11-NEXT:    v_rndne_f32_e32 v0, v0
52; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
53; GFX11-NEXT:    v_cvt_i32_f32_e32 v0, v0
54; GFX11-NEXT:    s_setpc_b64 s[30:31]
55entry:
56  %res = tail call i32 @llvm.lrint.i32.f32(float %arg)
57  ret i32 %res
58}
59
; llvm.lrint from double to i32. Checks are per target (shared by both
; instruction selectors): v_rndne_f64 on v[0:1] followed by v_cvt_i32_f64 into
; v0. The gfx1100 checks additionally expect an s_delay_alu between the two.
60define i32 @intrinsic_lrint_i32_f64(double %arg) {
61; GFX9-LABEL: intrinsic_lrint_i32_f64:
62; GFX9:       ; %bb.0: ; %entry
63; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
64; GFX9-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
65; GFX9-NEXT:    v_cvt_i32_f64_e32 v0, v[0:1]
66; GFX9-NEXT:    s_setpc_b64 s[30:31]
67;
68; GFX10-LABEL: intrinsic_lrint_i32_f64:
69; GFX10:       ; %bb.0: ; %entry
70; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
71; GFX10-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
72; GFX10-NEXT:    v_cvt_i32_f64_e32 v0, v[0:1]
73; GFX10-NEXT:    s_setpc_b64 s[30:31]
74;
75; GFX11-LABEL: intrinsic_lrint_i32_f64:
76; GFX11:       ; %bb.0: ; %entry
77; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
78; GFX11-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
79; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
80; GFX11-NEXT:    v_cvt_i32_f64_e32 v0, v[0:1]
81; GFX11-NEXT:    s_setpc_b64 s[30:31]
82entry:
83  %res = tail call i32 @llvm.lrint.i32.f64(double %arg)
84  ret i32 %res
85}
86
; llvm.lrint from float to i64. Checks are split per target and per selector.
; After v_rndne_f32 the expected code builds the 64-bit result in two halves:
; the magnitude is scaled by 0x2f800000 (f32 bit pattern of 2^-32) and floored
; for the high half, an fma with 0xcf800000 (f32 bit pattern of -2^32) yields
; the low half, and both halves are xor'ed with and then have subtracted the
; sign mask produced by v_ashrrev_i32 ..., 31. The GlobalISel sequences carry
; an extra v_trunc_f32 after the rounding that the SelectionDAG ones lack.
87define i64 @intrinsic_lrint_i64_f32(float %arg) {
88; GFX9-SDAG-LABEL: intrinsic_lrint_i64_f32:
89; GFX9-SDAG:       ; %bb.0: ; %entry
90; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
91; GFX9-SDAG-NEXT:    v_rndne_f32_e32 v0, v0
92; GFX9-SDAG-NEXT:    s_mov_b32 s4, 0x2f800000
93; GFX9-SDAG-NEXT:    v_mul_f32_e64 v1, |v0|, s4
94; GFX9-SDAG-NEXT:    v_floor_f32_e32 v1, v1
95; GFX9-SDAG-NEXT:    s_mov_b32 s4, 0xcf800000
96; GFX9-SDAG-NEXT:    v_cvt_u32_f32_e32 v2, v1
97; GFX9-SDAG-NEXT:    v_fma_f32 v1, v1, s4, |v0|
98; GFX9-SDAG-NEXT:    v_cvt_u32_f32_e32 v1, v1
99; GFX9-SDAG-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
100; GFX9-SDAG-NEXT:    v_xor_b32_e32 v2, v2, v3
101; GFX9-SDAG-NEXT:    v_xor_b32_e32 v0, v1, v3
102; GFX9-SDAG-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v3
103; GFX9-SDAG-NEXT:    v_subb_co_u32_e32 v1, vcc, v2, v3, vcc
104; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
105;
106; GFX9-GISEL-LABEL: intrinsic_lrint_i64_f32:
107; GFX9-GISEL:       ; %bb.0: ; %entry
108; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
109; GFX9-GISEL-NEXT:    v_rndne_f32_e32 v0, v0
110; GFX9-GISEL-NEXT:    v_trunc_f32_e32 v1, v0
111; GFX9-GISEL-NEXT:    v_mov_b32_e32 v2, 0x2f800000
112; GFX9-GISEL-NEXT:    v_mul_f32_e64 v2, |v1|, v2
113; GFX9-GISEL-NEXT:    v_floor_f32_e32 v2, v2
114; GFX9-GISEL-NEXT:    v_mov_b32_e32 v3, 0xcf800000
115; GFX9-GISEL-NEXT:    v_fma_f32 v1, v2, v3, |v1|
116; GFX9-GISEL-NEXT:    v_cvt_u32_f32_e32 v1, v1
117; GFX9-GISEL-NEXT:    v_cvt_u32_f32_e32 v2, v2
118; GFX9-GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
119; GFX9-GISEL-NEXT:    v_xor_b32_e32 v0, v1, v3
120; GFX9-GISEL-NEXT:    v_xor_b32_e32 v1, v2, v3
121; GFX9-GISEL-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v3
122; GFX9-GISEL-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
123; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
124;
125; GFX10-SDAG-LABEL: intrinsic_lrint_i64_f32:
126; GFX10-SDAG:       ; %bb.0: ; %entry
127; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
128; GFX10-SDAG-NEXT:    v_rndne_f32_e32 v0, v0
129; GFX10-SDAG-NEXT:    v_mul_f32_e64 v1, 0x2f800000, |v0|
130; GFX10-SDAG-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
131; GFX10-SDAG-NEXT:    v_floor_f32_e32 v1, v1
132; GFX10-SDAG-NEXT:    v_fma_f32 v2, 0xcf800000, v1, |v0|
133; GFX10-SDAG-NEXT:    v_cvt_u32_f32_e32 v1, v1
134; GFX10-SDAG-NEXT:    v_cvt_u32_f32_e32 v0, v2
135; GFX10-SDAG-NEXT:    v_xor_b32_e32 v1, v1, v3
136; GFX10-SDAG-NEXT:    v_xor_b32_e32 v0, v0, v3
137; GFX10-SDAG-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v3
138; GFX10-SDAG-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
139; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
140;
141; GFX10-GISEL-LABEL: intrinsic_lrint_i64_f32:
142; GFX10-GISEL:       ; %bb.0: ; %entry
143; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
144; GFX10-GISEL-NEXT:    v_rndne_f32_e32 v0, v0
145; GFX10-GISEL-NEXT:    v_trunc_f32_e32 v1, v0
146; GFX10-GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
147; GFX10-GISEL-NEXT:    v_mul_f32_e64 v2, 0x2f800000, |v1|
148; GFX10-GISEL-NEXT:    v_floor_f32_e32 v2, v2
149; GFX10-GISEL-NEXT:    v_fma_f32 v1, 0xcf800000, v2, |v1|
150; GFX10-GISEL-NEXT:    v_cvt_u32_f32_e32 v0, v1
151; GFX10-GISEL-NEXT:    v_cvt_u32_f32_e32 v1, v2
152; GFX10-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v3
153; GFX10-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
154; GFX10-GISEL-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v3
155; GFX10-GISEL-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
156; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
157;
158; GFX11-SDAG-LABEL: intrinsic_lrint_i64_f32:
159; GFX11-SDAG:       ; %bb.0: ; %entry
160; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
161; GFX11-SDAG-NEXT:    v_rndne_f32_e32 v0, v0
162; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
163; GFX11-SDAG-NEXT:    v_mul_f32_e64 v1, 0x2f800000, |v0|
164; GFX11-SDAG-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
165; GFX11-SDAG-NEXT:    v_floor_f32_e32 v1, v1
166; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
167; GFX11-SDAG-NEXT:    v_fma_f32 v2, 0xcf800000, v1, |v0|
168; GFX11-SDAG-NEXT:    v_cvt_u32_f32_e32 v1, v1
169; GFX11-SDAG-NEXT:    v_cvt_u32_f32_e32 v0, v2
170; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
171; GFX11-SDAG-NEXT:    v_xor_b32_e32 v1, v1, v3
172; GFX11-SDAG-NEXT:    v_xor_b32_e32 v0, v0, v3
173; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
174; GFX11-SDAG-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v3
175; GFX11-SDAG-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
176; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
177;
178; GFX11-GISEL-LABEL: intrinsic_lrint_i64_f32:
179; GFX11-GISEL:       ; %bb.0: ; %entry
180; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
181; GFX11-GISEL-NEXT:    v_rndne_f32_e32 v0, v0
182; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
183; GFX11-GISEL-NEXT:    v_trunc_f32_e32 v1, v0
184; GFX11-GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
185; GFX11-GISEL-NEXT:    v_mul_f32_e64 v2, 0x2f800000, |v1|
186; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
187; GFX11-GISEL-NEXT:    v_floor_f32_e32 v2, v2
188; GFX11-GISEL-NEXT:    v_fma_f32 v1, 0xcf800000, v2, |v1|
189; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
190; GFX11-GISEL-NEXT:    v_cvt_u32_f32_e32 v0, v1
191; GFX11-GISEL-NEXT:    v_cvt_u32_f32_e32 v1, v2
192; GFX11-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v3
193; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
194; GFX11-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
195; GFX11-GISEL-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v3
196; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2)
197; GFX11-GISEL-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
198; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
199entry:
200  %res = tail call i64 @llvm.lrint.i64.f32(float %arg)
201  ret i64 %res
202}
203
; llvm.lrint from double to i64. Checks are split per target and per selector.
; After v_rndne_f64 the rounded value is scaled down by 2^32 (SelectionDAG:
; v_ldexp_f64 with exponent -32, written 0xffe0 / 0xffffffe0; GlobalISel:
; v_trunc_f64 then v_mul_f64 by the double whose high dword is 0x3df00000,
; i.e. 2^-32) and floored. An fma with -2^32 (high dword 0xc1f00000) leaves the
; low part, converted with v_cvt_u32_f64 into v0; the floored high part is
; converted with v_cvt_i32_f64 into v1.
204define i64 @intrinsic_lrint_i64_f64(double %arg) {
205; GFX9-SDAG-LABEL: intrinsic_lrint_i64_f64:
206; GFX9-SDAG:       ; %bb.0: ; %entry
207; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
208; GFX9-SDAG-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
209; GFX9-SDAG-NEXT:    s_movk_i32 s4, 0xffe0
210; GFX9-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[0:1], s4
211; GFX9-SDAG-NEXT:    s_mov_b32 s4, 0
212; GFX9-SDAG-NEXT:    s_mov_b32 s5, 0xc1f00000
213; GFX9-SDAG-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
214; GFX9-SDAG-NEXT:    v_fma_f64 v[0:1], v[2:3], s[4:5], v[0:1]
215; GFX9-SDAG-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
216; GFX9-SDAG-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
217; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
218;
219; GFX9-GISEL-LABEL: intrinsic_lrint_i64_f64:
220; GFX9-GISEL:       ; %bb.0: ; %entry
221; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
222; GFX9-GISEL-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
223; GFX9-GISEL-NEXT:    v_mov_b32_e32 v2, 0
224; GFX9-GISEL-NEXT:    v_mov_b32_e32 v3, 0x3df00000
225; GFX9-GISEL-NEXT:    v_mov_b32_e32 v4, 0
226; GFX9-GISEL-NEXT:    v_mov_b32_e32 v5, 0xc1f00000
227; GFX9-GISEL-NEXT:    v_trunc_f64_e32 v[0:1], v[0:1]
228; GFX9-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
229; GFX9-GISEL-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
230; GFX9-GISEL-NEXT:    v_fma_f64 v[0:1], v[2:3], v[4:5], v[0:1]
231; GFX9-GISEL-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
232; GFX9-GISEL-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
233; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
234;
235; GFX10-SDAG-LABEL: intrinsic_lrint_i64_f64:
236; GFX10-SDAG:       ; %bb.0: ; %entry
237; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
238; GFX10-SDAG-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
239; GFX10-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[0:1], 0xffffffe0
240; GFX10-SDAG-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
241; GFX10-SDAG-NEXT:    v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
242; GFX10-SDAG-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
243; GFX10-SDAG-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
244; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
245;
246; GFX10-GISEL-LABEL: intrinsic_lrint_i64_f64:
247; GFX10-GISEL:       ; %bb.0: ; %entry
248; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
249; GFX10-GISEL-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
250; GFX10-GISEL-NEXT:    v_trunc_f64_e32 v[0:1], v[0:1]
251; GFX10-GISEL-NEXT:    v_mul_f64 v[2:3], 0x3df00000, v[0:1]
252; GFX10-GISEL-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
253; GFX10-GISEL-NEXT:    v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
254; GFX10-GISEL-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
255; GFX10-GISEL-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
256; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
257;
258; GFX11-SDAG-LABEL: intrinsic_lrint_i64_f64:
259; GFX11-SDAG:       ; %bb.0: ; %entry
260; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
261; GFX11-SDAG-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
262; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
263; GFX11-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[0:1], 0xffffffe0
264; GFX11-SDAG-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
265; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
266; GFX11-SDAG-NEXT:    v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
267; GFX11-SDAG-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
268; GFX11-SDAG-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
269; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
270;
271; GFX11-GISEL-LABEL: intrinsic_lrint_i64_f64:
272; GFX11-GISEL:       ; %bb.0: ; %entry
273; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
274; GFX11-GISEL-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
275; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
276; GFX11-GISEL-NEXT:    v_trunc_f64_e32 v[0:1], v[0:1]
277; GFX11-GISEL-NEXT:    v_mul_f64 v[2:3], 0x3df00000, v[0:1]
278; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
279; GFX11-GISEL-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
280; GFX11-GISEL-NEXT:    v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
281; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
282; GFX11-GISEL-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
283; GFX11-GISEL-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
284; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
285entry:
286  %res = tail call i64 @llvm.lrint.i64.f64(double %arg)
287  ret i64 %res
288}
289
; llvm.llrint from float to i64. Checks are split per target and per selector.
; The expected instruction sequences mirror those of intrinsic_lrint_i64_f32:
; v_rndne_f32, a split of the magnitude into high and low 32-bit halves using
; 0x2f800000 (f32 2^-32) and 0xcf800000 (f32 -2^32), then xor and 64-bit
; subtract with the sign mask from v_ashrrev_i32 ..., 31. GlobalISel again
; inserts a v_trunc_f32 after the rounding.
290define i64 @intrinsic_llrint_i64_f32(float %arg) {
291; GFX9-SDAG-LABEL: intrinsic_llrint_i64_f32:
292; GFX9-SDAG:       ; %bb.0: ; %entry
293; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
294; GFX9-SDAG-NEXT:    v_rndne_f32_e32 v0, v0
295; GFX9-SDAG-NEXT:    s_mov_b32 s4, 0x2f800000
296; GFX9-SDAG-NEXT:    v_mul_f32_e64 v1, |v0|, s4
297; GFX9-SDAG-NEXT:    v_floor_f32_e32 v1, v1
298; GFX9-SDAG-NEXT:    s_mov_b32 s4, 0xcf800000
299; GFX9-SDAG-NEXT:    v_cvt_u32_f32_e32 v2, v1
300; GFX9-SDAG-NEXT:    v_fma_f32 v1, v1, s4, |v0|
301; GFX9-SDAG-NEXT:    v_cvt_u32_f32_e32 v1, v1
302; GFX9-SDAG-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
303; GFX9-SDAG-NEXT:    v_xor_b32_e32 v2, v2, v3
304; GFX9-SDAG-NEXT:    v_xor_b32_e32 v0, v1, v3
305; GFX9-SDAG-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v3
306; GFX9-SDAG-NEXT:    v_subb_co_u32_e32 v1, vcc, v2, v3, vcc
307; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
308;
309; GFX9-GISEL-LABEL: intrinsic_llrint_i64_f32:
310; GFX9-GISEL:       ; %bb.0: ; %entry
311; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
312; GFX9-GISEL-NEXT:    v_rndne_f32_e32 v0, v0
313; GFX9-GISEL-NEXT:    v_trunc_f32_e32 v1, v0
314; GFX9-GISEL-NEXT:    v_mov_b32_e32 v2, 0x2f800000
315; GFX9-GISEL-NEXT:    v_mul_f32_e64 v2, |v1|, v2
316; GFX9-GISEL-NEXT:    v_floor_f32_e32 v2, v2
317; GFX9-GISEL-NEXT:    v_mov_b32_e32 v3, 0xcf800000
318; GFX9-GISEL-NEXT:    v_fma_f32 v1, v2, v3, |v1|
319; GFX9-GISEL-NEXT:    v_cvt_u32_f32_e32 v1, v1
320; GFX9-GISEL-NEXT:    v_cvt_u32_f32_e32 v2, v2
321; GFX9-GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
322; GFX9-GISEL-NEXT:    v_xor_b32_e32 v0, v1, v3
323; GFX9-GISEL-NEXT:    v_xor_b32_e32 v1, v2, v3
324; GFX9-GISEL-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v3
325; GFX9-GISEL-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
326; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
327;
328; GFX10-SDAG-LABEL: intrinsic_llrint_i64_f32:
329; GFX10-SDAG:       ; %bb.0: ; %entry
330; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
331; GFX10-SDAG-NEXT:    v_rndne_f32_e32 v0, v0
332; GFX10-SDAG-NEXT:    v_mul_f32_e64 v1, 0x2f800000, |v0|
333; GFX10-SDAG-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
334; GFX10-SDAG-NEXT:    v_floor_f32_e32 v1, v1
335; GFX10-SDAG-NEXT:    v_fma_f32 v2, 0xcf800000, v1, |v0|
336; GFX10-SDAG-NEXT:    v_cvt_u32_f32_e32 v1, v1
337; GFX10-SDAG-NEXT:    v_cvt_u32_f32_e32 v0, v2
338; GFX10-SDAG-NEXT:    v_xor_b32_e32 v1, v1, v3
339; GFX10-SDAG-NEXT:    v_xor_b32_e32 v0, v0, v3
340; GFX10-SDAG-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v3
341; GFX10-SDAG-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
342; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
343;
344; GFX10-GISEL-LABEL: intrinsic_llrint_i64_f32:
345; GFX10-GISEL:       ; %bb.0: ; %entry
346; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
347; GFX10-GISEL-NEXT:    v_rndne_f32_e32 v0, v0
348; GFX10-GISEL-NEXT:    v_trunc_f32_e32 v1, v0
349; GFX10-GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
350; GFX10-GISEL-NEXT:    v_mul_f32_e64 v2, 0x2f800000, |v1|
351; GFX10-GISEL-NEXT:    v_floor_f32_e32 v2, v2
352; GFX10-GISEL-NEXT:    v_fma_f32 v1, 0xcf800000, v2, |v1|
353; GFX10-GISEL-NEXT:    v_cvt_u32_f32_e32 v0, v1
354; GFX10-GISEL-NEXT:    v_cvt_u32_f32_e32 v1, v2
355; GFX10-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v3
356; GFX10-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
357; GFX10-GISEL-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v3
358; GFX10-GISEL-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
359; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
360;
361; GFX11-SDAG-LABEL: intrinsic_llrint_i64_f32:
362; GFX11-SDAG:       ; %bb.0: ; %entry
363; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
364; GFX11-SDAG-NEXT:    v_rndne_f32_e32 v0, v0
365; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
366; GFX11-SDAG-NEXT:    v_mul_f32_e64 v1, 0x2f800000, |v0|
367; GFX11-SDAG-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
368; GFX11-SDAG-NEXT:    v_floor_f32_e32 v1, v1
369; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
370; GFX11-SDAG-NEXT:    v_fma_f32 v2, 0xcf800000, v1, |v0|
371; GFX11-SDAG-NEXT:    v_cvt_u32_f32_e32 v1, v1
372; GFX11-SDAG-NEXT:    v_cvt_u32_f32_e32 v0, v2
373; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
374; GFX11-SDAG-NEXT:    v_xor_b32_e32 v1, v1, v3
375; GFX11-SDAG-NEXT:    v_xor_b32_e32 v0, v0, v3
376; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
377; GFX11-SDAG-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v3
378; GFX11-SDAG-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
379; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
380;
381; GFX11-GISEL-LABEL: intrinsic_llrint_i64_f32:
382; GFX11-GISEL:       ; %bb.0: ; %entry
383; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
384; GFX11-GISEL-NEXT:    v_rndne_f32_e32 v0, v0
385; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
386; GFX11-GISEL-NEXT:    v_trunc_f32_e32 v1, v0
387; GFX11-GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
388; GFX11-GISEL-NEXT:    v_mul_f32_e64 v2, 0x2f800000, |v1|
389; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
390; GFX11-GISEL-NEXT:    v_floor_f32_e32 v2, v2
391; GFX11-GISEL-NEXT:    v_fma_f32 v1, 0xcf800000, v2, |v1|
392; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
393; GFX11-GISEL-NEXT:    v_cvt_u32_f32_e32 v0, v1
394; GFX11-GISEL-NEXT:    v_cvt_u32_f32_e32 v1, v2
395; GFX11-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v3
396; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
397; GFX11-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
398; GFX11-GISEL-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v3
399; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2)
400; GFX11-GISEL-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
401; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
402entry:
403  %res = tail call i64 @llvm.llrint.i64.f32(float %arg)
404  ret i64 %res
405}
406
; llvm.llrint from double to i64. Checks are split per target and per selector.
; The expected instruction sequences mirror those of intrinsic_lrint_i64_f64:
; v_rndne_f64, a scale by 2^-32 (v_ldexp_f64 with exponent -32 for
; SelectionDAG, v_trunc_f64 plus v_mul_f64 for GlobalISel), v_floor_f64, an fma
; with -2^32 (high dword 0xc1f00000), then v_cvt_u32_f64 into v0 and
; v_cvt_i32_f64 into v1.
407define i64 @intrinsic_llrint_i64_f64(double %arg) {
408; GFX9-SDAG-LABEL: intrinsic_llrint_i64_f64:
409; GFX9-SDAG:       ; %bb.0: ; %entry
410; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
411; GFX9-SDAG-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
412; GFX9-SDAG-NEXT:    s_movk_i32 s4, 0xffe0
413; GFX9-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[0:1], s4
414; GFX9-SDAG-NEXT:    s_mov_b32 s4, 0
415; GFX9-SDAG-NEXT:    s_mov_b32 s5, 0xc1f00000
416; GFX9-SDAG-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
417; GFX9-SDAG-NEXT:    v_fma_f64 v[0:1], v[2:3], s[4:5], v[0:1]
418; GFX9-SDAG-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
419; GFX9-SDAG-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
420; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
421;
422; GFX9-GISEL-LABEL: intrinsic_llrint_i64_f64:
423; GFX9-GISEL:       ; %bb.0: ; %entry
424; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
425; GFX9-GISEL-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
426; GFX9-GISEL-NEXT:    v_mov_b32_e32 v2, 0
427; GFX9-GISEL-NEXT:    v_mov_b32_e32 v3, 0x3df00000
428; GFX9-GISEL-NEXT:    v_mov_b32_e32 v4, 0
429; GFX9-GISEL-NEXT:    v_mov_b32_e32 v5, 0xc1f00000
430; GFX9-GISEL-NEXT:    v_trunc_f64_e32 v[0:1], v[0:1]
431; GFX9-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
432; GFX9-GISEL-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
433; GFX9-GISEL-NEXT:    v_fma_f64 v[0:1], v[2:3], v[4:5], v[0:1]
434; GFX9-GISEL-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
435; GFX9-GISEL-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
436; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
437;
438; GFX10-SDAG-LABEL: intrinsic_llrint_i64_f64:
439; GFX10-SDAG:       ; %bb.0: ; %entry
440; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
441; GFX10-SDAG-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
442; GFX10-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[0:1], 0xffffffe0
443; GFX10-SDAG-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
444; GFX10-SDAG-NEXT:    v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
445; GFX10-SDAG-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
446; GFX10-SDAG-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
447; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
448;
449; GFX10-GISEL-LABEL: intrinsic_llrint_i64_f64:
450; GFX10-GISEL:       ; %bb.0: ; %entry
451; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
452; GFX10-GISEL-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
453; GFX10-GISEL-NEXT:    v_trunc_f64_e32 v[0:1], v[0:1]
454; GFX10-GISEL-NEXT:    v_mul_f64 v[2:3], 0x3df00000, v[0:1]
455; GFX10-GISEL-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
456; GFX10-GISEL-NEXT:    v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
457; GFX10-GISEL-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
458; GFX10-GISEL-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
459; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
460;
461; GFX11-SDAG-LABEL: intrinsic_llrint_i64_f64:
462; GFX11-SDAG:       ; %bb.0: ; %entry
463; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
464; GFX11-SDAG-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
465; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
466; GFX11-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[0:1], 0xffffffe0
467; GFX11-SDAG-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
468; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
469; GFX11-SDAG-NEXT:    v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
470; GFX11-SDAG-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
471; GFX11-SDAG-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
472; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
473;
474; GFX11-GISEL-LABEL: intrinsic_llrint_i64_f64:
475; GFX11-GISEL:       ; %bb.0: ; %entry
476; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
477; GFX11-GISEL-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
478; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
479; GFX11-GISEL-NEXT:    v_trunc_f64_e32 v[0:1], v[0:1]
480; GFX11-GISEL-NEXT:    v_mul_f64 v[2:3], 0x3df00000, v[0:1]
481; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
482; GFX11-GISEL-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
483; GFX11-GISEL-NEXT:    v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
484; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
485; GFX11-GISEL-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
486; GFX11-GISEL-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
487; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
488entry:
489  %res = tail call i64 @llvm.llrint.i64.f64(double %arg)
490  ret i64 %res
491}
492
; llvm.rint on a half argument. The shared GCN prefix is used, so all six llc
; invocations above are expected to emit one v_rndne_f16 between the entry
; s_waitcnt and the s_setpc_b64 return.
493define half @intrinsic_frint_half(half %arg) {
494; GCN-LABEL: intrinsic_frint_half:
495; GCN:       ; %bb.0: ; %entry
496; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
497; GCN-NEXT:    v_rndne_f16_e32 v0, v0
498; GCN-NEXT:    s_setpc_b64 s[30:31]
499entry:
500  %res = tail call half @llvm.rint.f16(half %arg)
501  ret half %res
502}
503
; llvm.lrint from half to i32. Checks are per target (shared by both
; instruction selectors): v_rndne_f16, then v_cvt_f32_f16 to widen the rounded
; value, then v_cvt_i32_f32. The gfx1100 checks additionally expect an
; s_delay_alu after the rounding instruction.
504define i32 @intrinsic_lrint_i32_f16(half %arg) {
505; GFX9-LABEL: intrinsic_lrint_i32_f16:
506; GFX9:       ; %bb.0: ; %entry
507; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
508; GFX9-NEXT:    v_rndne_f16_e32 v0, v0
509; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
510; GFX9-NEXT:    v_cvt_i32_f32_e32 v0, v0
511; GFX9-NEXT:    s_setpc_b64 s[30:31]
512;
513; GFX10-LABEL: intrinsic_lrint_i32_f16:
514; GFX10:       ; %bb.0: ; %entry
515; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
516; GFX10-NEXT:    v_rndne_f16_e32 v0, v0
517; GFX10-NEXT:    v_cvt_f32_f16_e32 v0, v0
518; GFX10-NEXT:    v_cvt_i32_f32_e32 v0, v0
519; GFX10-NEXT:    s_setpc_b64 s[30:31]
520;
521; GFX11-LABEL: intrinsic_lrint_i32_f16:
522; GFX11:       ; %bb.0: ; %entry
523; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
524; GFX11-NEXT:    v_rndne_f16_e32 v0, v0
525; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
526; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v0
527; GFX11-NEXT:    v_cvt_i32_f32_e32 v0, v0
528; GFX11-NEXT:    s_setpc_b64 s[30:31]
529entry:
530  %res = tail call i32 @llvm.lrint.i32.f16(half %arg)
531  ret i32 %res
532}
533
; llvm.rint on a <2 x float> argument. The shared GCN prefix is used, so all
; six llc invocations above are expected to emit two v_rndne_f32 instructions,
; one on v0 and one on v1.
534define <2 x float> @intrinsic_frint_v2f32_v2f32(<2 x float> %arg) {
535; GCN-LABEL: intrinsic_frint_v2f32_v2f32:
536; GCN:       ; %bb.0: ; %entry
537; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
538; GCN-NEXT:    v_rndne_f32_e32 v0, v0
539; GCN-NEXT:    v_rndne_f32_e32 v1, v1
540; GCN-NEXT:    s_setpc_b64 s[30:31]
541entry:
542  %res = tail call <2 x float> @llvm.rint.v2f32.v2f32(<2 x float> %arg)
543  ret <2 x float> %res
544}
545
; llvm.lrint from <2 x float> to <2 x i32>. Checks are per target (shared by
; both instruction selectors): v_rndne_f32 on v0 and v1, then v_cvt_i32_f32 on
; v0 and v1. The gfx1100 checks additionally expect an s_delay_alu between the
; rounding pair and the conversion pair.
546define <2 x i32> @intrinsic_lrint_v2i32_v2f32(<2 x float> %arg) {
547; GFX9-LABEL: intrinsic_lrint_v2i32_v2f32:
548; GFX9:       ; %bb.0: ; %entry
549; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
550; GFX9-NEXT:    v_rndne_f32_e32 v0, v0
551; GFX9-NEXT:    v_rndne_f32_e32 v1, v1
552; GFX9-NEXT:    v_cvt_i32_f32_e32 v0, v0
553; GFX9-NEXT:    v_cvt_i32_f32_e32 v1, v1
554; GFX9-NEXT:    s_setpc_b64 s[30:31]
555;
556; GFX10-LABEL: intrinsic_lrint_v2i32_v2f32:
557; GFX10:       ; %bb.0: ; %entry
558; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
559; GFX10-NEXT:    v_rndne_f32_e32 v0, v0
560; GFX10-NEXT:    v_rndne_f32_e32 v1, v1
561; GFX10-NEXT:    v_cvt_i32_f32_e32 v0, v0
562; GFX10-NEXT:    v_cvt_i32_f32_e32 v1, v1
563; GFX10-NEXT:    s_setpc_b64 s[30:31]
564;
565; GFX11-LABEL: intrinsic_lrint_v2i32_v2f32:
566; GFX11:       ; %bb.0: ; %entry
567; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
568; GFX11-NEXT:    v_rndne_f32_e32 v0, v0
569; GFX11-NEXT:    v_rndne_f32_e32 v1, v1
570; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
571; GFX11-NEXT:    v_cvt_i32_f32_e32 v0, v0
572; GFX11-NEXT:    v_cvt_i32_f32_e32 v1, v1
573; GFX11-NEXT:    s_setpc_b64 s[30:31]
574entry:
575  %res = tail call <2 x i32> @llvm.lrint.v2i32.v2f32(<2 x float> %arg)
576  ret <2 x i32> %res
577}
578
579define <2 x i64> @intrinsic_lrint_v2i64_v2f32(<2 x float> %arg) {
580; GFX9-SDAG-LABEL: intrinsic_lrint_v2i64_v2f32:
581; GFX9-SDAG:       ; %bb.0: ; %entry
582; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
583; GFX9-SDAG-NEXT:    v_rndne_f32_e32 v0, v0
584; GFX9-SDAG-NEXT:    s_mov_b32 s4, 0x2f800000
585; GFX9-SDAG-NEXT:    v_mul_f32_e64 v2, |v0|, s4
586; GFX9-SDAG-NEXT:    v_floor_f32_e32 v2, v2
587; GFX9-SDAG-NEXT:    s_mov_b32 s5, 0xcf800000
588; GFX9-SDAG-NEXT:    v_cvt_u32_f32_e32 v3, v2
589; GFX9-SDAG-NEXT:    v_fma_f32 v2, v2, s5, |v0|
590; GFX9-SDAG-NEXT:    v_cvt_u32_f32_e32 v2, v2
591; GFX9-SDAG-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
592; GFX9-SDAG-NEXT:    v_xor_b32_e32 v3, v3, v4
593; GFX9-SDAG-NEXT:    v_xor_b32_e32 v0, v2, v4
594; GFX9-SDAG-NEXT:    v_rndne_f32_e32 v2, v1
595; GFX9-SDAG-NEXT:    v_mul_f32_e64 v1, |v2|, s4
596; GFX9-SDAG-NEXT:    v_floor_f32_e32 v1, v1
597; GFX9-SDAG-NEXT:    v_cvt_u32_f32_e32 v5, v1
598; GFX9-SDAG-NEXT:    v_fma_f32 v1, v1, s5, |v2|
599; GFX9-SDAG-NEXT:    v_cvt_u32_f32_e32 v6, v1
600; GFX9-SDAG-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v4
601; GFX9-SDAG-NEXT:    v_subb_co_u32_e32 v1, vcc, v3, v4, vcc
602; GFX9-SDAG-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
603; GFX9-SDAG-NEXT:    v_xor_b32_e32 v2, v6, v3
604; GFX9-SDAG-NEXT:    v_xor_b32_e32 v4, v5, v3
605; GFX9-SDAG-NEXT:    v_sub_co_u32_e32 v2, vcc, v2, v3
606; GFX9-SDAG-NEXT:    v_subb_co_u32_e32 v3, vcc, v4, v3, vcc
607; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
608;
609; GFX9-GISEL-LABEL: intrinsic_lrint_v2i64_v2f32:
610; GFX9-GISEL:       ; %bb.0: ; %entry
611; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
612; GFX9-GISEL-NEXT:    v_rndne_f32_e32 v0, v0
613; GFX9-GISEL-NEXT:    v_trunc_f32_e32 v2, v0
614; GFX9-GISEL-NEXT:    v_mov_b32_e32 v3, 0x2f800000
615; GFX9-GISEL-NEXT:    v_mul_f32_e64 v4, |v2|, v3
616; GFX9-GISEL-NEXT:    v_floor_f32_e32 v4, v4
617; GFX9-GISEL-NEXT:    v_mov_b32_e32 v5, 0xcf800000
618; GFX9-GISEL-NEXT:    v_fma_f32 v2, v4, v5, |v2|
619; GFX9-GISEL-NEXT:    v_cvt_u32_f32_e32 v2, v2
620; GFX9-GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
621; GFX9-GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v0
622; GFX9-GISEL-NEXT:    v_xor_b32_e32 v0, v2, v6
623; GFX9-GISEL-NEXT:    v_xor_b32_e32 v2, v4, v6
624; GFX9-GISEL-NEXT:    v_rndne_f32_e32 v4, v1
625; GFX9-GISEL-NEXT:    v_trunc_f32_e32 v1, v4
626; GFX9-GISEL-NEXT:    v_mul_f32_e64 v3, |v1|, v3
627; GFX9-GISEL-NEXT:    v_floor_f32_e32 v3, v3
628; GFX9-GISEL-NEXT:    v_fma_f32 v1, v3, v5, |v1|
629; GFX9-GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v1
630; GFX9-GISEL-NEXT:    v_cvt_u32_f32_e32 v3, v3
631; GFX9-GISEL-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v6
632; GFX9-GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v4
633; GFX9-GISEL-NEXT:    v_subb_co_u32_e32 v1, vcc, v2, v6, vcc
634; GFX9-GISEL-NEXT:    v_xor_b32_e32 v2, v5, v4
635; GFX9-GISEL-NEXT:    v_xor_b32_e32 v3, v3, v4
636; GFX9-GISEL-NEXT:    v_sub_co_u32_e32 v2, vcc, v2, v4
637; GFX9-GISEL-NEXT:    v_subb_co_u32_e32 v3, vcc, v3, v4, vcc
638; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
639;
640; GFX10-SDAG-LABEL: intrinsic_lrint_v2i64_v2f32:
641; GFX10-SDAG:       ; %bb.0: ; %entry
642; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
643; GFX10-SDAG-NEXT:    v_rndne_f32_e32 v0, v0
644; GFX10-SDAG-NEXT:    v_rndne_f32_e32 v1, v1
645; GFX10-SDAG-NEXT:    v_mul_f32_e64 v2, 0x2f800000, |v0|
646; GFX10-SDAG-NEXT:    v_mul_f32_e64 v3, 0x2f800000, |v1|
647; GFX10-SDAG-NEXT:    v_ashrrev_i32_e32 v5, 31, v0
648; GFX10-SDAG-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
649; GFX10-SDAG-NEXT:    v_floor_f32_e32 v2, v2
650; GFX10-SDAG-NEXT:    v_floor_f32_e32 v3, v3
651; GFX10-SDAG-NEXT:    v_fma_f32 v4, 0xcf800000, v2, |v0|
652; GFX10-SDAG-NEXT:    v_fma_f32 v0, 0xcf800000, v3, |v1|
653; GFX10-SDAG-NEXT:    v_cvt_u32_f32_e32 v2, v2
654; GFX10-SDAG-NEXT:    v_cvt_u32_f32_e32 v3, v3
655; GFX10-SDAG-NEXT:    v_cvt_u32_f32_e32 v1, v4
656; GFX10-SDAG-NEXT:    v_cvt_u32_f32_e32 v0, v0
657; GFX10-SDAG-NEXT:    v_xor_b32_e32 v2, v2, v5
658; GFX10-SDAG-NEXT:    v_xor_b32_e32 v3, v3, v6
659; GFX10-SDAG-NEXT:    v_xor_b32_e32 v1, v1, v5
660; GFX10-SDAG-NEXT:    v_xor_b32_e32 v4, v0, v6
661; GFX10-SDAG-NEXT:    v_sub_co_u32 v0, vcc_lo, v1, v5
662; GFX10-SDAG-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v2, v5, vcc_lo
663; GFX10-SDAG-NEXT:    v_sub_co_u32 v2, vcc_lo, v4, v6
664; GFX10-SDAG-NEXT:    v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v6, vcc_lo
665; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
666;
667; GFX10-GISEL-LABEL: intrinsic_lrint_v2i64_v2f32:
668; GFX10-GISEL:       ; %bb.0: ; %entry
669; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
670; GFX10-GISEL-NEXT:    v_rndne_f32_e32 v0, v0
671; GFX10-GISEL-NEXT:    v_rndne_f32_e32 v1, v1
672; GFX10-GISEL-NEXT:    v_trunc_f32_e32 v2, v0
673; GFX10-GISEL-NEXT:    v_trunc_f32_e32 v3, v1
674; GFX10-GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v0
675; GFX10-GISEL-NEXT:    v_mul_f32_e64 v4, 0x2f800000, |v2|
676; GFX10-GISEL-NEXT:    v_mul_f32_e64 v5, 0x2f800000, |v3|
677; GFX10-GISEL-NEXT:    v_floor_f32_e32 v4, v4
678; GFX10-GISEL-NEXT:    v_floor_f32_e32 v5, v5
679; GFX10-GISEL-NEXT:    v_fma_f32 v2, 0xcf800000, v4, |v2|
680; GFX10-GISEL-NEXT:    v_fma_f32 v0, 0xcf800000, v5, |v3|
681; GFX10-GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
682; GFX10-GISEL-NEXT:    v_cvt_u32_f32_e32 v1, v2
683; GFX10-GISEL-NEXT:    v_cvt_u32_f32_e32 v2, v4
684; GFX10-GISEL-NEXT:    v_cvt_u32_f32_e32 v0, v0
685; GFX10-GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v5
686; GFX10-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
687; GFX10-GISEL-NEXT:    v_xor_b32_e32 v2, v2, v6
688; GFX10-GISEL-NEXT:    v_xor_b32_e32 v5, v0, v3
689; GFX10-GISEL-NEXT:    v_xor_b32_e32 v4, v4, v3
690; GFX10-GISEL-NEXT:    v_sub_co_u32 v0, vcc_lo, v1, v6
691; GFX10-GISEL-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v2, v6, vcc_lo
692; GFX10-GISEL-NEXT:    v_sub_co_u32 v2, vcc_lo, v5, v3
693; GFX10-GISEL-NEXT:    v_sub_co_ci_u32_e32 v3, vcc_lo, v4, v3, vcc_lo
694; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
695;
696; GFX11-SDAG-LABEL: intrinsic_lrint_v2i64_v2f32:
697; GFX11-SDAG:       ; %bb.0: ; %entry
698; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
699; GFX11-SDAG-NEXT:    v_rndne_f32_e32 v0, v0
700; GFX11-SDAG-NEXT:    v_rndne_f32_e32 v1, v1
701; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
702; GFX11-SDAG-NEXT:    v_mul_f32_e64 v2, 0x2f800000, |v0|
703; GFX11-SDAG-NEXT:    v_mul_f32_e64 v3, 0x2f800000, |v1|
704; GFX11-SDAG-NEXT:    v_ashrrev_i32_e32 v5, 31, v0
705; GFX11-SDAG-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
706; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
707; GFX11-SDAG-NEXT:    v_floor_f32_e32 v2, v2
708; GFX11-SDAG-NEXT:    v_floor_f32_e32 v3, v3
709; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
710; GFX11-SDAG-NEXT:    v_fma_f32 v4, 0xcf800000, v2, |v0|
711; GFX11-SDAG-NEXT:    v_fma_f32 v0, 0xcf800000, v3, |v1|
712; GFX11-SDAG-NEXT:    v_cvt_u32_f32_e32 v2, v2
713; GFX11-SDAG-NEXT:    v_cvt_u32_f32_e32 v3, v3
714; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
715; GFX11-SDAG-NEXT:    v_cvt_u32_f32_e32 v1, v4
716; GFX11-SDAG-NEXT:    v_cvt_u32_f32_e32 v0, v0
717; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
718; GFX11-SDAG-NEXT:    v_xor_b32_e32 v2, v2, v5
719; GFX11-SDAG-NEXT:    v_xor_b32_e32 v3, v3, v6
720; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
721; GFX11-SDAG-NEXT:    v_xor_b32_e32 v1, v1, v5
722; GFX11-SDAG-NEXT:    v_xor_b32_e32 v4, v0, v6
723; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
724; GFX11-SDAG-NEXT:    v_sub_co_u32 v0, vcc_lo, v1, v5
725; GFX11-SDAG-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v2, v5, vcc_lo
726; GFX11-SDAG-NEXT:    v_sub_co_u32 v2, vcc_lo, v4, v6
727; GFX11-SDAG-NEXT:    v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v6, vcc_lo
728; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
729;
730; GFX11-GISEL-LABEL: intrinsic_lrint_v2i64_v2f32:
731; GFX11-GISEL:       ; %bb.0: ; %entry
732; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
733; GFX11-GISEL-NEXT:    v_rndne_f32_e32 v0, v0
734; GFX11-GISEL-NEXT:    v_rndne_f32_e32 v1, v1
735; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
736; GFX11-GISEL-NEXT:    v_trunc_f32_e32 v2, v0
737; GFX11-GISEL-NEXT:    v_trunc_f32_e32 v3, v1
738; GFX11-GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v0
739; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
740; GFX11-GISEL-NEXT:    v_mul_f32_e64 v4, 0x2f800000, |v2|
741; GFX11-GISEL-NEXT:    v_mul_f32_e64 v5, 0x2f800000, |v3|
742; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
743; GFX11-GISEL-NEXT:    v_floor_f32_e32 v4, v4
744; GFX11-GISEL-NEXT:    v_floor_f32_e32 v5, v5
745; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
746; GFX11-GISEL-NEXT:    v_fma_f32 v2, 0xcf800000, v4, |v2|
747; GFX11-GISEL-NEXT:    v_fma_f32 v0, 0xcf800000, v5, |v3|
748; GFX11-GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
749; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
750; GFX11-GISEL-NEXT:    v_cvt_u32_f32_e32 v1, v2
751; GFX11-GISEL-NEXT:    v_cvt_u32_f32_e32 v2, v4
752; GFX11-GISEL-NEXT:    v_cvt_u32_f32_e32 v0, v0
753; GFX11-GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v5
754; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
755; GFX11-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
756; GFX11-GISEL-NEXT:    v_xor_b32_e32 v2, v2, v6
757; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
758; GFX11-GISEL-NEXT:    v_xor_b32_e32 v5, v0, v3
759; GFX11-GISEL-NEXT:    v_xor_b32_e32 v4, v4, v3
760; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
761; GFX11-GISEL-NEXT:    v_sub_co_u32 v0, vcc_lo, v1, v6
762; GFX11-GISEL-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v2, v6, vcc_lo
763; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
764; GFX11-GISEL-NEXT:    v_sub_co_u32 v2, vcc_lo, v5, v3
765; GFX11-GISEL-NEXT:    v_sub_co_ci_u32_e32 v3, vcc_lo, v4, v3, vcc_lo
766; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
767entry:
768  %res = tail call <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float> %arg)
769  ret <2 x i64> %res
770}
771
772