xref: /llvm-project/llvm/test/CodeGen/AMDGPU/roundeven.ll (revision 89cb0eefcbb6303ba6813238d5ad37b103495d11)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s
3; RUN: llc -global-isel -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s
4; RUN: llc -global-isel -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
5; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
6; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
7; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
8; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=SDAG_GFX6 %s
9; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefix=SDAG_GFX7 %s
10; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=SDAG_GFX8 %s
11; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=SDAG_GFX9 %s
12; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=SDAG_GFX10PLUS,SDAG_GFX10 %s
13; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=SDAG_GFX10PLUS,SDAG_GFX11 %s
14
; Scalar f32 roundeven. Every RUN configuration above (GlobalISel and
; SelectionDAG, tahiti through gfx1100) is expected to select a single
; v_rndne_f32 on v0 for the llvm.roundeven.f32 call, followed by the return.
15define float @v_roundeven_f32(float %x) {
16; GFX6-LABEL: v_roundeven_f32:
17; GFX6:       ; %bb.0:
18; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19; GFX6-NEXT:    v_rndne_f32_e32 v0, v0
20; GFX6-NEXT:    s_setpc_b64 s[30:31]
21;
22; GFX7-LABEL: v_roundeven_f32:
23; GFX7:       ; %bb.0:
24; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
25; GFX7-NEXT:    v_rndne_f32_e32 v0, v0
26; GFX7-NEXT:    s_setpc_b64 s[30:31]
27;
28; GFX8-LABEL: v_roundeven_f32:
29; GFX8:       ; %bb.0:
30; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
31; GFX8-NEXT:    v_rndne_f32_e32 v0, v0
32; GFX8-NEXT:    s_setpc_b64 s[30:31]
33;
34; GFX9-LABEL: v_roundeven_f32:
35; GFX9:       ; %bb.0:
36; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
37; GFX9-NEXT:    v_rndne_f32_e32 v0, v0
38; GFX9-NEXT:    s_setpc_b64 s[30:31]
39;
40; GFX10PLUS-LABEL: v_roundeven_f32:
41; GFX10PLUS:       ; %bb.0:
42; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
43; GFX10PLUS-NEXT:    v_rndne_f32_e32 v0, v0
44; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
45;
46; SDAG_GFX6-LABEL: v_roundeven_f32:
47; SDAG_GFX6:       ; %bb.0:
48; SDAG_GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
49; SDAG_GFX6-NEXT:    v_rndne_f32_e32 v0, v0
50; SDAG_GFX6-NEXT:    s_setpc_b64 s[30:31]
51;
52; SDAG_GFX7-LABEL: v_roundeven_f32:
53; SDAG_GFX7:       ; %bb.0:
54; SDAG_GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
55; SDAG_GFX7-NEXT:    v_rndne_f32_e32 v0, v0
56; SDAG_GFX7-NEXT:    s_setpc_b64 s[30:31]
57;
58; SDAG_GFX8-LABEL: v_roundeven_f32:
59; SDAG_GFX8:       ; %bb.0:
60; SDAG_GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
61; SDAG_GFX8-NEXT:    v_rndne_f32_e32 v0, v0
62; SDAG_GFX8-NEXT:    s_setpc_b64 s[30:31]
63;
64; SDAG_GFX9-LABEL: v_roundeven_f32:
65; SDAG_GFX9:       ; %bb.0:
66; SDAG_GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
67; SDAG_GFX9-NEXT:    v_rndne_f32_e32 v0, v0
68; SDAG_GFX9-NEXT:    s_setpc_b64 s[30:31]
69;
70; SDAG_GFX10PLUS-LABEL: v_roundeven_f32:
71; SDAG_GFX10PLUS:       ; %bb.0:
72; SDAG_GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
73; SDAG_GFX10PLUS-NEXT:    v_rndne_f32_e32 v0, v0
74; SDAG_GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
75  %roundeven = call float @llvm.roundeven.f32(float %x)
76  ret float %roundeven
77}
78
; <2 x float> roundeven. All checked configurations expect the vector to be
; handled element-wise: one v_rndne_f32 per lane, operating in place on v0
; and v1, with no extra moves.
79define <2 x float> @v_roundeven_v2f32(<2 x float> %x) {
80; GFX6-LABEL: v_roundeven_v2f32:
81; GFX6:       ; %bb.0:
82; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
83; GFX6-NEXT:    v_rndne_f32_e32 v0, v0
84; GFX6-NEXT:    v_rndne_f32_e32 v1, v1
85; GFX6-NEXT:    s_setpc_b64 s[30:31]
86;
87; GFX7-LABEL: v_roundeven_v2f32:
88; GFX7:       ; %bb.0:
89; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
90; GFX7-NEXT:    v_rndne_f32_e32 v0, v0
91; GFX7-NEXT:    v_rndne_f32_e32 v1, v1
92; GFX7-NEXT:    s_setpc_b64 s[30:31]
93;
94; GFX8-LABEL: v_roundeven_v2f32:
95; GFX8:       ; %bb.0:
96; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
97; GFX8-NEXT:    v_rndne_f32_e32 v0, v0
98; GFX8-NEXT:    v_rndne_f32_e32 v1, v1
99; GFX8-NEXT:    s_setpc_b64 s[30:31]
100;
101; GFX9-LABEL: v_roundeven_v2f32:
102; GFX9:       ; %bb.0:
103; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
104; GFX9-NEXT:    v_rndne_f32_e32 v0, v0
105; GFX9-NEXT:    v_rndne_f32_e32 v1, v1
106; GFX9-NEXT:    s_setpc_b64 s[30:31]
107;
108; GFX10PLUS-LABEL: v_roundeven_v2f32:
109; GFX10PLUS:       ; %bb.0:
110; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
111; GFX10PLUS-NEXT:    v_rndne_f32_e32 v0, v0
112; GFX10PLUS-NEXT:    v_rndne_f32_e32 v1, v1
113; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
114;
115; SDAG_GFX6-LABEL: v_roundeven_v2f32:
116; SDAG_GFX6:       ; %bb.0:
117; SDAG_GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
118; SDAG_GFX6-NEXT:    v_rndne_f32_e32 v0, v0
119; SDAG_GFX6-NEXT:    v_rndne_f32_e32 v1, v1
120; SDAG_GFX6-NEXT:    s_setpc_b64 s[30:31]
121;
122; SDAG_GFX7-LABEL: v_roundeven_v2f32:
123; SDAG_GFX7:       ; %bb.0:
124; SDAG_GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
125; SDAG_GFX7-NEXT:    v_rndne_f32_e32 v0, v0
126; SDAG_GFX7-NEXT:    v_rndne_f32_e32 v1, v1
127; SDAG_GFX7-NEXT:    s_setpc_b64 s[30:31]
128;
129; SDAG_GFX8-LABEL: v_roundeven_v2f32:
130; SDAG_GFX8:       ; %bb.0:
131; SDAG_GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
132; SDAG_GFX8-NEXT:    v_rndne_f32_e32 v0, v0
133; SDAG_GFX8-NEXT:    v_rndne_f32_e32 v1, v1
134; SDAG_GFX8-NEXT:    s_setpc_b64 s[30:31]
135;
136; SDAG_GFX9-LABEL: v_roundeven_v2f32:
137; SDAG_GFX9:       ; %bb.0:
138; SDAG_GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
139; SDAG_GFX9-NEXT:    v_rndne_f32_e32 v0, v0
140; SDAG_GFX9-NEXT:    v_rndne_f32_e32 v1, v1
141; SDAG_GFX9-NEXT:    s_setpc_b64 s[30:31]
142;
143; SDAG_GFX10PLUS-LABEL: v_roundeven_v2f32:
144; SDAG_GFX10PLUS:       ; %bb.0:
145; SDAG_GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
146; SDAG_GFX10PLUS-NEXT:    v_rndne_f32_e32 v0, v0
147; SDAG_GFX10PLUS-NEXT:    v_rndne_f32_e32 v1, v1
148; SDAG_GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
149  %roundeven = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %x)
150  ret <2 x float> %roundeven
151}
152
; <3 x float> roundeven (odd element count). All checked configurations
; expect exactly three in-place v_rndne_f32 instructions on v0..v2; no
; fourth lane is rounded.
153define <3 x float> @v_roundeven_v3f32(<3 x float> %x) {
154; GFX6-LABEL: v_roundeven_v3f32:
155; GFX6:       ; %bb.0:
156; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
157; GFX6-NEXT:    v_rndne_f32_e32 v0, v0
158; GFX6-NEXT:    v_rndne_f32_e32 v1, v1
159; GFX6-NEXT:    v_rndne_f32_e32 v2, v2
160; GFX6-NEXT:    s_setpc_b64 s[30:31]
161;
162; GFX7-LABEL: v_roundeven_v3f32:
163; GFX7:       ; %bb.0:
164; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
165; GFX7-NEXT:    v_rndne_f32_e32 v0, v0
166; GFX7-NEXT:    v_rndne_f32_e32 v1, v1
167; GFX7-NEXT:    v_rndne_f32_e32 v2, v2
168; GFX7-NEXT:    s_setpc_b64 s[30:31]
169;
170; GFX8-LABEL: v_roundeven_v3f32:
171; GFX8:       ; %bb.0:
172; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
173; GFX8-NEXT:    v_rndne_f32_e32 v0, v0
174; GFX8-NEXT:    v_rndne_f32_e32 v1, v1
175; GFX8-NEXT:    v_rndne_f32_e32 v2, v2
176; GFX8-NEXT:    s_setpc_b64 s[30:31]
177;
178; GFX9-LABEL: v_roundeven_v3f32:
179; GFX9:       ; %bb.0:
180; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
181; GFX9-NEXT:    v_rndne_f32_e32 v0, v0
182; GFX9-NEXT:    v_rndne_f32_e32 v1, v1
183; GFX9-NEXT:    v_rndne_f32_e32 v2, v2
184; GFX9-NEXT:    s_setpc_b64 s[30:31]
185;
186; GFX10PLUS-LABEL: v_roundeven_v3f32:
187; GFX10PLUS:       ; %bb.0:
188; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
189; GFX10PLUS-NEXT:    v_rndne_f32_e32 v0, v0
190; GFX10PLUS-NEXT:    v_rndne_f32_e32 v1, v1
191; GFX10PLUS-NEXT:    v_rndne_f32_e32 v2, v2
192; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
193;
194; SDAG_GFX6-LABEL: v_roundeven_v3f32:
195; SDAG_GFX6:       ; %bb.0:
196; SDAG_GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
197; SDAG_GFX6-NEXT:    v_rndne_f32_e32 v0, v0
198; SDAG_GFX6-NEXT:    v_rndne_f32_e32 v1, v1
199; SDAG_GFX6-NEXT:    v_rndne_f32_e32 v2, v2
200; SDAG_GFX6-NEXT:    s_setpc_b64 s[30:31]
201;
202; SDAG_GFX7-LABEL: v_roundeven_v3f32:
203; SDAG_GFX7:       ; %bb.0:
204; SDAG_GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
205; SDAG_GFX7-NEXT:    v_rndne_f32_e32 v0, v0
206; SDAG_GFX7-NEXT:    v_rndne_f32_e32 v1, v1
207; SDAG_GFX7-NEXT:    v_rndne_f32_e32 v2, v2
208; SDAG_GFX7-NEXT:    s_setpc_b64 s[30:31]
209;
210; SDAG_GFX8-LABEL: v_roundeven_v3f32:
211; SDAG_GFX8:       ; %bb.0:
212; SDAG_GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
213; SDAG_GFX8-NEXT:    v_rndne_f32_e32 v0, v0
214; SDAG_GFX8-NEXT:    v_rndne_f32_e32 v1, v1
215; SDAG_GFX8-NEXT:    v_rndne_f32_e32 v2, v2
216; SDAG_GFX8-NEXT:    s_setpc_b64 s[30:31]
217;
218; SDAG_GFX9-LABEL: v_roundeven_v3f32:
219; SDAG_GFX9:       ; %bb.0:
220; SDAG_GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
221; SDAG_GFX9-NEXT:    v_rndne_f32_e32 v0, v0
222; SDAG_GFX9-NEXT:    v_rndne_f32_e32 v1, v1
223; SDAG_GFX9-NEXT:    v_rndne_f32_e32 v2, v2
224; SDAG_GFX9-NEXT:    s_setpc_b64 s[30:31]
225;
226; SDAG_GFX10PLUS-LABEL: v_roundeven_v3f32:
227; SDAG_GFX10PLUS:       ; %bb.0:
228; SDAG_GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
229; SDAG_GFX10PLUS-NEXT:    v_rndne_f32_e32 v0, v0
230; SDAG_GFX10PLUS-NEXT:    v_rndne_f32_e32 v1, v1
231; SDAG_GFX10PLUS-NEXT:    v_rndne_f32_e32 v2, v2
232; SDAG_GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
233  %roundeven = call <3 x float> @llvm.roundeven.v3f32(<3 x float> %x)
234  ret <3 x float> %roundeven
235}
236
; <4 x float> roundeven. All checked configurations expect four in-place
; v_rndne_f32 instructions, one per lane, on v0..v3.
237define <4 x float> @v_roundeven_v4f32(<4 x float> %x) {
238; GFX6-LABEL: v_roundeven_v4f32:
239; GFX6:       ; %bb.0:
240; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
241; GFX6-NEXT:    v_rndne_f32_e32 v0, v0
242; GFX6-NEXT:    v_rndne_f32_e32 v1, v1
243; GFX6-NEXT:    v_rndne_f32_e32 v2, v2
244; GFX6-NEXT:    v_rndne_f32_e32 v3, v3
245; GFX6-NEXT:    s_setpc_b64 s[30:31]
246;
247; GFX7-LABEL: v_roundeven_v4f32:
248; GFX7:       ; %bb.0:
249; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
250; GFX7-NEXT:    v_rndne_f32_e32 v0, v0
251; GFX7-NEXT:    v_rndne_f32_e32 v1, v1
252; GFX7-NEXT:    v_rndne_f32_e32 v2, v2
253; GFX7-NEXT:    v_rndne_f32_e32 v3, v3
254; GFX7-NEXT:    s_setpc_b64 s[30:31]
255;
256; GFX8-LABEL: v_roundeven_v4f32:
257; GFX8:       ; %bb.0:
258; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
259; GFX8-NEXT:    v_rndne_f32_e32 v0, v0
260; GFX8-NEXT:    v_rndne_f32_e32 v1, v1
261; GFX8-NEXT:    v_rndne_f32_e32 v2, v2
262; GFX8-NEXT:    v_rndne_f32_e32 v3, v3
263; GFX8-NEXT:    s_setpc_b64 s[30:31]
264;
265; GFX9-LABEL: v_roundeven_v4f32:
266; GFX9:       ; %bb.0:
267; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
268; GFX9-NEXT:    v_rndne_f32_e32 v0, v0
269; GFX9-NEXT:    v_rndne_f32_e32 v1, v1
270; GFX9-NEXT:    v_rndne_f32_e32 v2, v2
271; GFX9-NEXT:    v_rndne_f32_e32 v3, v3
272; GFX9-NEXT:    s_setpc_b64 s[30:31]
273;
274; GFX10PLUS-LABEL: v_roundeven_v4f32:
275; GFX10PLUS:       ; %bb.0:
276; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
277; GFX10PLUS-NEXT:    v_rndne_f32_e32 v0, v0
278; GFX10PLUS-NEXT:    v_rndne_f32_e32 v1, v1
279; GFX10PLUS-NEXT:    v_rndne_f32_e32 v2, v2
280; GFX10PLUS-NEXT:    v_rndne_f32_e32 v3, v3
281; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
282;
283; SDAG_GFX6-LABEL: v_roundeven_v4f32:
284; SDAG_GFX6:       ; %bb.0:
285; SDAG_GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
286; SDAG_GFX6-NEXT:    v_rndne_f32_e32 v0, v0
287; SDAG_GFX6-NEXT:    v_rndne_f32_e32 v1, v1
288; SDAG_GFX6-NEXT:    v_rndne_f32_e32 v2, v2
289; SDAG_GFX6-NEXT:    v_rndne_f32_e32 v3, v3
290; SDAG_GFX6-NEXT:    s_setpc_b64 s[30:31]
291;
292; SDAG_GFX7-LABEL: v_roundeven_v4f32:
293; SDAG_GFX7:       ; %bb.0:
294; SDAG_GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
295; SDAG_GFX7-NEXT:    v_rndne_f32_e32 v0, v0
296; SDAG_GFX7-NEXT:    v_rndne_f32_e32 v1, v1
297; SDAG_GFX7-NEXT:    v_rndne_f32_e32 v2, v2
298; SDAG_GFX7-NEXT:    v_rndne_f32_e32 v3, v3
299; SDAG_GFX7-NEXT:    s_setpc_b64 s[30:31]
300;
301; SDAG_GFX8-LABEL: v_roundeven_v4f32:
302; SDAG_GFX8:       ; %bb.0:
303; SDAG_GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
304; SDAG_GFX8-NEXT:    v_rndne_f32_e32 v0, v0
305; SDAG_GFX8-NEXT:    v_rndne_f32_e32 v1, v1
306; SDAG_GFX8-NEXT:    v_rndne_f32_e32 v2, v2
307; SDAG_GFX8-NEXT:    v_rndne_f32_e32 v3, v3
308; SDAG_GFX8-NEXT:    s_setpc_b64 s[30:31]
309;
310; SDAG_GFX9-LABEL: v_roundeven_v4f32:
311; SDAG_GFX9:       ; %bb.0:
312; SDAG_GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
313; SDAG_GFX9-NEXT:    v_rndne_f32_e32 v0, v0
314; SDAG_GFX9-NEXT:    v_rndne_f32_e32 v1, v1
315; SDAG_GFX9-NEXT:    v_rndne_f32_e32 v2, v2
316; SDAG_GFX9-NEXT:    v_rndne_f32_e32 v3, v3
317; SDAG_GFX9-NEXT:    s_setpc_b64 s[30:31]
318;
319; SDAG_GFX10PLUS-LABEL: v_roundeven_v4f32:
320; SDAG_GFX10PLUS:       ; %bb.0:
321; SDAG_GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
322; SDAG_GFX10PLUS-NEXT:    v_rndne_f32_e32 v0, v0
323; SDAG_GFX10PLUS-NEXT:    v_rndne_f32_e32 v1, v1
324; SDAG_GFX10PLUS-NEXT:    v_rndne_f32_e32 v2, v2
325; SDAG_GFX10PLUS-NEXT:    v_rndne_f32_e32 v3, v3
326; SDAG_GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
327  %roundeven = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %x)
328  ret <4 x float> %roundeven
329}
330
; Scalar f16 roundeven. The expectations split by target:
;  * tahiti / hawaii: no v_rndne_f16 is expected; the value is rounded with
;    v_rndne_f32. The GlobalISel runs extend f16->f32, round, then truncate
;    back with v_cvt_f16_f32. The SelectionDAG runs instead expect the
;    truncate/extend pair before the rounding and nothing after it.
;    NOTE(review): presumably because SelectionDAG passes and returns half
;    as f32 on these targets -- confirm against the calling convention.
;  * fiji and newer (both selectors): a single native v_rndne_f16.
331define half @v_roundeven_f16(half %x) {
332; GFX6-LABEL: v_roundeven_f16:
333; GFX6:       ; %bb.0:
334; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
335; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
336; GFX6-NEXT:    v_rndne_f32_e32 v0, v0
337; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
338; GFX6-NEXT:    s_setpc_b64 s[30:31]
339;
340; GFX7-LABEL: v_roundeven_f16:
341; GFX7:       ; %bb.0:
342; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
343; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
344; GFX7-NEXT:    v_rndne_f32_e32 v0, v0
345; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
346; GFX7-NEXT:    s_setpc_b64 s[30:31]
347;
348; GFX8-LABEL: v_roundeven_f16:
349; GFX8:       ; %bb.0:
350; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
351; GFX8-NEXT:    v_rndne_f16_e32 v0, v0
352; GFX8-NEXT:    s_setpc_b64 s[30:31]
353;
354; GFX9-LABEL: v_roundeven_f16:
355; GFX9:       ; %bb.0:
356; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
357; GFX9-NEXT:    v_rndne_f16_e32 v0, v0
358; GFX9-NEXT:    s_setpc_b64 s[30:31]
359;
360; GFX10PLUS-LABEL: v_roundeven_f16:
361; GFX10PLUS:       ; %bb.0:
362; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
363; GFX10PLUS-NEXT:    v_rndne_f16_e32 v0, v0
364; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
365;
366; SDAG_GFX6-LABEL: v_roundeven_f16:
367; SDAG_GFX6:       ; %bb.0:
368; SDAG_GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
369; SDAG_GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
370; SDAG_GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
371; SDAG_GFX6-NEXT:    v_rndne_f32_e32 v0, v0
372; SDAG_GFX6-NEXT:    s_setpc_b64 s[30:31]
373;
374; SDAG_GFX7-LABEL: v_roundeven_f16:
375; SDAG_GFX7:       ; %bb.0:
376; SDAG_GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
377; SDAG_GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
378; SDAG_GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
379; SDAG_GFX7-NEXT:    v_rndne_f32_e32 v0, v0
380; SDAG_GFX7-NEXT:    s_setpc_b64 s[30:31]
381;
382; SDAG_GFX8-LABEL: v_roundeven_f16:
383; SDAG_GFX8:       ; %bb.0:
384; SDAG_GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
385; SDAG_GFX8-NEXT:    v_rndne_f16_e32 v0, v0
386; SDAG_GFX8-NEXT:    s_setpc_b64 s[30:31]
387;
388; SDAG_GFX9-LABEL: v_roundeven_f16:
389; SDAG_GFX9:       ; %bb.0:
390; SDAG_GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
391; SDAG_GFX9-NEXT:    v_rndne_f16_e32 v0, v0
392; SDAG_GFX9-NEXT:    s_setpc_b64 s[30:31]
393;
394; SDAG_GFX10PLUS-LABEL: v_roundeven_f16:
395; SDAG_GFX10PLUS:       ; %bb.0:
396; SDAG_GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
397; SDAG_GFX10PLUS-NEXT:    v_rndne_f16_e32 v0, v0
398; SDAG_GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
399  %roundeven = call half @llvm.roundeven.f16(half %x)
400  ret half %roundeven
401}
402
; <2 x half> roundeven. This is the first function in the file where the
; gfx1010 and gfx1100 expectations diverge, so they are checked under
; separate prefixes instead of the shared one used by the functions above.
;  * tahiti / hawaii: each element is rounded in f32 (v_rndne_f32) with
;    f16<->f32 conversions around it; the two selectors differ in where the
;    conversions are placed.
;  * fiji: low half via plain v_rndne_f16, high half via the SDWA form that
;    reads WORD_1 and writes WORD_1; the halves are merged with v_or_b32.
;  * gfx900 / gfx1010: the SDWA form reads WORD_1 but writes a full DWORD,
;    and the halves are combined with v_pack_b32_f16.
;  * gfx1100: no SDWA form is expected; the high half is extracted with an
;    explicit 16-bit v_lshrrev_b32 before two plain v_rndne_f16 and a pack.
403define <2 x half> @v_roundeven_v2f16(<2 x half> %x) {
404; GFX6-LABEL: v_roundeven_v2f16:
405; GFX6:       ; %bb.0:
406; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
407; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
408; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
409; GFX6-NEXT:    v_rndne_f32_e32 v0, v0
410; GFX6-NEXT:    v_rndne_f32_e32 v1, v1
411; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
412; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
413; GFX6-NEXT:    s_setpc_b64 s[30:31]
414;
415; GFX7-LABEL: v_roundeven_v2f16:
416; GFX7:       ; %bb.0:
417; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
418; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
419; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
420; GFX7-NEXT:    v_rndne_f32_e32 v0, v0
421; GFX7-NEXT:    v_rndne_f32_e32 v1, v1
422; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
423; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
424; GFX7-NEXT:    s_setpc_b64 s[30:31]
425;
426; GFX8-LABEL: v_roundeven_v2f16:
427; GFX8:       ; %bb.0:
428; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
429; GFX8-NEXT:    v_rndne_f16_e32 v1, v0
430; GFX8-NEXT:    v_rndne_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
431; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
432; GFX8-NEXT:    s_setpc_b64 s[30:31]
433;
434; GFX9-LABEL: v_roundeven_v2f16:
435; GFX9:       ; %bb.0:
436; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
437; GFX9-NEXT:    v_rndne_f16_e32 v1, v0
438; GFX9-NEXT:    v_rndne_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
439; GFX9-NEXT:    v_pack_b32_f16 v0, v1, v0
440; GFX9-NEXT:    s_setpc_b64 s[30:31]
441;
442; GFX10-LABEL: v_roundeven_v2f16:
443; GFX10:       ; %bb.0:
444; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
445; GFX10-NEXT:    v_rndne_f16_e32 v1, v0
446; GFX10-NEXT:    v_rndne_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
447; GFX10-NEXT:    v_pack_b32_f16 v0, v1, v0
448; GFX10-NEXT:    s_setpc_b64 s[30:31]
449;
450; GFX11-LABEL: v_roundeven_v2f16:
451; GFX11:       ; %bb.0:
452; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
453; GFX11-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
454; GFX11-NEXT:    v_rndne_f16_e32 v0, v0
455; GFX11-NEXT:    v_rndne_f16_e32 v1, v1
456; GFX11-NEXT:    v_pack_b32_f16 v0, v0, v1
457; GFX11-NEXT:    s_setpc_b64 s[30:31]
458;
459; SDAG_GFX6-LABEL: v_roundeven_v2f16:
460; SDAG_GFX6:       ; %bb.0:
461; SDAG_GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
462; SDAG_GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
463; SDAG_GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
464; SDAG_GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
465; SDAG_GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
466; SDAG_GFX6-NEXT:    v_rndne_f32_e32 v0, v0
467; SDAG_GFX6-NEXT:    v_rndne_f32_e32 v1, v1
468; SDAG_GFX6-NEXT:    s_setpc_b64 s[30:31]
469;
470; SDAG_GFX7-LABEL: v_roundeven_v2f16:
471; SDAG_GFX7:       ; %bb.0:
472; SDAG_GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
473; SDAG_GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
474; SDAG_GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
475; SDAG_GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
476; SDAG_GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
477; SDAG_GFX7-NEXT:    v_rndne_f32_e32 v0, v0
478; SDAG_GFX7-NEXT:    v_rndne_f32_e32 v1, v1
479; SDAG_GFX7-NEXT:    s_setpc_b64 s[30:31]
480;
481; SDAG_GFX8-LABEL: v_roundeven_v2f16:
482; SDAG_GFX8:       ; %bb.0:
483; SDAG_GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
484; SDAG_GFX8-NEXT:    v_rndne_f16_sdwa v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
485; SDAG_GFX8-NEXT:    v_rndne_f16_e32 v0, v0
486; SDAG_GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
487; SDAG_GFX8-NEXT:    s_setpc_b64 s[30:31]
488;
489; SDAG_GFX9-LABEL: v_roundeven_v2f16:
490; SDAG_GFX9:       ; %bb.0:
491; SDAG_GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
492; SDAG_GFX9-NEXT:    v_rndne_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
493; SDAG_GFX9-NEXT:    v_rndne_f16_e32 v0, v0
494; SDAG_GFX9-NEXT:    v_pack_b32_f16 v0, v0, v1
495; SDAG_GFX9-NEXT:    s_setpc_b64 s[30:31]
496;
497; SDAG_GFX10-LABEL: v_roundeven_v2f16:
498; SDAG_GFX10:       ; %bb.0:
499; SDAG_GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
500; SDAG_GFX10-NEXT:    v_rndne_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
501; SDAG_GFX10-NEXT:    v_rndne_f16_e32 v0, v0
502; SDAG_GFX10-NEXT:    v_pack_b32_f16 v0, v0, v1
503; SDAG_GFX10-NEXT:    s_setpc_b64 s[30:31]
504;
505; SDAG_GFX11-LABEL: v_roundeven_v2f16:
506; SDAG_GFX11:       ; %bb.0:
507; SDAG_GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
508; SDAG_GFX11-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
509; SDAG_GFX11-NEXT:    v_rndne_f16_e32 v0, v0
510; SDAG_GFX11-NEXT:    v_rndne_f16_e32 v1, v1
511; SDAG_GFX11-NEXT:    v_pack_b32_f16 v0, v0, v1
512; SDAG_GFX11-NEXT:    s_setpc_b64 s[30:31]
513  %roundeven = call <2 x half> @llvm.roundeven.v2f16(<2 x half> %x)
514  ret <2 x half> %roundeven
515}
516
; <2 x half> roundeven of a negated input; exercises how the fneg is
; combined with the rounding.
;  * tahiti / hawaii (both selectors): the two halves are packed into one
;    register, negated with v_xor_b32 against 0x80008000 (bit 15 of each
;    16-bit half), unpacked again and rounded in f32.
;  * fiji and newer, GlobalISel runs: the fneg stays a separate v_xor_b32
;    with 0x80008000 ahead of the same rounding sequence the non-negated
;    v2f16 test expects.
;  * fiji and newer, SelectionDAG runs: no xor is expected; the negation
;    appears as a source modifier on the rounding instructions (-v0 / -v1),
;    which uses the e64 or SDWA encodings.
517define <2 x half> @v_roundeven_v2f16_fneg(<2 x half> %x) {
518; GFX6-LABEL: v_roundeven_v2f16_fneg:
519; GFX6:       ; %bb.0:
520; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
521; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
522; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
523; GFX6-NEXT:    v_or_b32_e32 v0, v1, v0
524; GFX6-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
525; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v0
526; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
527; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v0
528; GFX6-NEXT:    v_rndne_f32_e32 v0, v1
529; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
530; GFX6-NEXT:    v_rndne_f32_e32 v1, v2
531; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
532; GFX6-NEXT:    s_setpc_b64 s[30:31]
533;
534; GFX7-LABEL: v_roundeven_v2f16_fneg:
535; GFX7:       ; %bb.0:
536; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
537; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
538; GFX7-NEXT:    v_and_b32_e32 v0, 0xffff, v0
539; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
540; GFX7-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
541; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v0
542; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
543; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v0
544; GFX7-NEXT:    v_rndne_f32_e32 v0, v1
545; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
546; GFX7-NEXT:    v_rndne_f32_e32 v1, v2
547; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
548; GFX7-NEXT:    s_setpc_b64 s[30:31]
549;
550; GFX8-LABEL: v_roundeven_v2f16_fneg:
551; GFX8:       ; %bb.0:
552; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
553; GFX8-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
554; GFX8-NEXT:    v_rndne_f16_e32 v1, v0
555; GFX8-NEXT:    v_rndne_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
556; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
557; GFX8-NEXT:    s_setpc_b64 s[30:31]
558;
559; GFX9-LABEL: v_roundeven_v2f16_fneg:
560; GFX9:       ; %bb.0:
561; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
562; GFX9-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
563; GFX9-NEXT:    v_rndne_f16_e32 v1, v0
564; GFX9-NEXT:    v_rndne_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
565; GFX9-NEXT:    v_pack_b32_f16 v0, v1, v0
566; GFX9-NEXT:    s_setpc_b64 s[30:31]
567;
568; GFX10-LABEL: v_roundeven_v2f16_fneg:
569; GFX10:       ; %bb.0:
570; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
571; GFX10-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
572; GFX10-NEXT:    v_rndne_f16_e32 v1, v0
573; GFX10-NEXT:    v_rndne_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
574; GFX10-NEXT:    v_pack_b32_f16 v0, v1, v0
575; GFX10-NEXT:    s_setpc_b64 s[30:31]
576;
577; GFX11-LABEL: v_roundeven_v2f16_fneg:
578; GFX11:       ; %bb.0:
579; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
580; GFX11-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
581; GFX11-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
582; GFX11-NEXT:    v_rndne_f16_e32 v0, v0
583; GFX11-NEXT:    v_rndne_f16_e32 v1, v1
584; GFX11-NEXT:    v_pack_b32_f16 v0, v0, v1
585; GFX11-NEXT:    s_setpc_b64 s[30:31]
586;
587; SDAG_GFX6-LABEL: v_roundeven_v2f16_fneg:
588; SDAG_GFX6:       ; %bb.0:
589; SDAG_GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
590; SDAG_GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
591; SDAG_GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
592; SDAG_GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
593; SDAG_GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
594; SDAG_GFX6-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
595; SDAG_GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
596; SDAG_GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
597; SDAG_GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
598; SDAG_GFX6-NEXT:    v_rndne_f32_e32 v0, v0
599; SDAG_GFX6-NEXT:    v_rndne_f32_e32 v1, v1
600; SDAG_GFX6-NEXT:    s_setpc_b64 s[30:31]
601;
602; SDAG_GFX7-LABEL: v_roundeven_v2f16_fneg:
603; SDAG_GFX7:       ; %bb.0:
604; SDAG_GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
605; SDAG_GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
606; SDAG_GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
607; SDAG_GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
608; SDAG_GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
609; SDAG_GFX7-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
610; SDAG_GFX7-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
611; SDAG_GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
612; SDAG_GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
613; SDAG_GFX7-NEXT:    v_rndne_f32_e32 v0, v0
614; SDAG_GFX7-NEXT:    v_rndne_f32_e32 v1, v1
615; SDAG_GFX7-NEXT:    s_setpc_b64 s[30:31]
616;
617; SDAG_GFX8-LABEL: v_roundeven_v2f16_fneg:
618; SDAG_GFX8:       ; %bb.0:
619; SDAG_GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
620; SDAG_GFX8-NEXT:    v_rndne_f16_sdwa v1, -v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
621; SDAG_GFX8-NEXT:    v_rndne_f16_e64 v0, -v0
622; SDAG_GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
623; SDAG_GFX8-NEXT:    s_setpc_b64 s[30:31]
624;
625; SDAG_GFX9-LABEL: v_roundeven_v2f16_fneg:
626; SDAG_GFX9:       ; %bb.0:
627; SDAG_GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
628; SDAG_GFX9-NEXT:    v_rndne_f16_sdwa v1, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
629; SDAG_GFX9-NEXT:    v_rndne_f16_e64 v0, -v0
630; SDAG_GFX9-NEXT:    v_pack_b32_f16 v0, v0, v1
631; SDAG_GFX9-NEXT:    s_setpc_b64 s[30:31]
632;
633; SDAG_GFX10-LABEL: v_roundeven_v2f16_fneg:
634; SDAG_GFX10:       ; %bb.0:
635; SDAG_GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
636; SDAG_GFX10-NEXT:    v_rndne_f16_sdwa v1, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
637; SDAG_GFX10-NEXT:    v_rndne_f16_e64 v0, -v0
638; SDAG_GFX10-NEXT:    v_pack_b32_f16 v0, v0, v1
639; SDAG_GFX10-NEXT:    s_setpc_b64 s[30:31]
640;
641; SDAG_GFX11-LABEL: v_roundeven_v2f16_fneg:
642; SDAG_GFX11:       ; %bb.0:
643; SDAG_GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
644; SDAG_GFX11-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
645; SDAG_GFX11-NEXT:    v_rndne_f16_e64 v0, -v0
646; SDAG_GFX11-NEXT:    v_rndne_f16_e64 v1, -v1
647; SDAG_GFX11-NEXT:    v_pack_b32_f16 v0, v0, v1
648; SDAG_GFX11-NEXT:    s_setpc_b64 s[30:31]
649  %x.fneg = fneg <2 x half> %x
650  %roundeven = call <2 x half> @llvm.roundeven.v2f16(<2 x half> %x.fneg)
651  ret <2 x half> %roundeven
652}
653
654define <4 x half> @v_roundeven_v4f16(<4 x half> %x) {
655; GFX6-LABEL: v_roundeven_v4f16:
656; GFX6:       ; %bb.0:
657; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
658; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
659; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
660; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
661; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
662; GFX6-NEXT:    v_rndne_f32_e32 v0, v0
663; GFX6-NEXT:    v_rndne_f32_e32 v1, v1
664; GFX6-NEXT:    v_rndne_f32_e32 v2, v2
665; GFX6-NEXT:    v_rndne_f32_e32 v3, v3
666; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
667; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
668; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
669; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
670; GFX6-NEXT:    s_setpc_b64 s[30:31]
671;
672; GFX7-LABEL: v_roundeven_v4f16:
673; GFX7:       ; %bb.0:
674; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
675; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
676; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
677; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v2
678; GFX7-NEXT:    v_cvt_f32_f16_e32 v3, v3
679; GFX7-NEXT:    v_rndne_f32_e32 v0, v0
680; GFX7-NEXT:    v_rndne_f32_e32 v1, v1
681; GFX7-NEXT:    v_rndne_f32_e32 v2, v2
682; GFX7-NEXT:    v_rndne_f32_e32 v3, v3
683; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
684; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
685; GFX7-NEXT:    v_cvt_f16_f32_e32 v2, v2
686; GFX7-NEXT:    v_cvt_f16_f32_e32 v3, v3
687; GFX7-NEXT:    s_setpc_b64 s[30:31]
688;
689; GFX8-LABEL: v_roundeven_v4f16:
690; GFX8:       ; %bb.0:
691; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
692; GFX8-NEXT:    v_rndne_f16_e32 v2, v0
693; GFX8-NEXT:    v_rndne_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
694; GFX8-NEXT:    v_rndne_f16_e32 v3, v1
695; GFX8-NEXT:    v_rndne_f16_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
696; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
697; GFX8-NEXT:    v_or_b32_e32 v1, v3, v1
698; GFX8-NEXT:    s_setpc_b64 s[30:31]
699;
700; GFX9-LABEL: v_roundeven_v4f16:
701; GFX9:       ; %bb.0:
702; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
703; GFX9-NEXT:    v_rndne_f16_e32 v2, v0
704; GFX9-NEXT:    v_rndne_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
705; GFX9-NEXT:    v_rndne_f16_e32 v3, v1
706; GFX9-NEXT:    v_rndne_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
707; GFX9-NEXT:    v_pack_b32_f16 v0, v2, v0
708; GFX9-NEXT:    v_pack_b32_f16 v1, v3, v1
709; GFX9-NEXT:    s_setpc_b64 s[30:31]
710;
711; GFX10-LABEL: v_roundeven_v4f16:
712; GFX10:       ; %bb.0:
713; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
714; GFX10-NEXT:    v_rndne_f16_e32 v2, v0
715; GFX10-NEXT:    v_rndne_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
716; GFX10-NEXT:    v_rndne_f16_e32 v3, v1
717; GFX10-NEXT:    v_rndne_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
718; GFX10-NEXT:    v_pack_b32_f16 v0, v2, v0
719; GFX10-NEXT:    v_pack_b32_f16 v1, v3, v1
720; GFX10-NEXT:    s_setpc_b64 s[30:31]
721;
722; GFX11-LABEL: v_roundeven_v4f16:
723; GFX11:       ; %bb.0:
724; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
725; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
726; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
727; GFX11-NEXT:    v_rndne_f16_e32 v0, v0
728; GFX11-NEXT:    v_rndne_f16_e32 v1, v1
729; GFX11-NEXT:    v_rndne_f16_e32 v2, v2
730; GFX11-NEXT:    v_rndne_f16_e32 v3, v3
731; GFX11-NEXT:    v_pack_b32_f16 v0, v0, v2
732; GFX11-NEXT:    v_pack_b32_f16 v1, v1, v3
733; GFX11-NEXT:    s_setpc_b64 s[30:31]
734;
735; SDAG_GFX6-LABEL: v_roundeven_v4f16:
736; SDAG_GFX6:       ; %bb.0:
737; SDAG_GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
738; SDAG_GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
739; SDAG_GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
740; SDAG_GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
741; SDAG_GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
742; SDAG_GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
743; SDAG_GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
744; SDAG_GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
745; SDAG_GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
746; SDAG_GFX6-NEXT:    v_rndne_f32_e32 v0, v0
747; SDAG_GFX6-NEXT:    v_rndne_f32_e32 v1, v1
748; SDAG_GFX6-NEXT:    v_rndne_f32_e32 v2, v2
749; SDAG_GFX6-NEXT:    v_rndne_f32_e32 v3, v3
750; SDAG_GFX6-NEXT:    s_setpc_b64 s[30:31]
751;
752; SDAG_GFX7-LABEL: v_roundeven_v4f16:
753; SDAG_GFX7:       ; %bb.0:
754; SDAG_GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
755; SDAG_GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
756; SDAG_GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
757; SDAG_GFX7-NEXT:    v_cvt_f16_f32_e32 v2, v2
758; SDAG_GFX7-NEXT:    v_cvt_f16_f32_e32 v3, v3
759; SDAG_GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
760; SDAG_GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
761; SDAG_GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v2
762; SDAG_GFX7-NEXT:    v_cvt_f32_f16_e32 v3, v3
763; SDAG_GFX7-NEXT:    v_rndne_f32_e32 v0, v0
764; SDAG_GFX7-NEXT:    v_rndne_f32_e32 v1, v1
765; SDAG_GFX7-NEXT:    v_rndne_f32_e32 v2, v2
766; SDAG_GFX7-NEXT:    v_rndne_f32_e32 v3, v3
767; SDAG_GFX7-NEXT:    s_setpc_b64 s[30:31]
768;
769; SDAG_GFX8-LABEL: v_roundeven_v4f16:
770; SDAG_GFX8:       ; %bb.0:
771; SDAG_GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
772; SDAG_GFX8-NEXT:    v_rndne_f16_sdwa v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
773; SDAG_GFX8-NEXT:    v_rndne_f16_sdwa v3, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
774; SDAG_GFX8-NEXT:    v_rndne_f16_e32 v1, v1
775; SDAG_GFX8-NEXT:    v_rndne_f16_e32 v0, v0
776; SDAG_GFX8-NEXT:    v_or_b32_e32 v0, v0, v3
777; SDAG_GFX8-NEXT:    v_or_b32_e32 v1, v1, v2
778; SDAG_GFX8-NEXT:    s_setpc_b64 s[30:31]
779;
780; SDAG_GFX9-LABEL: v_roundeven_v4f16:
781; SDAG_GFX9:       ; %bb.0:
782; SDAG_GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
783; SDAG_GFX9-NEXT:    v_rndne_f16_sdwa v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
784; SDAG_GFX9-NEXT:    v_rndne_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
785; SDAG_GFX9-NEXT:    v_rndne_f16_e32 v1, v1
786; SDAG_GFX9-NEXT:    v_rndne_f16_e32 v0, v0
787; SDAG_GFX9-NEXT:    v_pack_b32_f16 v0, v0, v3
788; SDAG_GFX9-NEXT:    v_pack_b32_f16 v1, v1, v2
789; SDAG_GFX9-NEXT:    s_setpc_b64 s[30:31]
790;
791; SDAG_GFX10-LABEL: v_roundeven_v4f16:
792; SDAG_GFX10:       ; %bb.0:
793; SDAG_GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
794; SDAG_GFX10-NEXT:    v_rndne_f16_sdwa v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
795; SDAG_GFX10-NEXT:    v_rndne_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
796; SDAG_GFX10-NEXT:    v_rndne_f16_e32 v0, v0
797; SDAG_GFX10-NEXT:    v_rndne_f16_e32 v1, v1
798; SDAG_GFX10-NEXT:    v_pack_b32_f16 v0, v0, v3
799; SDAG_GFX10-NEXT:    v_pack_b32_f16 v1, v1, v2
800; SDAG_GFX10-NEXT:    s_setpc_b64 s[30:31]
801;
802; SDAG_GFX11-LABEL: v_roundeven_v4f16:
803; SDAG_GFX11:       ; %bb.0:
804; SDAG_GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
805; SDAG_GFX11-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
806; SDAG_GFX11-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
807; SDAG_GFX11-NEXT:    v_rndne_f16_e32 v1, v1
808; SDAG_GFX11-NEXT:    v_rndne_f16_e32 v0, v0
809; SDAG_GFX11-NEXT:    v_rndne_f16_e32 v2, v2
810; SDAG_GFX11-NEXT:    v_rndne_f16_e32 v3, v3
811; SDAG_GFX11-NEXT:    v_pack_b32_f16 v0, v0, v2
812; SDAG_GFX11-NEXT:    v_pack_b32_f16 v1, v1, v3
813; SDAG_GFX11-NEXT:    s_setpc_b64 s[30:31]
814  %roundeven = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %x)
815  ret <4 x half> %roundeven
816}
817
818
; Applies llvm.roundeven.f32 to the result of llvm.fabs.f32 on a float argument.
; Every check prefix below expects a single v_rndne_f32_e64 whose source operand
; is written as |v0|, with no separate instruction for the absolute value.
819define float @v_roundeven_f32_fabs(float %x) {
820; GFX6-LABEL: v_roundeven_f32_fabs:
821; GFX6:       ; %bb.0:
822; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
823; GFX6-NEXT:    v_rndne_f32_e64 v0, |v0|
824; GFX6-NEXT:    s_setpc_b64 s[30:31]
825;
826; GFX7-LABEL: v_roundeven_f32_fabs:
827; GFX7:       ; %bb.0:
828; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
829; GFX7-NEXT:    v_rndne_f32_e64 v0, |v0|
830; GFX7-NEXT:    s_setpc_b64 s[30:31]
831;
832; GFX8-LABEL: v_roundeven_f32_fabs:
833; GFX8:       ; %bb.0:
834; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
835; GFX8-NEXT:    v_rndne_f32_e64 v0, |v0|
836; GFX8-NEXT:    s_setpc_b64 s[30:31]
837;
838; GFX9-LABEL: v_roundeven_f32_fabs:
839; GFX9:       ; %bb.0:
840; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
841; GFX9-NEXT:    v_rndne_f32_e64 v0, |v0|
842; GFX9-NEXT:    s_setpc_b64 s[30:31]
843;
844; GFX10PLUS-LABEL: v_roundeven_f32_fabs:
845; GFX10PLUS:       ; %bb.0:
846; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
847; GFX10PLUS-NEXT:    v_rndne_f32_e64 v0, |v0|
848; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
849;
850; SDAG_GFX6-LABEL: v_roundeven_f32_fabs:
851; SDAG_GFX6:       ; %bb.0:
852; SDAG_GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
853; SDAG_GFX6-NEXT:    v_rndne_f32_e64 v0, |v0|
854; SDAG_GFX6-NEXT:    s_setpc_b64 s[30:31]
855;
856; SDAG_GFX7-LABEL: v_roundeven_f32_fabs:
857; SDAG_GFX7:       ; %bb.0:
858; SDAG_GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
859; SDAG_GFX7-NEXT:    v_rndne_f32_e64 v0, |v0|
860; SDAG_GFX7-NEXT:    s_setpc_b64 s[30:31]
861;
862; SDAG_GFX8-LABEL: v_roundeven_f32_fabs:
863; SDAG_GFX8:       ; %bb.0:
864; SDAG_GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
865; SDAG_GFX8-NEXT:    v_rndne_f32_e64 v0, |v0|
866; SDAG_GFX8-NEXT:    s_setpc_b64 s[30:31]
867;
868; SDAG_GFX9-LABEL: v_roundeven_f32_fabs:
869; SDAG_GFX9:       ; %bb.0:
870; SDAG_GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
871; SDAG_GFX9-NEXT:    v_rndne_f32_e64 v0, |v0|
872; SDAG_GFX9-NEXT:    s_setpc_b64 s[30:31]
873;
874; SDAG_GFX10PLUS-LABEL: v_roundeven_f32_fabs:
875; SDAG_GFX10PLUS:       ; %bb.0:
876; SDAG_GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
877; SDAG_GFX10PLUS-NEXT:    v_rndne_f32_e64 v0, |v0|
878; SDAG_GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
879  %fabs.x = call float @llvm.fabs.f32(float %x)
880  %roundeven = call float @llvm.roundeven.f32(float %fabs.x)
881  ret float %roundeven
882}
883
; Applies llvm.roundeven.f32 to an inreg float argument of an amdgpu_ps function.
; Every check prefix below expects one v_rndne_f32_e32 that reads s0 and writes
; v0, followed by the "return to shader part epilog" marker. Unlike the
; non-shader tests in this file, no s_waitcnt or s_setpc_b64 line is expected.
884define amdgpu_ps float @s_roundeven_f32(float inreg %x) {
885; GFX6-LABEL: s_roundeven_f32:
886; GFX6:       ; %bb.0:
887; GFX6-NEXT:    v_rndne_f32_e32 v0, s0
888; GFX6-NEXT:    ; return to shader part epilog
889;
890; GFX7-LABEL: s_roundeven_f32:
891; GFX7:       ; %bb.0:
892; GFX7-NEXT:    v_rndne_f32_e32 v0, s0
893; GFX7-NEXT:    ; return to shader part epilog
894;
895; GFX8-LABEL: s_roundeven_f32:
896; GFX8:       ; %bb.0:
897; GFX8-NEXT:    v_rndne_f32_e32 v0, s0
898; GFX8-NEXT:    ; return to shader part epilog
899;
900; GFX9-LABEL: s_roundeven_f32:
901; GFX9:       ; %bb.0:
902; GFX9-NEXT:    v_rndne_f32_e32 v0, s0
903; GFX9-NEXT:    ; return to shader part epilog
904;
905; GFX10PLUS-LABEL: s_roundeven_f32:
906; GFX10PLUS:       ; %bb.0:
907; GFX10PLUS-NEXT:    v_rndne_f32_e32 v0, s0
908; GFX10PLUS-NEXT:    ; return to shader part epilog
909;
910; SDAG_GFX6-LABEL: s_roundeven_f32:
911; SDAG_GFX6:       ; %bb.0:
912; SDAG_GFX6-NEXT:    v_rndne_f32_e32 v0, s0
913; SDAG_GFX6-NEXT:    ; return to shader part epilog
914;
915; SDAG_GFX7-LABEL: s_roundeven_f32:
916; SDAG_GFX7:       ; %bb.0:
917; SDAG_GFX7-NEXT:    v_rndne_f32_e32 v0, s0
918; SDAG_GFX7-NEXT:    ; return to shader part epilog
919;
920; SDAG_GFX8-LABEL: s_roundeven_f32:
921; SDAG_GFX8:       ; %bb.0:
922; SDAG_GFX8-NEXT:    v_rndne_f32_e32 v0, s0
923; SDAG_GFX8-NEXT:    ; return to shader part epilog
924;
925; SDAG_GFX9-LABEL: s_roundeven_f32:
926; SDAG_GFX9:       ; %bb.0:
927; SDAG_GFX9-NEXT:    v_rndne_f32_e32 v0, s0
928; SDAG_GFX9-NEXT:    ; return to shader part epilog
929;
930; SDAG_GFX10PLUS-LABEL: s_roundeven_f32:
931; SDAG_GFX10PLUS:       ; %bb.0:
932; SDAG_GFX10PLUS-NEXT:    v_rndne_f32_e32 v0, s0
933; SDAG_GFX10PLUS-NEXT:    ; return to shader part epilog
934  %roundeven = call float @llvm.roundeven.f32(float %x)
935  ret float %roundeven
936}
937
; Applies llvm.roundeven.f32 to the negation (fneg) of a float argument.
; Every check prefix below expects a single v_rndne_f32_e64 whose source operand
; is written as -v0, with no separate instruction for the negation.
938define float @v_roundeven_f32_fneg(float %x) {
939; GFX6-LABEL: v_roundeven_f32_fneg:
940; GFX6:       ; %bb.0:
941; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
942; GFX6-NEXT:    v_rndne_f32_e64 v0, -v0
943; GFX6-NEXT:    s_setpc_b64 s[30:31]
944;
945; GFX7-LABEL: v_roundeven_f32_fneg:
946; GFX7:       ; %bb.0:
947; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
948; GFX7-NEXT:    v_rndne_f32_e64 v0, -v0
949; GFX7-NEXT:    s_setpc_b64 s[30:31]
950;
951; GFX8-LABEL: v_roundeven_f32_fneg:
952; GFX8:       ; %bb.0:
953; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
954; GFX8-NEXT:    v_rndne_f32_e64 v0, -v0
955; GFX8-NEXT:    s_setpc_b64 s[30:31]
956;
957; GFX9-LABEL: v_roundeven_f32_fneg:
958; GFX9:       ; %bb.0:
959; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
960; GFX9-NEXT:    v_rndne_f32_e64 v0, -v0
961; GFX9-NEXT:    s_setpc_b64 s[30:31]
962;
963; GFX10PLUS-LABEL: v_roundeven_f32_fneg:
964; GFX10PLUS:       ; %bb.0:
965; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
966; GFX10PLUS-NEXT:    v_rndne_f32_e64 v0, -v0
967; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
968;
969; SDAG_GFX6-LABEL: v_roundeven_f32_fneg:
970; SDAG_GFX6:       ; %bb.0:
971; SDAG_GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
972; SDAG_GFX6-NEXT:    v_rndne_f32_e64 v0, -v0
973; SDAG_GFX6-NEXT:    s_setpc_b64 s[30:31]
974;
975; SDAG_GFX7-LABEL: v_roundeven_f32_fneg:
976; SDAG_GFX7:       ; %bb.0:
977; SDAG_GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
978; SDAG_GFX7-NEXT:    v_rndne_f32_e64 v0, -v0
979; SDAG_GFX7-NEXT:    s_setpc_b64 s[30:31]
980;
981; SDAG_GFX8-LABEL: v_roundeven_f32_fneg:
982; SDAG_GFX8:       ; %bb.0:
983; SDAG_GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
984; SDAG_GFX8-NEXT:    v_rndne_f32_e64 v0, -v0
985; SDAG_GFX8-NEXT:    s_setpc_b64 s[30:31]
986;
987; SDAG_GFX9-LABEL: v_roundeven_f32_fneg:
988; SDAG_GFX9:       ; %bb.0:
989; SDAG_GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
990; SDAG_GFX9-NEXT:    v_rndne_f32_e64 v0, -v0
991; SDAG_GFX9-NEXT:    s_setpc_b64 s[30:31]
992;
993; SDAG_GFX10PLUS-LABEL: v_roundeven_f32_fneg:
994; SDAG_GFX10PLUS:       ; %bb.0:
995; SDAG_GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
996; SDAG_GFX10PLUS-NEXT:    v_rndne_f32_e64 v0, -v0
997; SDAG_GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
998  %neg.x = fneg float %x
999  %roundeven = call float @llvm.roundeven.f32(float %neg.x)
1000  ret float %roundeven
1001}
1002
; Applies llvm.roundeven.f64 to a double argument.
; The two tahiti prefixes (GFX6 and SDAG_GFX6 in the RUN lines) expect a
; multi-instruction expansion instead of v_rndne_f64. The checks build a 64-bit
; constant whose high dword is 0x43300000 combined with the sign bit of the
; input and whose low dword is 0, add it to the input, subtract it again, and
; then use v_cndmask_b32 on the result of comparing |x| against the constant
; with high dword 0x432fffff and low dword -1.
; All remaining prefixes expect a single v_rndne_f64_e32 on v[0:1].
1003define double @v_roundeven_f64(double %x) {
1004; GFX6-LABEL: v_roundeven_f64:
1005; GFX6:       ; %bb.0:
1006; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1007; GFX6-NEXT:    v_and_b32_e32 v3, 0x80000000, v1
1008; GFX6-NEXT:    v_mov_b32_e32 v2, 0
1009; GFX6-NEXT:    v_or_b32_e32 v3, 0x43300000, v3
1010; GFX6-NEXT:    v_add_f64 v[4:5], v[0:1], v[2:3]
1011; GFX6-NEXT:    v_mov_b32_e32 v6, -1
1012; GFX6-NEXT:    v_mov_b32_e32 v7, 0x432fffff
1013; GFX6-NEXT:    v_add_f64 v[2:3], v[4:5], -v[2:3]
1014; GFX6-NEXT:    v_cmp_gt_f64_e64 vcc, |v[0:1]|, v[6:7]
1015; GFX6-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
1016; GFX6-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
1017; GFX6-NEXT:    s_setpc_b64 s[30:31]
1018;
1019; GFX7-LABEL: v_roundeven_f64:
1020; GFX7:       ; %bb.0:
1021; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1022; GFX7-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
1023; GFX7-NEXT:    s_setpc_b64 s[30:31]
1024;
1025; GFX8-LABEL: v_roundeven_f64:
1026; GFX8:       ; %bb.0:
1027; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1028; GFX8-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
1029; GFX8-NEXT:    s_setpc_b64 s[30:31]
1030;
1031; GFX9-LABEL: v_roundeven_f64:
1032; GFX9:       ; %bb.0:
1033; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1034; GFX9-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
1035; GFX9-NEXT:    s_setpc_b64 s[30:31]
1036;
1037; GFX10PLUS-LABEL: v_roundeven_f64:
1038; GFX10PLUS:       ; %bb.0:
1039; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1040; GFX10PLUS-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
1041; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
1042;
1043; SDAG_GFX6-LABEL: v_roundeven_f64:
1044; SDAG_GFX6:       ; %bb.0:
1045; SDAG_GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1046; SDAG_GFX6-NEXT:    s_brev_b32 s6, -2
1047; SDAG_GFX6-NEXT:    v_mov_b32_e32 v2, 0x43300000
1048; SDAG_GFX6-NEXT:    v_bfi_b32 v3, s6, v2, v1
1049; SDAG_GFX6-NEXT:    v_mov_b32_e32 v2, 0
1050; SDAG_GFX6-NEXT:    s_mov_b32 s4, -1
1051; SDAG_GFX6-NEXT:    v_add_f64 v[4:5], v[0:1], v[2:3]
1052; SDAG_GFX6-NEXT:    s_mov_b32 s5, 0x432fffff
1053; SDAG_GFX6-NEXT:    v_add_f64 v[2:3], v[4:5], -v[2:3]
1054; SDAG_GFX6-NEXT:    v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5]
1055; SDAG_GFX6-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
1056; SDAG_GFX6-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
1057; SDAG_GFX6-NEXT:    s_setpc_b64 s[30:31]
1058;
1059; SDAG_GFX7-LABEL: v_roundeven_f64:
1060; SDAG_GFX7:       ; %bb.0:
1061; SDAG_GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1062; SDAG_GFX7-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
1063; SDAG_GFX7-NEXT:    s_setpc_b64 s[30:31]
1064;
1065; SDAG_GFX8-LABEL: v_roundeven_f64:
1066; SDAG_GFX8:       ; %bb.0:
1067; SDAG_GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1068; SDAG_GFX8-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
1069; SDAG_GFX8-NEXT:    s_setpc_b64 s[30:31]
1070;
1071; SDAG_GFX9-LABEL: v_roundeven_f64:
1072; SDAG_GFX9:       ; %bb.0:
1073; SDAG_GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1074; SDAG_GFX9-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
1075; SDAG_GFX9-NEXT:    s_setpc_b64 s[30:31]
1076;
1077; SDAG_GFX10PLUS-LABEL: v_roundeven_f64:
1078; SDAG_GFX10PLUS:       ; %bb.0:
1079; SDAG_GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1080; SDAG_GFX10PLUS-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
1081; SDAG_GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
1082  %roundeven = call double @llvm.roundeven.f64(double %x)
1083  ret double %roundeven
1084}
1085
; Applies llvm.roundeven.f64 to the negation (fneg) of a double argument.
; The two tahiti prefixes (GFX6 and SDAG_GFX6 in the RUN lines) expect the same
; add-and-subtract expansion as the plain f64 test, preceded by a v_xor_b32 of
; v1 with 0x80000000. The first v_add_f64 takes -v[0:1] as its source, and the
; final v_cndmask_b32 for the high dword selects against the xor result.
; All remaining prefixes expect a single v_rndne_f64_e64 whose source operand is
; written as -v[0:1].
1086define double @v_roundeven_f64_fneg(double %x) {
1087; GFX6-LABEL: v_roundeven_f64_fneg:
1088; GFX6:       ; %bb.0:
1089; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1090; GFX6-NEXT:    v_xor_b32_e32 v8, 0x80000000, v1
1091; GFX6-NEXT:    v_and_b32_e32 v3, 0x80000000, v8
1092; GFX6-NEXT:    v_mov_b32_e32 v2, 0
1093; GFX6-NEXT:    v_or_b32_e32 v3, 0x43300000, v3
1094; GFX6-NEXT:    v_add_f64 v[4:5], -v[0:1], v[2:3]
1095; GFX6-NEXT:    v_mov_b32_e32 v6, -1
1096; GFX6-NEXT:    v_mov_b32_e32 v7, 0x432fffff
1097; GFX6-NEXT:    v_add_f64 v[2:3], v[4:5], -v[2:3]
1098; GFX6-NEXT:    v_cmp_gt_f64_e64 vcc, |v[0:1]|, v[6:7]
1099; GFX6-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
1100; GFX6-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
1101; GFX6-NEXT:    s_setpc_b64 s[30:31]
1102;
1103; GFX7-LABEL: v_roundeven_f64_fneg:
1104; GFX7:       ; %bb.0:
1105; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1106; GFX7-NEXT:    v_rndne_f64_e64 v[0:1], -v[0:1]
1107; GFX7-NEXT:    s_setpc_b64 s[30:31]
1108;
1109; GFX8-LABEL: v_roundeven_f64_fneg:
1110; GFX8:       ; %bb.0:
1111; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1112; GFX8-NEXT:    v_rndne_f64_e64 v[0:1], -v[0:1]
1113; GFX8-NEXT:    s_setpc_b64 s[30:31]
1114;
1115; GFX9-LABEL: v_roundeven_f64_fneg:
1116; GFX9:       ; %bb.0:
1117; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1118; GFX9-NEXT:    v_rndne_f64_e64 v[0:1], -v[0:1]
1119; GFX9-NEXT:    s_setpc_b64 s[30:31]
1120;
1121; GFX10PLUS-LABEL: v_roundeven_f64_fneg:
1122; GFX10PLUS:       ; %bb.0:
1123; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1124; GFX10PLUS-NEXT:    v_rndne_f64_e64 v[0:1], -v[0:1]
1125; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
1126;
1127; SDAG_GFX6-LABEL: v_roundeven_f64_fneg:
1128; SDAG_GFX6:       ; %bb.0:
1129; SDAG_GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1130; SDAG_GFX6-NEXT:    v_xor_b32_e32 v6, 0x80000000, v1
1131; SDAG_GFX6-NEXT:    s_brev_b32 s4, -2
1132; SDAG_GFX6-NEXT:    v_mov_b32_e32 v2, 0x43300000
1133; SDAG_GFX6-NEXT:    v_bfi_b32 v3, s4, v2, v6
1134; SDAG_GFX6-NEXT:    v_mov_b32_e32 v2, 0
1135; SDAG_GFX6-NEXT:    v_add_f64 v[4:5], -v[0:1], v[2:3]
1136; SDAG_GFX6-NEXT:    s_mov_b32 s4, -1
1137; SDAG_GFX6-NEXT:    s_mov_b32 s5, 0x432fffff
1138; SDAG_GFX6-NEXT:    v_add_f64 v[2:3], v[4:5], -v[2:3]
1139; SDAG_GFX6-NEXT:    v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5]
1140; SDAG_GFX6-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
1141; SDAG_GFX6-NEXT:    v_cndmask_b32_e32 v1, v3, v6, vcc
1142; SDAG_GFX6-NEXT:    s_setpc_b64 s[30:31]
1143;
1144; SDAG_GFX7-LABEL: v_roundeven_f64_fneg:
1145; SDAG_GFX7:       ; %bb.0:
1146; SDAG_GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1147; SDAG_GFX7-NEXT:    v_rndne_f64_e64 v[0:1], -v[0:1]
1148; SDAG_GFX7-NEXT:    s_setpc_b64 s[30:31]
1149;
1150; SDAG_GFX8-LABEL: v_roundeven_f64_fneg:
1151; SDAG_GFX8:       ; %bb.0:
1152; SDAG_GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1153; SDAG_GFX8-NEXT:    v_rndne_f64_e64 v[0:1], -v[0:1]
1154; SDAG_GFX8-NEXT:    s_setpc_b64 s[30:31]
1155;
1156; SDAG_GFX9-LABEL: v_roundeven_f64_fneg:
1157; SDAG_GFX9:       ; %bb.0:
1158; SDAG_GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1159; SDAG_GFX9-NEXT:    v_rndne_f64_e64 v[0:1], -v[0:1]
1160; SDAG_GFX9-NEXT:    s_setpc_b64 s[30:31]
1161;
1162; SDAG_GFX10PLUS-LABEL: v_roundeven_f64_fneg:
1163; SDAG_GFX10PLUS:       ; %bb.0:
1164; SDAG_GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1165; SDAG_GFX10PLUS-NEXT:    v_rndne_f64_e64 v[0:1], -v[0:1]
1166; SDAG_GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
1167  %neg.x = fneg double %x
1168  %roundeven = call double @llvm.roundeven.f64(double %neg.x)
1169  ret double %roundeven
1170}
1171
; Applies llvm.roundeven.v2f64 to a two-element double vector argument.
; The two tahiti prefixes (GFX6 and SDAG_GFX6 in the RUN lines) expect the
; add-and-subtract expansion once per element, operating on v[0:1] and v[2:3].
; Both elements are compared against the same s[4:5] constant (low dword -1,
; high dword 0x432fffff), which is materialized only once.
; All remaining prefixes expect two v_rndne_f64_e32 instructions, one for
; v[0:1] and one for v[2:3].
1172define <2 x double> @v_roundeven_v2f64(<2 x double> %x) {
1173; GFX6-LABEL: v_roundeven_v2f64:
1174; GFX6:       ; %bb.0:
1175; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1176; GFX6-NEXT:    v_and_b32_e32 v5, 0x80000000, v1
1177; GFX6-NEXT:    v_mov_b32_e32 v4, 0
1178; GFX6-NEXT:    v_or_b32_e32 v5, 0x43300000, v5
1179; GFX6-NEXT:    v_add_f64 v[6:7], v[0:1], v[4:5]
1180; GFX6-NEXT:    s_mov_b32 s4, -1
1181; GFX6-NEXT:    s_mov_b32 s5, 0x432fffff
1182; GFX6-NEXT:    v_add_f64 v[5:6], v[6:7], -v[4:5]
1183; GFX6-NEXT:    v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5]
1184; GFX6-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
1185; GFX6-NEXT:    v_and_b32_e32 v5, 0x80000000, v3
1186; GFX6-NEXT:    v_or_b32_e32 v5, 0x43300000, v5
1187; GFX6-NEXT:    v_add_f64 v[7:8], v[2:3], v[4:5]
1188; GFX6-NEXT:    v_cndmask_b32_e32 v1, v6, v1, vcc
1189; GFX6-NEXT:    v_add_f64 v[4:5], v[7:8], -v[4:5]
1190; GFX6-NEXT:    v_cmp_gt_f64_e64 vcc, |v[2:3]|, s[4:5]
1191; GFX6-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
1192; GFX6-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
1193; GFX6-NEXT:    s_setpc_b64 s[30:31]
1194;
1195; GFX7-LABEL: v_roundeven_v2f64:
1196; GFX7:       ; %bb.0:
1197; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1198; GFX7-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
1199; GFX7-NEXT:    v_rndne_f64_e32 v[2:3], v[2:3]
1200; GFX7-NEXT:    s_setpc_b64 s[30:31]
1201;
1202; GFX8-LABEL: v_roundeven_v2f64:
1203; GFX8:       ; %bb.0:
1204; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1205; GFX8-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
1206; GFX8-NEXT:    v_rndne_f64_e32 v[2:3], v[2:3]
1207; GFX8-NEXT:    s_setpc_b64 s[30:31]
1208;
1209; GFX9-LABEL: v_roundeven_v2f64:
1210; GFX9:       ; %bb.0:
1211; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1212; GFX9-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
1213; GFX9-NEXT:    v_rndne_f64_e32 v[2:3], v[2:3]
1214; GFX9-NEXT:    s_setpc_b64 s[30:31]
1215;
1216; GFX10PLUS-LABEL: v_roundeven_v2f64:
1217; GFX10PLUS:       ; %bb.0:
1218; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1219; GFX10PLUS-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
1220; GFX10PLUS-NEXT:    v_rndne_f64_e32 v[2:3], v[2:3]
1221; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
1222;
1223; SDAG_GFX6-LABEL: v_roundeven_v2f64:
1224; SDAG_GFX6:       ; %bb.0:
1225; SDAG_GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1226; SDAG_GFX6-NEXT:    s_brev_b32 s6, -2
1227; SDAG_GFX6-NEXT:    v_mov_b32_e32 v8, 0x43300000
1228; SDAG_GFX6-NEXT:    v_bfi_b32 v5, s6, v8, v1
1229; SDAG_GFX6-NEXT:    v_mov_b32_e32 v4, 0
1230; SDAG_GFX6-NEXT:    v_add_f64 v[6:7], v[0:1], v[4:5]
1231; SDAG_GFX6-NEXT:    s_mov_b32 s4, -1
1232; SDAG_GFX6-NEXT:    s_mov_b32 s5, 0x432fffff
1233; SDAG_GFX6-NEXT:    v_add_f64 v[5:6], v[6:7], -v[4:5]
1234; SDAG_GFX6-NEXT:    v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5]
1235; SDAG_GFX6-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
1236; SDAG_GFX6-NEXT:    v_bfi_b32 v5, s6, v8, v3
1237; SDAG_GFX6-NEXT:    v_add_f64 v[7:8], v[2:3], v[4:5]
1238; SDAG_GFX6-NEXT:    v_cndmask_b32_e32 v1, v6, v1, vcc
1239; SDAG_GFX6-NEXT:    v_add_f64 v[4:5], v[7:8], -v[4:5]
1240; SDAG_GFX6-NEXT:    v_cmp_gt_f64_e64 vcc, |v[2:3]|, s[4:5]
1241; SDAG_GFX6-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
1242; SDAG_GFX6-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
1243; SDAG_GFX6-NEXT:    s_setpc_b64 s[30:31]
1244;
1245; SDAG_GFX7-LABEL: v_roundeven_v2f64:
1246; SDAG_GFX7:       ; %bb.0:
1247; SDAG_GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1248; SDAG_GFX7-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
1249; SDAG_GFX7-NEXT:    v_rndne_f64_e32 v[2:3], v[2:3]
1250; SDAG_GFX7-NEXT:    s_setpc_b64 s[30:31]
1251;
1252; SDAG_GFX8-LABEL: v_roundeven_v2f64:
1253; SDAG_GFX8:       ; %bb.0:
1254; SDAG_GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1255; SDAG_GFX8-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
1256; SDAG_GFX8-NEXT:    v_rndne_f64_e32 v[2:3], v[2:3]
1257; SDAG_GFX8-NEXT:    s_setpc_b64 s[30:31]
1258;
1259; SDAG_GFX9-LABEL: v_roundeven_v2f64:
1260; SDAG_GFX9:       ; %bb.0:
1261; SDAG_GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1262; SDAG_GFX9-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
1263; SDAG_GFX9-NEXT:    v_rndne_f64_e32 v[2:3], v[2:3]
1264; SDAG_GFX9-NEXT:    s_setpc_b64 s[30:31]
1265;
1266; SDAG_GFX10PLUS-LABEL: v_roundeven_v2f64:
1267; SDAG_GFX10PLUS:       ; %bb.0:
1268; SDAG_GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1269; SDAG_GFX10PLUS-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
1270; SDAG_GFX10PLUS-NEXT:    v_rndne_f64_e32 v[2:3], v[2:3]
1271; SDAG_GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
1272  %roundeven = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %x)
1273  ret <2 x double> %roundeven
1274}
1275
; Declarations of the llvm.roundeven and llvm.fabs intrinsics, grouped by
; element type (half, float, double, then fabs). Each declaration carries
; attribute group #0, which is defined on the last line of this section.
1276declare half @llvm.roundeven.f16(half) #0
1277declare <2 x half> @llvm.roundeven.v2f16(<2 x half>) #0
1278declare <4 x half> @llvm.roundeven.v4f16(<4 x half>) #0
1279
1280declare float @llvm.roundeven.f32(float) #0
1281declare <2 x float> @llvm.roundeven.v2f32(<2 x float>) #0
1282declare <3 x float> @llvm.roundeven.v3f32(<3 x float>) #0
1283declare <4 x float> @llvm.roundeven.v4f32(<4 x float>) #0
1284
1285declare double @llvm.roundeven.f64(double) #0
1286declare <2 x double> @llvm.roundeven.v2f64(<2 x double>) #0
1287
1288declare half @llvm.fabs.f16(half) #0
1289declare float @llvm.fabs.f32(float) #0
1290
1291attributes #0 = { nounwind readnone speculatable willreturn }
1292