xref: /llvm-project/llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll (revision 6548b6354d1d990e1c98736f5e7c3de876bedc8e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefixes=GCN,GFX7 %s
3; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
4; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s
5
6define i32 @fneg_xor_select_i32(i1 %cond, i32 %arg0, i32 %arg1) { ; sign-bit xor (integer-typed fneg) of an i32 select whose only user is the xor
7; GCN-LABEL: fneg_xor_select_i32:
8; GCN:       ; %bb.0:
9; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10; GCN-NEXT:    v_and_b32_e32 v0, 1, v0
11; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
12; GCN-NEXT:    v_cndmask_b32_e64 v0, -v2, -v1, vcc
13; GCN-NEXT:    s_setpc_b64 s[30:31]
14;
15; GFX11-LABEL: fneg_xor_select_i32:
16; GFX11:       ; %bb.0:
17; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
19; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
20; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
21; GFX11-NEXT:    v_cndmask_b32_e64 v0, -v2, -v1, vcc_lo
22; GFX11-NEXT:    s_setpc_b64 s[30:31]
23  %select = select i1 %cond, i32 %arg0, i32 %arg1
24  %fneg = xor i32 %select, -2147483648 ; -2147483648 is 0x80000000, so only bit 31 flips; the checks above expect no v_xor, just negated operands on v_cndmask
25  ret i32 %fneg
26}
27
28define <2 x i32> @fneg_xor_select_v2i32(<2 x i1> %cond, <2 x i32> %arg0, <2 x i32> %arg1) { ; two-element vector form of the i32 sign-bit xor of a select
29; GCN-LABEL: fneg_xor_select_v2i32:
30; GCN:       ; %bb.0:
31; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
32; GCN-NEXT:    v_and_b32_e32 v0, 1, v0
33; GCN-NEXT:    v_and_b32_e32 v1, 1, v1
34; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
35; GCN-NEXT:    v_cndmask_b32_e64 v0, -v4, -v2, vcc
36; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
37; GCN-NEXT:    v_cndmask_b32_e64 v1, -v5, -v3, vcc
38; GCN-NEXT:    s_setpc_b64 s[30:31]
39;
40; GFX11-LABEL: fneg_xor_select_v2i32:
41; GFX11:       ; %bb.0:
42; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
43; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
44; GFX11-NEXT:    v_and_b32_e32 v1, 1, v1
45; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
46; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
47; GFX11-NEXT:    v_cndmask_b32_e64 v0, -v4, -v2, vcc_lo
48; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
49; GFX11-NEXT:    v_cndmask_b32_e64 v1, -v5, -v3, vcc_lo
50; GFX11-NEXT:    s_setpc_b64 s[30:31]
51  %select = select <2 x i1> %cond, <2 x i32> %arg0, <2 x i32> %arg1
52  %fneg = xor <2 x i32> %select, <i32 -2147483648, i32 -2147483648> ; sign-bit mask in each lane; the checks above expect one v_cndmask with negated operands per element
53  ret <2 x i32> %fneg
54}
55
56define i32 @fneg_xor_select_i32_multi_use(i1 %cond, i32 %arg0, i32 %arg1, ptr addrspace(1) %ptr) { ; i32 sign-bit xor of a select whose un-negated value is also stored
57; GFX7-LABEL: fneg_xor_select_i32_multi_use:
58; GFX7:       ; %bb.0:
59; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
60; GFX7-NEXT:    v_and_b32_e32 v0, 1, v0
61; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
62; GFX7-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
63; GFX7-NEXT:    flat_store_dword v[3:4], v0
64; GFX7-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
65; GFX7-NEXT:    s_waitcnt vmcnt(0)
66; GFX7-NEXT:    s_setpc_b64 s[30:31]
67;
68; GFX9-LABEL: fneg_xor_select_i32_multi_use:
69; GFX9:       ; %bb.0:
70; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
71; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
72; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
73; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
74; GFX9-NEXT:    global_store_dword v[3:4], v0, off
75; GFX9-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
76; GFX9-NEXT:    s_waitcnt vmcnt(0)
77; GFX9-NEXT:    s_setpc_b64 s[30:31]
78;
79; GFX11-LABEL: fneg_xor_select_i32_multi_use:
80; GFX11:       ; %bb.0:
81; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
82; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
83; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
84; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
85; GFX11-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc_lo
86; GFX11-NEXT:    v_xor_b32_e32 v0, 0x80000000, v1
87; GFX11-NEXT:    global_store_b32 v[3:4], v1, off
88; GFX11-NEXT:    s_setpc_b64 s[30:31]
89  %select = select i1 %cond, i32 %arg0, i32 %arg1
90  store i32 %select, ptr addrspace(1) %ptr ; second user of %select: the stored value must be the un-negated one
91  %fneg = xor i32 %select, -2147483648 ; with the extra user, the checks above expect a plain v_cndmask followed by a separate v_xor_b32 0x80000000
92  ret i32 %fneg
93}
94
95define i64 @fneg_xor_select_i64(i1 %cond, i64 %arg0, i64 %arg1) { ; sign-bit xor (integer-typed fneg) of an i64 select with a single use
96; GCN-LABEL: fneg_xor_select_i64:
97; GCN:       ; %bb.0:
98; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
99; GCN-NEXT:    v_and_b32_e32 v0, 1, v0
100; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
101; GCN-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
102; GCN-NEXT:    v_cndmask_b32_e64 v1, -v4, -v2, vcc
103; GCN-NEXT:    s_setpc_b64 s[30:31]
104;
105; GFX11-LABEL: fneg_xor_select_i64:
106; GFX11:       ; %bb.0:
107; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
108; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
109; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
110; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
111; GFX11-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc_lo
112; GFX11-NEXT:    v_cndmask_b32_e64 v1, -v4, -v2, vcc_lo
113; GFX11-NEXT:    s_setpc_b64 s[30:31]
114  %select = select i1 %cond, i64 %arg0, i64 %arg1
115  %fneg = xor i64 %select, 9223372036854775808 ; 9223372036854775808 is 2^63 (bit 63 only); the checks above negate only the v_cndmask that writes v1, the one writing v0 is plain
116  ret i64 %fneg
117}
118
119define <2 x i64> @fneg_xor_select_v2i64(<2 x i1> %cond, <2 x i64> %arg0, <2 x i64> %arg1) { ; two-element vector form of the i64 sign-bit xor of a select
120; GCN-LABEL: fneg_xor_select_v2i64:
121; GCN:       ; %bb.0:
122; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
123; GCN-NEXT:    v_and_b32_e32 v1, 1, v1
124; GCN-NEXT:    v_and_b32_e32 v0, 1, v0
125; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
126; GCN-NEXT:    v_cmp_eq_u32_e64 s[4:5], 1, v1
127; GCN-NEXT:    v_cndmask_b32_e32 v0, v6, v2, vcc
128; GCN-NEXT:    v_cndmask_b32_e64 v2, v8, v4, s[4:5]
129; GCN-NEXT:    v_cndmask_b32_e64 v1, -v7, -v3, vcc
130; GCN-NEXT:    v_cndmask_b32_e64 v3, -v9, -v5, s[4:5]
131; GCN-NEXT:    s_setpc_b64 s[30:31]
132;
133; GFX11-LABEL: fneg_xor_select_v2i64:
134; GFX11:       ; %bb.0:
135; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
136; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
137; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
138; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
139; GFX11-NEXT:    v_dual_cndmask_b32 v0, v6, v2 :: v_dual_and_b32 v1, 1, v1
140; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, 1, v1
141; GFX11-NEXT:    v_cndmask_b32_e64 v1, -v7, -v3, vcc_lo
142; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
143; GFX11-NEXT:    v_cndmask_b32_e64 v2, v8, v4, s0
144; GFX11-NEXT:    v_cndmask_b32_e64 v3, -v9, -v5, s0
145; GFX11-NEXT:    s_setpc_b64 s[30:31]
146  %select = select <2 x i1> %cond, <2 x i64> %arg0, <2 x i64> %arg1
147  %fneg = xor <2 x i64> %select, <i64 9223372036854775808, i64 9223372036854775808> ; bit-63 mask per element; the checks above negate only the v_cndmask results written to v1 and v3
148  ret <2 x i64> %fneg
149}
150
151define i16 @fneg_xor_select_i16(i1 %cond, i16 %arg0, i16 %arg1) { ; sign-bit xor of an i16 select with a single use
152; GCN-LABEL: fneg_xor_select_i16:
153; GCN:       ; %bb.0:
154; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
155; GCN-NEXT:    v_and_b32_e32 v0, 1, v0
156; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
157; GCN-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
158; GCN-NEXT:    v_xor_b32_e32 v0, 0xffff8000, v0
159; GCN-NEXT:    s_setpc_b64 s[30:31]
160;
161; GFX11-LABEL: fneg_xor_select_i16:
162; GFX11:       ; %bb.0:
163; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
164; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
165; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
166; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
167; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
168; GFX11-NEXT:    v_xor_b32_e32 v0, 0xffff8000, v0
169; GFX11-NEXT:    s_setpc_b64 s[30:31]
170  %select = select i1 %cond, i16 %arg0, i16 %arg1
171  %fneg = xor i16 %select, -32768 ; -32768 is 0x8000 (bit 15 of i16); unlike the i32 case, the checks above keep an explicit v_xor_b32 after a plain v_cndmask
172  ret i16 %fneg
173}
174
175define <2 x i16> @fneg_xor_select_v2i16(<2 x i1> %cond, <2 x i16> %arg0, <2 x i16> %arg1) { ; two-element vector form of the i16 sign-bit xor of a select
176; GFX7-LABEL: fneg_xor_select_v2i16:
177; GFX7:       ; %bb.0:
178; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
179; GFX7-NEXT:    v_and_b32_e32 v0, 1, v0
180; GFX7-NEXT:    v_and_b32_e32 v1, 1, v1
181; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
182; GFX7-NEXT:    v_cndmask_b32_e32 v0, v4, v2, vcc
183; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
184; GFX7-NEXT:    v_cndmask_b32_e32 v1, v5, v3, vcc
185; GFX7-NEXT:    v_xor_b32_e32 v1, 0x8000, v1
186; GFX7-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
187; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v1
188; GFX7-NEXT:    v_and_b32_e32 v0, 0xffff, v0
189; GFX7-NEXT:    v_or_b32_e32 v0, v0, v2
190; GFX7-NEXT:    v_and_b32_e32 v1, 0xffff, v1
191; GFX7-NEXT:    s_setpc_b64 s[30:31]
192;
193; GFX9-LABEL: fneg_xor_select_v2i16:
194; GFX9:       ; %bb.0:
195; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
196; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
197; GFX9-NEXT:    v_and_b32_e32 v1, 1, v1
198; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
199; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
200; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
201; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
202; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
203; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v2, vcc
204; GFX9-NEXT:    s_mov_b32 s4, 0x5040100
205; GFX9-NEXT:    v_perm_b32 v0, v1, v0, s4
206; GFX9-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
207; GFX9-NEXT:    s_setpc_b64 s[30:31]
208;
209; GFX11-LABEL: fneg_xor_select_v2i16:
210; GFX11:       ; %bb.0:
211; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
212; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
213; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
214; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v3
215; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
216; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
217; GFX11-NEXT:    v_dual_cndmask_b32 v0, v3, v2 :: v_dual_and_b32 v1, 1, v1
218; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
219; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
220; GFX11-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc_lo
221; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
222; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
223; GFX11-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
224; GFX11-NEXT:    s_setpc_b64 s[30:31]
225  %select = select <2 x i1> %cond, <2 x i16> %arg0, <2 x i16> %arg1
226  %fneg = xor <2 x i16> %select, <i16 -32768, i16 -32768> ; 0x8000 per element; the checks above keep explicit xors: two v_xor_b32 0x8000 on gfx700, one v_xor_b32 0x80008000 on gfx900 and gfx1100
227  ret <2 x i16> %fneg
228}
229
230define i16 @fneg_xor_select_i16_multi_use(i1 %cond, i16 %arg0, i16 %arg1, ptr addrspace(1) %ptr) { ; i16 sign-bit xor of a select whose un-negated value is also stored
231; GFX7-LABEL: fneg_xor_select_i16_multi_use:
232; GFX7:       ; %bb.0:
233; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
234; GFX7-NEXT:    v_and_b32_e32 v0, 1, v0
235; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
236; GFX7-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
237; GFX7-NEXT:    flat_store_short v[3:4], v0
238; GFX7-NEXT:    v_xor_b32_e32 v0, 0xffff8000, v0
239; GFX7-NEXT:    s_waitcnt vmcnt(0)
240; GFX7-NEXT:    s_setpc_b64 s[30:31]
241;
242; GFX9-LABEL: fneg_xor_select_i16_multi_use:
243; GFX9:       ; %bb.0:
244; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
245; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
246; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
247; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
248; GFX9-NEXT:    global_store_short v[3:4], v0, off
249; GFX9-NEXT:    v_xor_b32_e32 v0, 0xffff8000, v0
250; GFX9-NEXT:    s_waitcnt vmcnt(0)
251; GFX9-NEXT:    s_setpc_b64 s[30:31]
252;
253; GFX11-LABEL: fneg_xor_select_i16_multi_use:
254; GFX11:       ; %bb.0:
255; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
256; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
257; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
258; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
259; GFX11-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc_lo
260; GFX11-NEXT:    v_xor_b32_e32 v0, 0xffff8000, v1
261; GFX11-NEXT:    global_store_b16 v[3:4], v1, off
262; GFX11-NEXT:    s_setpc_b64 s[30:31]
263  %select = select i1 %cond, i16 %arg0, i16 %arg1
264  store i16 %select, ptr addrspace(1) %ptr ; second user of %select: the stored value must be the un-negated one
265  %fneg = xor i16 %select, -32768 ; the checks above expect a plain v_cndmask, the store of its result, and a separate v_xor_b32 0xffff8000
266  ret i16 %fneg
267}
268
269define i64 @fneg_xor_select_i64_multi_user(i1 %cond, i64 %arg0, i64 %arg1, ptr addrspace(1) %ptr) { ; i64 sign-bit xor of a select whose un-negated value is also stored
270; GFX7-LABEL: fneg_xor_select_i64_multi_user:
271; GFX7:       ; %bb.0:
272; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
273; GFX7-NEXT:    v_and_b32_e32 v0, 1, v0
274; GFX7-NEXT:    v_mov_b32_e32 v7, v1
275; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
276; GFX7-NEXT:    v_cndmask_b32_e32 v1, v4, v2, vcc
277; GFX7-NEXT:    v_cndmask_b32_e32 v0, v3, v7, vcc
278; GFX7-NEXT:    flat_store_dwordx2 v[5:6], v[0:1]
279; GFX7-NEXT:    v_cndmask_b32_e64 v1, -v4, -v2, vcc
280; GFX7-NEXT:    s_waitcnt vmcnt(0)
281; GFX7-NEXT:    s_setpc_b64 s[30:31]
282;
283; GFX9-LABEL: fneg_xor_select_i64_multi_user:
284; GFX9:       ; %bb.0:
285; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
286; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
287; GFX9-NEXT:    v_mov_b32_e32 v7, v1
288; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
289; GFX9-NEXT:    v_cndmask_b32_e32 v1, v4, v2, vcc
290; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v7, vcc
291; GFX9-NEXT:    global_store_dwordx2 v[5:6], v[0:1], off
292; GFX9-NEXT:    v_cndmask_b32_e64 v1, -v4, -v2, vcc
293; GFX9-NEXT:    s_waitcnt vmcnt(0)
294; GFX9-NEXT:    s_setpc_b64 s[30:31]
295;
296; GFX11-LABEL: fneg_xor_select_i64_multi_user:
297; GFX11:       ; %bb.0:
298; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
299; GFX11-NEXT:    v_dual_mov_b32 v7, v1 :: v_dual_and_b32 v0, 1, v0
300; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
301; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
302; GFX11-NEXT:    v_dual_cndmask_b32 v1, v4, v2 :: v_dual_cndmask_b32 v0, v3, v7
303; GFX11-NEXT:    v_cndmask_b32_e64 v2, -v4, -v2, vcc_lo
304; GFX11-NEXT:    global_store_b64 v[5:6], v[0:1], off
305; GFX11-NEXT:    v_mov_b32_e32 v1, v2
306; GFX11-NEXT:    s_setpc_b64 s[30:31]
307  %select = select i1 %cond, i64 %arg0, i64 %arg1
308  store i64 %select, ptr addrspace(1) %ptr ; second user of %select: the stored pair must be the un-negated one
309  %fneg = xor i64 %select, 9223372036854775808 ; unlike the i32 multi-use case there is no v_xor in the checks above: a second v_cndmask with negated operands produces the returned v1
310  ret i64 %fneg
311}
312
313define i32 @select_fneg_xor_select_i32(i1 %cond0, i1 %cond1, i32 %arg0, i32 %arg1) { ; chain: i32 sign-bit xor -> select -> sign-bit xor -> select
314; GCN-LABEL: select_fneg_xor_select_i32:
315; GCN:       ; %bb.0:
316; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
317; GCN-NEXT:    v_and_b32_e32 v0, 1, v0
318; GCN-NEXT:    v_xor_b32_e32 v2, 0x80000000, v2
319; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
320; GCN-NEXT:    v_and_b32_e32 v1, 1, v1
321; GCN-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
322; GCN-NEXT:    v_xor_b32_e32 v2, 0x80000000, v0
323; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
324; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
325; GCN-NEXT:    s_setpc_b64 s[30:31]
326;
327; GFX11-LABEL: select_fneg_xor_select_i32:
328; GFX11:       ; %bb.0:
329; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
330; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
331; GFX11-NEXT:    v_xor_b32_e32 v2, 0x80000000, v2
332; GFX11-NEXT:    v_and_b32_e32 v1, 1, v1
333; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
334; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
335; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc_lo
336; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
337; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
338; GFX11-NEXT:    v_xor_b32_e32 v2, 0x80000000, v0
339; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
340; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
341; GFX11-NEXT:    s_setpc_b64 s[30:31]
342  %fneg0 = xor i32 %arg0, -2147483648 ; first sign-bit flip, applied to one select input only
343  %select0 = select i1 %cond0, i32 %arg1, i32 %fneg0
344  %fneg1 = xor i32 %select0, -2147483648 ; second sign-bit flip, applied to the first select's result
345  %select1 = select i1 %cond1, i32 %fneg1, i32 %select0 ; %select0 is used both directly and through %fneg1; the checks above keep both flips as v_xor_b32
346  ret i32 %select1
347}
348
349define float @select_fneg_select_f32(i1 %cond0, i1 %cond1, float %arg0, float %arg1) { ; float counterpart of the i32 chain, written with fneg instead of xor
350; GCN-LABEL: select_fneg_select_f32:
351; GCN:       ; %bb.0:
352; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
353; GCN-NEXT:    v_and_b32_e32 v0, 1, v0
354; GCN-NEXT:    v_and_b32_e32 v1, 1, v1
355; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
356; GCN-NEXT:    v_cndmask_b32_e64 v0, -v2, v3, vcc
357; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
358; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, -v0, vcc
359; GCN-NEXT:    s_setpc_b64 s[30:31]
360;
361; GFX11-LABEL: select_fneg_select_f32:
362; GFX11:       ; %bb.0:
363; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
364; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
365; GFX11-NEXT:    v_and_b32_e32 v1, 1, v1
366; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
367; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
368; GFX11-NEXT:    v_cndmask_b32_e64 v0, -v2, v3, vcc_lo
369; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
370; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
371; GFX11-NEXT:    v_cndmask_b32_e64 v0, v0, -v0, vcc_lo
372; GFX11-NEXT:    s_setpc_b64 s[30:31]
373  %fneg0 = fneg float %arg0 ; negation of one select input only
374  %select0 = select i1 %cond0, float %arg1, float %fneg0
375  %fneg1 = fneg float %select0 ; negation of the first select's result
376  %select1 = select i1 %cond1, float %fneg1, float %select0 ; the checks above expect both negations as operand modifiers on the two v_cndmask instructions, with no v_xor
377  ret float %select1
378}
379
380define double @fneg_xor_select_f64(i1 %cond, double %arg0, double %arg1) { ; fneg of a double select with a single use (the body uses fneg, not xor, despite the name)
381; GCN-LABEL: fneg_xor_select_f64:
382; GCN:       ; %bb.0:
383; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
384; GCN-NEXT:    v_and_b32_e32 v0, 1, v0
385; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
386; GCN-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
387; GCN-NEXT:    v_cndmask_b32_e64 v1, -v4, -v2, vcc
388; GCN-NEXT:    s_setpc_b64 s[30:31]
389;
390; GFX11-LABEL: fneg_xor_select_f64:
391; GFX11:       ; %bb.0:
392; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
393; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
394; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
395; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
396; GFX11-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc_lo
397; GFX11-NEXT:    v_cndmask_b32_e64 v1, -v4, -v2, vcc_lo
398; GFX11-NEXT:    s_setpc_b64 s[30:31]
399  %select = select i1 %cond, double %arg0, double %arg1
400  %fneg = fneg double %select ; the checks above match the i64 xor variant: plain v_cndmask into v0, negated operands on the v_cndmask into v1
401  ret double %fneg
402}
403
404define double @fneg_xor_select_f64_multi_user(i1 %cond, double %arg0, double %arg1, ptr addrspace(1) %ptr) { ; fneg of a double select whose un-negated value is also stored
405; GFX7-LABEL: fneg_xor_select_f64_multi_user:
406; GFX7:       ; %bb.0:
407; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
408; GFX7-NEXT:    v_and_b32_e32 v0, 1, v0
409; GFX7-NEXT:    v_mov_b32_e32 v7, v1
410; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
411; GFX7-NEXT:    v_cndmask_b32_e32 v1, v4, v2, vcc
412; GFX7-NEXT:    v_cndmask_b32_e32 v0, v3, v7, vcc
413; GFX7-NEXT:    flat_store_dwordx2 v[5:6], v[0:1]
414; GFX7-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
415; GFX7-NEXT:    s_waitcnt vmcnt(0)
416; GFX7-NEXT:    s_setpc_b64 s[30:31]
417;
418; GFX9-LABEL: fneg_xor_select_f64_multi_user:
419; GFX9:       ; %bb.0:
420; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
421; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
422; GFX9-NEXT:    v_mov_b32_e32 v7, v1
423; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
424; GFX9-NEXT:    v_cndmask_b32_e32 v1, v4, v2, vcc
425; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v7, vcc
426; GFX9-NEXT:    global_store_dwordx2 v[5:6], v[0:1], off
427; GFX9-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
428; GFX9-NEXT:    s_waitcnt vmcnt(0)
429; GFX9-NEXT:    s_setpc_b64 s[30:31]
430;
431; GFX11-LABEL: fneg_xor_select_f64_multi_user:
432; GFX11:       ; %bb.0:
433; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
434; GFX11-NEXT:    v_dual_mov_b32 v7, v1 :: v_dual_and_b32 v0, 1, v0
435; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
436; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
437; GFX11-NEXT:    v_dual_cndmask_b32 v1, v4, v2 :: v_dual_cndmask_b32 v0, v3, v7
438; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
439; GFX11-NEXT:    v_xor_b32_e32 v2, 0x80000000, v1
440; GFX11-NEXT:    global_store_b64 v[5:6], v[0:1], off
441; GFX11-NEXT:    v_mov_b32_e32 v1, v2
442; GFX11-NEXT:    s_setpc_b64 s[30:31]
443  %select = select i1 %cond, double %arg0, double %arg1
444  store double %select, ptr addrspace(1) %ptr ; second user of %select: the stored pair must be the un-negated one
445  %fneg = fneg double %select ; unlike the i64 xor multi-user variant, the checks above expect an explicit v_xor_b32 0x80000000 for the returned v1
446  ret double %fneg
447}
448
449define double @fneg_xor_select_i64_user_with_srcmods(i1 %cond, i64 %arg0, i64 %arg1) { ; i64 sign-bit xor of a select, bitcast to double and consumed by an fadd
450; GCN-LABEL: fneg_xor_select_i64_user_with_srcmods:
451; GCN:       ; %bb.0:
452; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
453; GCN-NEXT:    v_and_b32_e32 v0, 1, v0
454; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
455; GCN-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
456; GCN-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
457; GCN-NEXT:    v_add_f64 v[0:1], -v[1:2], 2.0
458; GCN-NEXT:    s_setpc_b64 s[30:31]
459;
460; GFX11-LABEL: fneg_xor_select_i64_user_with_srcmods:
461; GFX11:       ; %bb.0:
462; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
463; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
464; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
465; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
466; GFX11-NEXT:    v_dual_cndmask_b32 v1, v3, v1 :: v_dual_cndmask_b32 v2, v4, v2
467; GFX11-NEXT:    v_add_f64 v[0:1], -v[1:2], 2.0
468; GFX11-NEXT:    s_setpc_b64 s[30:31]
469  %select = select i1 %cond, i64 %arg0, i64 %arg1
470  %fneg = xor i64 %select, 9223372036854775808 ; bit-63 flip performed on the integer type
471  %cast = bitcast i64 %fneg to double ; reinterpret the flipped bits as a double
472  %add = fadd double %cast, 2.0 ; the checks above expect plain v_cndmask instructions and the negation as an operand modifier on v_add_f64
473  ret double %add
474}
475
476define double @select_fneg_select_fneg_f64(i1 %cond0, i1 %cond1, double %arg0, double %arg1) { ; chain on double: fneg -> select -> fneg -> select
477; GCN-LABEL: select_fneg_select_fneg_f64:
478; GCN:       ; %bb.0:
479; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
480; GCN-NEXT:    v_and_b32_e32 v0, 1, v0
481; GCN-NEXT:    v_xor_b32_e32 v3, 0x80000000, v3
482; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
483; GCN-NEXT:    v_and_b32_e32 v1, 1, v1
484; GCN-NEXT:    v_cndmask_b32_e32 v0, v2, v4, vcc
485; GCN-NEXT:    v_cndmask_b32_e32 v2, v3, v5, vcc
486; GCN-NEXT:    v_xor_b32_e32 v3, 0x80000000, v2
487; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
488; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v3, vcc
489; GCN-NEXT:    s_setpc_b64 s[30:31]
490;
491; GFX11-LABEL: select_fneg_select_fneg_f64:
492; GFX11:       ; %bb.0:
493; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
494; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
495; GFX11-NEXT:    v_xor_b32_e32 v3, 0x80000000, v3
496; GFX11-NEXT:    v_and_b32_e32 v1, 1, v1
497; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
498; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
499; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v4, vcc_lo
500; GFX11-NEXT:    v_cndmask_b32_e32 v2, v3, v5, vcc_lo
501; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
502; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
503; GFX11-NEXT:    v_xor_b32_e32 v3, 0x80000000, v2
504; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
505; GFX11-NEXT:    v_cndmask_b32_e32 v1, v2, v3, vcc_lo
506; GFX11-NEXT:    s_setpc_b64 s[30:31]
507  %fneg0 = fneg double %arg0 ; negation of one select input only
508  %select0 = select i1 %cond0, double %arg1, double %fneg0
509  %fneg1 = fneg double %select0 ; negation of the first select's result
510  %select1 = select i1 %cond1, double %fneg1, double %select0 ; unlike the f32 chain, the checks above keep both negations as v_xor_b32 0x80000000
511  ret double %select1
512}
513
514define i64 @select_fneg_xor_select_i64(i1 %cond0, i1 %cond1, i64 %arg0, i64 %arg1) { ; chain on i64: sign-bit xor -> select -> sign-bit xor -> select
515; GCN-LABEL: select_fneg_xor_select_i64:
516; GCN:       ; %bb.0:
517; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
518; GCN-NEXT:    v_and_b32_e32 v0, 1, v0
519; GCN-NEXT:    v_xor_b32_e32 v3, 0x80000000, v3
520; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
521; GCN-NEXT:    v_and_b32_e32 v1, 1, v1
522; GCN-NEXT:    v_cndmask_b32_e32 v0, v2, v4, vcc
523; GCN-NEXT:    v_cndmask_b32_e32 v2, v3, v5, vcc
524; GCN-NEXT:    v_xor_b32_e32 v3, 0x80000000, v2
525; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
526; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v3, vcc
527; GCN-NEXT:    s_setpc_b64 s[30:31]
528;
529; GFX11-LABEL: select_fneg_xor_select_i64:
530; GFX11:       ; %bb.0:
531; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
532; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
533; GFX11-NEXT:    v_xor_b32_e32 v3, 0x80000000, v3
534; GFX11-NEXT:    v_and_b32_e32 v1, 1, v1
535; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
536; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
537; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v4, vcc_lo
538; GFX11-NEXT:    v_cndmask_b32_e32 v2, v3, v5, vcc_lo
539; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
540; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
541; GFX11-NEXT:    v_xor_b32_e32 v3, 0x80000000, v2
542; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
543; GFX11-NEXT:    v_cndmask_b32_e32 v1, v2, v3, vcc_lo
544; GFX11-NEXT:    s_setpc_b64 s[30:31]
545  %fneg0 = xor i64 %arg0, 9223372036854775808 ; first bit-63 flip, applied to one select input only
546  %select0 = select i1 %cond0, i64 %arg1, i64 %fneg0
547  %fneg1 = xor i64 %select0, 9223372036854775808 ; second bit-63 flip, applied to the first select's result
548  %select1 = select i1 %cond1, i64 %fneg1, i64 %select0 ; the checks above keep both flips as v_xor_b32 0x80000000
549  ret i64 %select1
550}
551
552define half @select_fneg_select_f16(i1 %cond0, i1 %cond1, half %arg0, half %arg1) { ; chain on half: fneg -> select -> fneg -> select
553; GFX7-LABEL: select_fneg_select_f16:
554; GFX7:       ; %bb.0:
555; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
556; GFX7-NEXT:    v_cvt_f16_f32_e32 v3, v3
557; GFX7-NEXT:    v_cvt_f16_f32_e64 v2, -v2
558; GFX7-NEXT:    v_and_b32_e32 v0, 1, v0
559; GFX7-NEXT:    v_and_b32_e32 v1, 1, v1
560; GFX7-NEXT:    v_cvt_f32_f16_e32 v3, v3
561; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v2
562; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
563; GFX7-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
564; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
565; GFX7-NEXT:    v_cndmask_b32_e64 v0, v0, -v0, vcc
566; GFX7-NEXT:    s_setpc_b64 s[30:31]
567;
568; GFX9-LABEL: select_fneg_select_f16:
569; GFX9:       ; %bb.0:
570; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
571; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
572; GFX9-NEXT:    v_xor_b32_e32 v2, 0x8000, v2
573; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
574; GFX9-NEXT:    v_and_b32_e32 v1, 1, v1
575; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
576; GFX9-NEXT:    v_xor_b32_e32 v2, 0x8000, v0
577; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
578; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
579; GFX9-NEXT:    s_setpc_b64 s[30:31]
580;
581; GFX11-LABEL: select_fneg_select_f16:
582; GFX11:       ; %bb.0:
583; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
584; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
585; GFX11-NEXT:    v_xor_b32_e32 v2, 0x8000, v2
586; GFX11-NEXT:    v_and_b32_e32 v1, 1, v1
587; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
588; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
589; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc_lo
590; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
591; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
592; GFX11-NEXT:    v_xor_b32_e32 v2, 0x8000, v0
593; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
594; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
595; GFX11-NEXT:    s_setpc_b64 s[30:31]
596  %fneg0 = fneg half %arg0 ; negation of one select input only
597  %select0 = select i1 %cond0, half %arg1, half %fneg0
598  %fneg1 = fneg half %select0 ; negation of the first select's result
599  %select1 = select i1 %cond1, half %fneg1, half %select0 ; checks above: gfx700 negates via operand modifiers on v_cvt and v_cndmask, gfx900 and gfx1100 use v_xor_b32 0x8000
600  ret half %select1
601}
602
603define i16 @select_fneg_xor_select_i16(i1 %cond0, i1 %cond1, i16 %arg0, i16 %arg1) { ; chain on i16: sign-bit xor -> select -> sign-bit xor -> select
604; GCN-LABEL: select_fneg_xor_select_i16:
605; GCN:       ; %bb.0:
606; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
607; GCN-NEXT:    v_and_b32_e32 v0, 1, v0
608; GCN-NEXT:    v_xor_b32_e32 v2, 0xffff8000, v2
609; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
610; GCN-NEXT:    v_and_b32_e32 v1, 1, v1
611; GCN-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
612; GCN-NEXT:    v_xor_b32_e32 v2, 0xffff8000, v0
613; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
614; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
615; GCN-NEXT:    s_setpc_b64 s[30:31]
616;
617; GFX11-LABEL: select_fneg_xor_select_i16:
618; GFX11:       ; %bb.0:
619; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
620; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
621; GFX11-NEXT:    v_xor_b32_e32 v2, 0xffff8000, v2
622; GFX11-NEXT:    v_and_b32_e32 v1, 1, v1
623; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
624; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
625; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc_lo
626; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
627; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
628; GFX11-NEXT:    v_xor_b32_e32 v2, 0xffff8000, v0
629; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
630; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
631; GFX11-NEXT:    s_setpc_b64 s[30:31]
632  %fneg0 = xor i16 %arg0, -32768 ; first bit-15 flip, applied to one select input only
633  %select0 = select i1 %cond0, i16 %arg1, i16 %fneg0
634  %fneg1 = xor i16 %select0, -32768 ; second bit-15 flip, applied to the first select's result
635  %select1 = select i1 %cond1, i16 %fneg1, i16 %select0 ; the checks above keep both flips as v_xor_b32 0xffff8000
636  ret i16 %select1
637}
638
639define <2 x half> @select_fneg_select_v2f16(<2 x i1> %cond0, <2 x i1> %cond1, <2 x half> %arg0, <2 x half> %arg1) { ; chain on <2 x half>: fneg -> select -> fneg -> select
640; GFX7-LABEL: select_fneg_select_v2f16:
641; GFX7:       ; %bb.0:
642; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
643; GFX7-NEXT:    v_cvt_f16_f32_e32 v5, v5
644; GFX7-NEXT:    v_cvt_f16_f32_e32 v4, v4
645; GFX7-NEXT:    v_cvt_f16_f32_e32 v6, v6
646; GFX7-NEXT:    v_cvt_f16_f32_e32 v7, v7
647; GFX7-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
648; GFX7-NEXT:    v_or_b32_e32 v4, v4, v5
649; GFX7-NEXT:    v_xor_b32_e32 v4, 0x80008000, v4
650; GFX7-NEXT:    v_cvt_f32_f16_e32 v5, v6
651; GFX7-NEXT:    v_cvt_f32_f16_e32 v6, v7
652; GFX7-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
653; GFX7-NEXT:    v_cvt_f32_f16_e32 v7, v7
654; GFX7-NEXT:    v_cvt_f32_f16_e32 v4, v4
655; GFX7-NEXT:    v_and_b32_e32 v1, 1, v1
656; GFX7-NEXT:    v_and_b32_e32 v0, 1, v0
657; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
658; GFX7-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc
659; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
660; GFX7-NEXT:    v_cvt_f16_f32_e32 v6, v1
661; GFX7-NEXT:    v_cndmask_b32_e32 v0, v4, v5, vcc
662; GFX7-NEXT:    v_cvt_f16_f32_e32 v4, v0
663; GFX7-NEXT:    v_and_b32_e32 v2, 1, v2
664; GFX7-NEXT:    v_lshlrev_b32_e32 v5, 16, v6
665; GFX7-NEXT:    v_and_b32_e32 v3, 1, v3
666; GFX7-NEXT:    v_or_b32_e32 v4, v4, v5
667; GFX7-NEXT:    v_xor_b32_e32 v4, 0x80008000, v4
668; GFX7-NEXT:    v_cvt_f32_f16_e32 v5, v4
669; GFX7-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
670; GFX7-NEXT:    v_cvt_f32_f16_e32 v4, v4
671; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v2
672; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
673; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v3
674; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
675; GFX7-NEXT:    s_setpc_b64 s[30:31]
676;
677; GFX9-LABEL: select_fneg_select_v2f16:
678; GFX9:       ; %bb.0:
679; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
680; GFX9-NEXT:    v_and_b32_e32 v1, 1, v1
681; GFX9-NEXT:    v_xor_b32_e32 v4, 0x80008000, v4
682; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
683; GFX9-NEXT:    v_lshrrev_b32_e32 v6, 16, v5
684; GFX9-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
685; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
686; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc
687; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
688; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v5, vcc
689; GFX9-NEXT:    s_mov_b32 s4, 0x5040100
690; GFX9-NEXT:    v_perm_b32 v4, v1, v0, s4
691; GFX9-NEXT:    v_and_b32_e32 v3, 1, v3
692; GFX9-NEXT:    v_xor_b32_e32 v4, 0x80008000, v4
693; GFX9-NEXT:    v_and_b32_e32 v2, 1, v2
694; GFX9-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
695; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v3
696; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
697; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v2
698; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
699; GFX9-NEXT:    v_perm_b32 v0, v1, v0, s4
700; GFX9-NEXT:    s_setpc_b64 s[30:31]
701;
702; GFX11-LABEL: select_fneg_select_v2f16:
703; GFX11:       ; %bb.0:
704; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
705; GFX11-NEXT:    v_xor_b32_e32 v4, 0x80008000, v4
706; GFX11-NEXT:    v_lshrrev_b32_e32 v6, 16, v5
707; GFX11-NEXT:    v_and_b32_e32 v3, 1, v3
708; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
709; GFX11-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
710; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
711; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
712; GFX11-NEXT:    v_cndmask_b32_e32 v0, v4, v5, vcc_lo
713; GFX11-NEXT:    v_and_b32_e32 v1, 1, v1
714; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
715; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
716; GFX11-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc_lo
717; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v3
718; GFX11-NEXT:    v_perm_b32 v4, v1, v0, 0x5040100
719; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
720; GFX11-NEXT:    v_xor_b32_e32 v4, 0x80008000, v4
721; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
722; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
723; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v5 :: v_dual_and_b32 v2, 1, v2
724; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v2
725; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
726; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
727; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
728; GFX11-NEXT:    s_setpc_b64 s[30:31]
729  %fneg0 = fneg <2 x half> %arg0 ; negation of one select input only, both lanes
730  %select0 = select <2 x i1> %cond0, <2 x half> %arg1, <2 x half> %fneg0
731  %fneg1 = fneg <2 x half> %select0 ; negation of the first select's result, both lanes
732  %select1 = select <2 x i1> %cond1, <2 x half> %fneg1, <2 x half> %select0 ; the checks above keep both negations as v_xor_b32 0x80008000 on all three targets
733  ret <2 x half> %select1
734}
735
; Integer counterpart of select_fneg_select_v2f16 on <2 x i16>: the per-lane
; sign bit is flipped with xor -32768 on %arg0 before the first select, and
; again on the first select's result before the second select.
; The expected output keeps explicit v_xor instructions on all three targets.
736define <2 x i16> @select_fneg_xor_select_v2i16(<2 x i1> %cond0, <2 x i1> %cond1, <2 x i16> %arg0, <2 x i16> %arg1) {
737; GFX7-LABEL: select_fneg_xor_select_v2i16:
738; GFX7:       ; %bb.0:
739; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
740; GFX7-NEXT:    v_and_b32_e32 v1, 1, v1
741; GFX7-NEXT:    v_and_b32_e32 v0, 1, v0
742; GFX7-NEXT:    v_xor_b32_e32 v5, 0xffff8000, v5
743; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
744; GFX7-NEXT:    v_and_b32_e32 v3, 1, v3
745; GFX7-NEXT:    v_xor_b32_e32 v4, 0xffff8000, v4
746; GFX7-NEXT:    v_cndmask_b32_e32 v1, v5, v7, vcc
747; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
748; GFX7-NEXT:    v_and_b32_e32 v2, 1, v2
749; GFX7-NEXT:    v_cndmask_b32_e32 v0, v4, v6, vcc
750; GFX7-NEXT:    v_xor_b32_e32 v5, 0x8000, v1
751; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v3
752; GFX7-NEXT:    v_xor_b32_e32 v4, 0x8000, v0
753; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
754; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v2
755; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
756; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 16, v1
757; GFX7-NEXT:    v_and_b32_e32 v0, 0xffff, v0
758; GFX7-NEXT:    v_or_b32_e32 v0, v0, v3
759; GFX7-NEXT:    v_and_b32_e32 v1, 0xffff, v1
760; GFX7-NEXT:    s_setpc_b64 s[30:31]
761;
762; GFX9-LABEL: select_fneg_xor_select_v2i16:
763; GFX9:       ; %bb.0:
764; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
765; GFX9-NEXT:    v_and_b32_e32 v1, 1, v1
766; GFX9-NEXT:    v_xor_b32_e32 v4, 0x80008000, v4
767; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
768; GFX9-NEXT:    v_lshrrev_b32_e32 v6, 16, v5
769; GFX9-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
770; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
771; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc
772; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
773; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v5, vcc
774; GFX9-NEXT:    s_mov_b32 s4, 0x5040100
775; GFX9-NEXT:    v_perm_b32 v4, v1, v0, s4
776; GFX9-NEXT:    v_and_b32_e32 v3, 1, v3
777; GFX9-NEXT:    v_xor_b32_e32 v4, 0x80008000, v4
778; GFX9-NEXT:    v_and_b32_e32 v2, 1, v2
779; GFX9-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
780; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v3
781; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
782; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v2
783; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
784; GFX9-NEXT:    v_perm_b32 v0, v1, v0, s4
785; GFX9-NEXT:    s_setpc_b64 s[30:31]
786;
787; GFX11-LABEL: select_fneg_xor_select_v2i16:
788; GFX11:       ; %bb.0:
789; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
790; GFX11-NEXT:    v_xor_b32_e32 v4, 0x80008000, v4
791; GFX11-NEXT:    v_lshrrev_b32_e32 v6, 16, v5
792; GFX11-NEXT:    v_and_b32_e32 v3, 1, v3
793; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
794; GFX11-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
795; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
796; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
797; GFX11-NEXT:    v_cndmask_b32_e32 v0, v4, v5, vcc_lo
798; GFX11-NEXT:    v_and_b32_e32 v1, 1, v1
799; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
800; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
801; GFX11-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc_lo
802; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v3
803; GFX11-NEXT:    v_perm_b32 v4, v1, v0, 0x5040100
804; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
805; GFX11-NEXT:    v_xor_b32_e32 v4, 0x80008000, v4
806; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
807; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
808; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v5 :: v_dual_and_b32 v2, 1, v2
809; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v2
810; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
811; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
812; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
813; GFX11-NEXT:    s_setpc_b64 s[30:31]
814  %fneg0 = xor <2 x i16> %arg0, <i16 -32768, i16 -32768>
815  %select0 = select <2 x i1> %cond0, <2 x i16> %arg1, <2 x i16> %fneg0
816  %fneg1 = xor <2 x i16> %select0, <i16 -32768, i16 -32768>
817  %select1 = select <2 x i1> %cond1, <2 x i16> %fneg1, <2 x i16> %select0
818  ret <2 x i16> %select1
819}
820
821; pattern that appeared in rocm-device-libs to manually operate on the
822; sign bit of the high half of a double
; Picks %arg2 when bit 0 of %arg is clear (otherwise %arg1), views the chosen
; double as <2 x i32>, and xors its high element with 0x80000000 when
; %arg > 1 (signed compare) or with 0 otherwise.
823define double @cospiD_pattern0(i32 %arg, double %arg1, double %arg2) {
824; GCN-LABEL: cospiD_pattern0:
825; GCN:       ; %bb.0:
826; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
827; GCN-NEXT:    v_and_b32_e32 v5, 1, v0
828; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
829; GCN-NEXT:    v_cndmask_b32_e32 v3, v1, v3, vcc
830; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v4, vcc
831; GCN-NEXT:    v_cmp_lt_i32_e32 vcc, 1, v0
832; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
833; GCN-NEXT:    v_lshlrev_b32_e32 v0, 31, v0
834; GCN-NEXT:    v_xor_b32_e32 v1, v1, v0
835; GCN-NEXT:    v_mov_b32_e32 v0, v3
836; GCN-NEXT:    s_setpc_b64 s[30:31]
837;
838; GFX11-LABEL: cospiD_pattern0:
839; GFX11:       ; %bb.0:
840; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
841; GFX11-NEXT:    v_and_b32_e32 v5, 1, v0
842; GFX11-NEXT:    v_cmp_lt_i32_e64 s0, 1, v0
843; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
844; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v5
845; GFX11-NEXT:    v_cndmask_b32_e64 v5, 0, 1, s0
846; GFX11-NEXT:    v_dual_cndmask_b32 v0, v1, v3 :: v_dual_cndmask_b32 v1, v2, v4
847; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
848; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 31, v5
849; GFX11-NEXT:    v_xor_b32_e32 v1, v1, v2
850; GFX11-NEXT:    s_setpc_b64 s[30:31]
851  %i = and i32 %arg, 1
852  %i3 = icmp eq i32 %i, 0
853  %i4 = select i1 %i3, double %arg2, double %arg1
854  %i5 = bitcast double %i4 to <2 x i32>
855  %i6 = icmp sgt i32 %arg, 1
856  %i7 = select i1 %i6, i32 -2147483648, i32 0
857  %i8 = extractelement <2 x i32> %i5, i64 1
858  %i9 = xor i32 %i8, %i7
859  %i10 = insertelement <2 x i32> %i5, i32 %i9, i64 1
860  %i11 = bitcast <2 x i32> %i10 to double
861  ret double %i11
862}
863
; Same operand selection as cospiD_pattern0, but the conditional sign flip is
; written as a select between the chosen double and its fneg (condition:
; %arg > 1, signed) instead of integer manipulation of the high element.
864define double @cospiD_pattern1(i32 %arg, double %arg1, double %arg2) {
865; GCN-LABEL: cospiD_pattern1:
866; GCN:       ; %bb.0:
867; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
868; GCN-NEXT:    v_and_b32_e32 v5, 1, v0
869; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
870; GCN-NEXT:    v_cndmask_b32_e32 v3, v1, v3, vcc
871; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v4, vcc
872; GCN-NEXT:    v_xor_b32_e32 v2, 0x80000000, v1
873; GCN-NEXT:    v_cmp_lt_i32_e32 vcc, 1, v0
874; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
875; GCN-NEXT:    v_mov_b32_e32 v0, v3
876; GCN-NEXT:    s_setpc_b64 s[30:31]
877;
878; GFX11-LABEL: cospiD_pattern1:
879; GFX11:       ; %bb.0:
880; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
881; GFX11-NEXT:    v_and_b32_e32 v5, 1, v0
882; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_3)
883; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v5
884; GFX11-NEXT:    v_cndmask_b32_e32 v3, v1, v3, vcc_lo
885; GFX11-NEXT:    v_cndmask_b32_e32 v1, v2, v4, vcc_lo
886; GFX11-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 1, v0
887; GFX11-NEXT:    v_mov_b32_e32 v0, v3
888; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
889; GFX11-NEXT:    v_xor_b32_e32 v2, 0x80000000, v1
890; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc_lo
891; GFX11-NEXT:    s_setpc_b64 s[30:31]
892  %i = and i32 %arg, 1
893  %i3 = icmp eq i32 %i, 0
894  %i4 = select i1 %i3, double %arg2, double %arg1
895  %i5 = icmp sgt i32 %arg, 1
896  %i6 = fneg double %i4
897  %i7 = select i1 %i5, double %i6, double %i4
898  ret double %i7
899}
900
901; artificial example, scaled to operate on 16-bit halves of a float.
; 16-bit variant of cospiD_pattern0: the selected float is viewed as
; <2 x i16> and its high element is xor'd with -32768 when %arg > 1 (signed
; i16 compare) or with 0 otherwise.
902define float @cospiD_pattern0_half(i16 %arg, float %arg1, float %arg2) {
903; GFX7-LABEL: cospiD_pattern0_half:
904; GFX7:       ; %bb.0:
905; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
906; GFX7-NEXT:    v_bfe_i32 v3, v0, 0, 16
907; GFX7-NEXT:    v_and_b32_e32 v0, 1, v0
908; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
909; GFX7-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
910; GFX7-NEXT:    v_cmp_lt_i32_e32 vcc, 1, v3
911; GFX7-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
912; GFX7-NEXT:    v_and_b32_e32 v1, 0xffff, v0
913; GFX7-NEXT:    v_and_b32_e32 v0, 0xffff0000, v0
914; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 31, v2
915; GFX7-NEXT:    v_xor_b32_e32 v0, v2, v0
916; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
917; GFX7-NEXT:    s_setpc_b64 s[30:31]
918;
919; GFX9-LABEL: cospiD_pattern0_half:
920; GFX9:       ; %bb.0:
921; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
922; GFX9-NEXT:    v_and_b32_e32 v3, 1, v0
923; GFX9-NEXT:    v_cmp_eq_u16_e32 vcc, 0, v3
924; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
925; GFX9-NEXT:    v_cmp_lt_i16_e32 vcc, 1, v0
926; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
927; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 15, v0
928; GFX9-NEXT:    v_xor_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
929; GFX9-NEXT:    s_mov_b32 s4, 0x5040100
930; GFX9-NEXT:    v_perm_b32 v0, v0, v1, s4
931; GFX9-NEXT:    s_setpc_b64 s[30:31]
932;
933; GFX11-LABEL: cospiD_pattern0_half:
934; GFX11:       ; %bb.0:
935; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
936; GFX11-NEXT:    v_and_b32_e32 v3, 1, v0
937; GFX11-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 1, v0
938; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
939; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
940; GFX11-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0, v3
941; GFX11-NEXT:    v_lshlrev_b16 v0, 15, v0
942; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc_lo
943; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
944; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
945; GFX11-NEXT:    v_xor_b32_e32 v0, v2, v0
946; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
947; GFX11-NEXT:    v_perm_b32 v0, v0, v1, 0x5040100
948; GFX11-NEXT:    s_setpc_b64 s[30:31]
949  %i = and i16 %arg, 1
950  %i3 = icmp eq i16 %i, 0
951  %i4 = select i1 %i3, float %arg2, float %arg1
952  %i5 = bitcast float %i4 to <2 x i16>
953  %i6 = icmp sgt i16 %arg, 1
954  %i7 = select i1 %i6, i16 -32768, i16 0
955  %i8 = extractelement <2 x i16> %i5, i64 1
956  %i9 = xor i16 %i8, %i7
957  %i10 = insertelement <2 x i16> %i5, i16 %i9, i64 1
958  %i11 = bitcast <2 x i16> %i10 to float
959  ret float %i11
960}
961
; Float variant of cospiD_pattern1 driven by an i16 %arg: selects between the
; chosen float and its fneg when %arg > 1 (signed). On every target the
; expected output folds the negation into a source modifier of the final
; v_cndmask_b32_e64 instead of emitting a separate xor.
962define float @cospiD_pattern1_half(i16 %arg, float %arg1, float %arg2) {
963; GFX7-LABEL: cospiD_pattern1_half:
964; GFX7:       ; %bb.0:
965; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
966; GFX7-NEXT:    v_bfe_i32 v3, v0, 0, 16
967; GFX7-NEXT:    v_and_b32_e32 v0, 1, v0
968; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
969; GFX7-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
970; GFX7-NEXT:    v_cmp_lt_i32_e32 vcc, 1, v3
971; GFX7-NEXT:    v_cndmask_b32_e64 v0, v0, -v0, vcc
972; GFX7-NEXT:    s_setpc_b64 s[30:31]
973;
974; GFX9-LABEL: cospiD_pattern1_half:
975; GFX9:       ; %bb.0:
976; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
977; GFX9-NEXT:    v_and_b32_e32 v3, 1, v0
978; GFX9-NEXT:    v_cmp_eq_u16_e32 vcc, 0, v3
979; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
980; GFX9-NEXT:    v_cmp_lt_i16_e32 vcc, 1, v0
981; GFX9-NEXT:    v_cndmask_b32_e64 v0, v1, -v1, vcc
982; GFX9-NEXT:    s_setpc_b64 s[30:31]
983;
984; GFX11-LABEL: cospiD_pattern1_half:
985; GFX11:       ; %bb.0:
986; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
987; GFX11-NEXT:    v_and_b32_e32 v3, 1, v0
988; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
989; GFX11-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0, v3
990; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc_lo
991; GFX11-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 1, v0
992; GFX11-NEXT:    v_cndmask_b32_e64 v0, v1, -v1, vcc_lo
993; GFX11-NEXT:    s_setpc_b64 s[30:31]
994  %i = and i16 %arg, 1
995  %i3 = icmp eq i16 %i, 0
996  %i4 = select i1 %i3, float %arg2, float %arg1
997  %i5 = icmp sgt i16 %arg, 1
998  %i6 = fneg float %i4
999  %i7 = select i1 %i5, float %i6, float %i4
1000  ret float %i7
1001}
1002
; fneg of a double produced by bitcasting an i64 argument. The expected
; output is a single v_xor_b32 of the high register (v1) with 0x80000000.
1003define double @fneg_f64_bitcast_vector_i64_to_f64(i64 %arg) {
1004; GCN-LABEL: fneg_f64_bitcast_vector_i64_to_f64:
1005; GCN:       ; %bb.0:
1006; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1007; GCN-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
1008; GCN-NEXT:    s_setpc_b64 s[30:31]
1009;
1010; GFX11-LABEL: fneg_f64_bitcast_vector_i64_to_f64:
1011; GFX11:       ; %bb.0:
1012; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1013; GFX11-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
1014; GFX11-NEXT:    s_setpc_b64 s[30:31]
1015  %bitcast = bitcast i64 %arg to double
1016  %fneg = fneg double %bitcast
1017  ret double %fneg
1018}
1019
; fneg of a double produced by bitcasting a <2 x i32> argument. The expected
; output is a single v_xor_b32 of the high register (v1) with 0x80000000.
1020define double @fneg_f64_bitcast_vector_v2i32_to_f64(<2 x i32> %arg) {
1021; GCN-LABEL: fneg_f64_bitcast_vector_v2i32_to_f64:
1022; GCN:       ; %bb.0:
1023; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1024; GCN-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
1025; GCN-NEXT:    s_setpc_b64 s[30:31]
1026;
1027; GFX11-LABEL: fneg_f64_bitcast_vector_v2i32_to_f64:
1028; GFX11:       ; %bb.0:
1029; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1030; GFX11-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
1031; GFX11-NEXT:    s_setpc_b64 s[30:31]
1032  %bitcast = bitcast <2 x i32> %arg to double
1033  %fneg = fneg double %bitcast
1034  ret double %fneg
1035}
1036
; fneg of a double produced by bitcasting a <2 x float> argument. The
; expected output is a single v_xor_b32 of the high register (v1) with
; 0x80000000.
1037define double @fneg_f64_bitcast_vector_v2f32_to_f64(<2 x float> %arg) {
1038; GCN-LABEL: fneg_f64_bitcast_vector_v2f32_to_f64:
1039; GCN:       ; %bb.0:
1040; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1041; GCN-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
1042; GCN-NEXT:    s_setpc_b64 s[30:31]
1043;
1044; GFX11-LABEL: fneg_f64_bitcast_vector_v2f32_to_f64:
1045; GFX11:       ; %bb.0:
1046; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1047; GFX11-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
1048; GFX11-NEXT:    s_setpc_b64 s[30:31]
1049  %bitcast = bitcast <2 x float> %arg to double
1050  %fneg = fneg double %bitcast
1051  ret double %fneg
1052}
1053
; fneg of a double produced by bitcasting a <4 x i16> argument. The gfx700
; checks first combine v0..v3 pairwise into two dwords (shift/and/or) before
; the sign-bit xor; the gfx900 and gfx1100 checks contain only the xor on v1.
1054define double @fneg_f64_bitcast_vector_v4i16_to_f64(<4 x i16> %arg) {
1055; GFX7-LABEL: fneg_f64_bitcast_vector_v4i16_to_f64:
1056; GFX7:       ; %bb.0:
1057; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1058; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
1059; GFX7-NEXT:    v_and_b32_e32 v2, 0xffff, v2
1060; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
1061; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
1062; GFX7-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1063; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
1064; GFX7-NEXT:    v_xor_b32_e32 v1, 0x80000000, v2
1065; GFX7-NEXT:    s_setpc_b64 s[30:31]
1066;
1067; GFX9-LABEL: fneg_f64_bitcast_vector_v4i16_to_f64:
1068; GFX9:       ; %bb.0:
1069; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1070; GFX9-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
1071; GFX9-NEXT:    s_setpc_b64 s[30:31]
1072;
1073; GFX11-LABEL: fneg_f64_bitcast_vector_v4i16_to_f64:
1074; GFX11:       ; %bb.0:
1075; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1076; GFX11-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
1077; GFX11-NEXT:    s_setpc_b64 s[30:31]
1078  %bitcast = bitcast <4 x i16> %arg to double
1079  %fneg = fneg double %bitcast
1080  ret double %fneg
1081}
1082
; fneg of a double produced by bitcasting a <4 x half> argument. The gfx700
; checks convert each of v0..v3 with v_cvt_f16_f32 and pack them pairwise
; (shift/or) before the sign-bit xor; the gfx900 and gfx1100 checks contain
; only the xor on v1.
1083define double @fneg_f64_bitcast_vector_v4f16_to_f64(<4 x half> %arg) {
1084; GFX7-LABEL: fneg_f64_bitcast_vector_v4f16_to_f64:
1085; GFX7:       ; %bb.0:
1086; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1087; GFX7-NEXT:    v_cvt_f16_f32_e32 v3, v3
1088; GFX7-NEXT:    v_cvt_f16_f32_e32 v2, v2
1089; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
1090; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
1091; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
1092; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
1093; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
1094; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
1095; GFX7-NEXT:    v_xor_b32_e32 v1, 0x80000000, v2
1096; GFX7-NEXT:    s_setpc_b64 s[30:31]
1097;
1098; GFX9-LABEL: fneg_f64_bitcast_vector_v4f16_to_f64:
1099; GFX9:       ; %bb.0:
1100; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1101; GFX9-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
1102; GFX9-NEXT:    s_setpc_b64 s[30:31]
1103;
1104; GFX11-LABEL: fneg_f64_bitcast_vector_v4f16_to_f64:
1105; GFX11:       ; %bb.0:
1106; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1107; GFX11-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
1108; GFX11-NEXT:    s_setpc_b64 s[30:31]
1109  %bitcast = bitcast <4 x half> %arg to double
1110  %fneg = fneg double %bitcast
1111  ret double %fneg
1112}
1113
; fneg of a double bitcast from a <2 x i32> assembled from two scalar i32
; arguments via insertelement. The expected output is a single v_xor_b32 of
; v1 (the register holding %elt1) with 0x80000000.
1114define double @fneg_f64_bitcast_build_vector_v2i32_to_f64(i32 %elt0, i32 %elt1) {
1115; GCN-LABEL: fneg_f64_bitcast_build_vector_v2i32_to_f64:
1116; GCN:       ; %bb.0:
1117; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1118; GCN-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
1119; GCN-NEXT:    s_setpc_b64 s[30:31]
1120;
1121; GFX11-LABEL: fneg_f64_bitcast_build_vector_v2i32_to_f64:
1122; GFX11:       ; %bb.0:
1123; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1124; GFX11-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
1125; GFX11-NEXT:    s_setpc_b64 s[30:31]
1126  %insert.0 = insertelement <2 x i32> poison, i32 %elt0, i32 0
1127  %insert.1 = insertelement <2 x i32> %insert.0, i32 %elt1, i32 1
1128  %bitcast = bitcast <2 x i32> %insert.1 to double
1129  %fneg = fneg double %bitcast
1130  ret double %fneg
1131}
1132
; fneg of a double bitcast from a <2 x float> assembled from two scalar float
; arguments via insertelement. The expected output is a single v_xor_b32 of
; v1 (the register holding %elt1) with 0x80000000.
1133define double @fneg_f64_bitcast_build_vector_v2f32_to_f64(float %elt0, float %elt1) {
1134; GCN-LABEL: fneg_f64_bitcast_build_vector_v2f32_to_f64:
1135; GCN:       ; %bb.0:
1136; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1137; GCN-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
1138; GCN-NEXT:    s_setpc_b64 s[30:31]
1139;
1140; GFX11-LABEL: fneg_f64_bitcast_build_vector_v2f32_to_f64:
1141; GFX11:       ; %bb.0:
1142; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1143; GFX11-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
1144; GFX11-NEXT:    s_setpc_b64 s[30:31]
1145  %insert.0 = insertelement <2 x float> poison, float %elt0, i32 0
1146  %insert.1 = insertelement <2 x float> %insert.0, float %elt1, i32 1
1147  %bitcast = bitcast <2 x float> %insert.1 to double
1148  %fneg = fneg double %bitcast
1149  ret double %fneg
1150}
1151
; fneg of a double bitcast from a <4 x i16> assembled from four scalar i16
; arguments. Every target's checks pack the elements into two dwords first
; (shift/and/or on gfx700, v_perm_b32 on gfx900 and gfx1100) and then xor the
; packed high dword with 0x80000000.
1152define double @fneg_f64_bitcast_build_vector_v4i16_to_f64(i16 %elt0, i16 %elt1, i16 %elt2, i16 %elt3) {
1153; GFX7-LABEL: fneg_f64_bitcast_build_vector_v4i16_to_f64:
1154; GFX7:       ; %bb.0:
1155; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1156; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
1157; GFX7-NEXT:    v_and_b32_e32 v2, 0xffff, v2
1158; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
1159; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
1160; GFX7-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1161; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
1162; GFX7-NEXT:    v_xor_b32_e32 v1, 0x80000000, v2
1163; GFX7-NEXT:    s_setpc_b64 s[30:31]
1164;
1165; GFX9-LABEL: fneg_f64_bitcast_build_vector_v4i16_to_f64:
1166; GFX9:       ; %bb.0:
1167; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1168; GFX9-NEXT:    s_mov_b32 s4, 0x5040100
1169; GFX9-NEXT:    v_perm_b32 v2, v3, v2, s4
1170; GFX9-NEXT:    v_perm_b32 v0, v1, v0, s4
1171; GFX9-NEXT:    v_xor_b32_e32 v1, 0x80000000, v2
1172; GFX9-NEXT:    s_setpc_b64 s[30:31]
1173;
1174; GFX11-LABEL: fneg_f64_bitcast_build_vector_v4i16_to_f64:
1175; GFX11:       ; %bb.0:
1176; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1177; GFX11-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
1178; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
1179; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
1180; GFX11-NEXT:    v_xor_b32_e32 v1, 0x80000000, v2
1181; GFX11-NEXT:    s_setpc_b64 s[30:31]
1182  %insert.0 = insertelement <4 x i16> poison, i16 %elt0, i32 0
1183  %insert.1 = insertelement <4 x i16> %insert.0, i16 %elt1, i32 1
1184  %insert.2 = insertelement <4 x i16> %insert.1, i16 %elt2, i32 2
1185  %insert.3 = insertelement <4 x i16> %insert.2, i16 %elt3, i32 3
1186  %bitcast = bitcast <4 x i16> %insert.3 to double
1187  %fneg = fneg double %bitcast
1188  ret double %fneg
1189}
1190
; fneg of a double bitcast from a <4 x half> assembled from four scalar half
; arguments. The gfx700 checks convert each input with v_cvt_f16_f32 and pack
; with shift/or; the gfx900 and gfx1100 checks pack with v_perm_b32. All of
; them then xor the packed high dword with 0x80000000.
1191define double @fneg_f64_bitcast_build_vector_v4f16_to_f64(half %elt0, half %elt1, half %elt2, half %elt3) {
1192; GFX7-LABEL: fneg_f64_bitcast_build_vector_v4f16_to_f64:
1193; GFX7:       ; %bb.0:
1194; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1195; GFX7-NEXT:    v_cvt_f16_f32_e32 v3, v3
1196; GFX7-NEXT:    v_cvt_f16_f32_e32 v2, v2
1197; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
1198; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
1199; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
1200; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
1201; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
1202; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
1203; GFX7-NEXT:    v_xor_b32_e32 v1, 0x80000000, v2
1204; GFX7-NEXT:    s_setpc_b64 s[30:31]
1205;
1206; GFX9-LABEL: fneg_f64_bitcast_build_vector_v4f16_to_f64:
1207; GFX9:       ; %bb.0:
1208; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1209; GFX9-NEXT:    s_mov_b32 s4, 0x5040100
1210; GFX9-NEXT:    v_perm_b32 v2, v3, v2, s4
1211; GFX9-NEXT:    v_perm_b32 v0, v1, v0, s4
1212; GFX9-NEXT:    v_xor_b32_e32 v1, 0x80000000, v2
1213; GFX9-NEXT:    s_setpc_b64 s[30:31]
1214;
1215; GFX11-LABEL: fneg_f64_bitcast_build_vector_v4f16_to_f64:
1216; GFX11:       ; %bb.0:
1217; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1218; GFX11-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
1219; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
1220; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
1221; GFX11-NEXT:    v_xor_b32_e32 v1, 0x80000000, v2
1222; GFX11-NEXT:    s_setpc_b64 s[30:31]
1223  %insert.0 = insertelement <4 x half> poison, half %elt0, i32 0
1224  %insert.1 = insertelement <4 x half> %insert.0, half %elt1, i32 1
1225  %insert.2 = insertelement <4 x half> %insert.1, half %elt2, i32 2
1226  %insert.3 = insertelement <4 x half> %insert.2, half %elt3, i32 3
1227  %bitcast = bitcast <4 x half> %insert.3 to double
1228  %fneg = fneg double %bitcast
1229  ret double %fneg
1230}
1231
; fneg of a double bitcast from a <4 x bfloat> assembled from four scalar
; bfloat arguments. The gfx700 checks use v_mul_f32 by 1.0, a 16-bit right
; shift and v_alignbit_b32 to pack; the gfx900 and gfx1100 checks pack with
; v_perm_b32. All of them then xor the packed high dword with 0x80000000.
1232define double @fneg_f64_bitcast_build_vector_v4bf16_to_f64(bfloat %elt0, bfloat %elt1, bfloat %elt2, bfloat %elt3) {
1233; GFX7-LABEL: fneg_f64_bitcast_build_vector_v4bf16_to_f64:
1234; GFX7:       ; %bb.0:
1235; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1236; GFX7-NEXT:    v_mul_f32_e32 v3, 1.0, v3
1237; GFX7-NEXT:    v_mul_f32_e32 v1, 1.0, v1
1238; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
1239; GFX7-NEXT:    v_mul_f32_e32 v2, 1.0, v2
1240; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
1241; GFX7-NEXT:    v_mul_f32_e32 v0, 1.0, v0
1242; GFX7-NEXT:    v_alignbit_b32 v2, v3, v2, 16
1243; GFX7-NEXT:    v_alignbit_b32 v0, v1, v0, 16
1244; GFX7-NEXT:    v_xor_b32_e32 v1, 0x80000000, v2
1245; GFX7-NEXT:    s_setpc_b64 s[30:31]
1246;
1247; GFX9-LABEL: fneg_f64_bitcast_build_vector_v4bf16_to_f64:
1248; GFX9:       ; %bb.0:
1249; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1250; GFX9-NEXT:    s_mov_b32 s4, 0x5040100
1251; GFX9-NEXT:    v_perm_b32 v2, v3, v2, s4
1252; GFX9-NEXT:    v_perm_b32 v0, v1, v0, s4
1253; GFX9-NEXT:    v_xor_b32_e32 v1, 0x80000000, v2
1254; GFX9-NEXT:    s_setpc_b64 s[30:31]
1255;
1256; GFX11-LABEL: fneg_f64_bitcast_build_vector_v4bf16_to_f64:
1257; GFX11:       ; %bb.0:
1258; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1259; GFX11-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
1260; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
1261; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
1262; GFX11-NEXT:    v_xor_b32_e32 v1, 0x80000000, v2
1263; GFX11-NEXT:    s_setpc_b64 s[30:31]
1264  %insert.0 = insertelement <4 x bfloat> poison, bfloat %elt0, i32 0
1265  %insert.1 = insertelement <4 x bfloat> %insert.0, bfloat %elt1, i32 1
1266  %insert.2 = insertelement <4 x bfloat> %insert.1, bfloat %elt2, i32 2
1267  %insert.3 = insertelement <4 x bfloat> %insert.2, bfloat %elt3, i32 3
1268  %bitcast = bitcast <4 x bfloat> %insert.3 to double
1269  %fneg = fneg double %bitcast
1270  ret double %fneg
1271}
1272
; The fneg of the bitcast build vector feeds an fmul. The expected output has
; no separate xor: the negation appears as a source modifier (-v[0:1]) on
; v_mul_f64.
1273define double @fneg_f64_bitcast_build_vector_v2i32_to_f64_modifier_user(i32 %elt0, i32 %elt1, double %fp.val) {
1274; GCN-LABEL: fneg_f64_bitcast_build_vector_v2i32_to_f64_modifier_user:
1275; GCN:       ; %bb.0:
1276; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1277; GCN-NEXT:    v_mul_f64 v[0:1], -v[0:1], v[2:3]
1278; GCN-NEXT:    s_setpc_b64 s[30:31]
1279;
1280; GFX11-LABEL: fneg_f64_bitcast_build_vector_v2i32_to_f64_modifier_user:
1281; GFX11:       ; %bb.0:
1282; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1283; GFX11-NEXT:    v_mul_f64 v[0:1], -v[0:1], v[2:3]
1284; GFX11-NEXT:    s_setpc_b64 s[30:31]
1285  %insert.0 = insertelement <2 x i32> poison, i32 %elt0, i32 0
1286  %insert.1 = insertelement <2 x i32> %insert.0, i32 %elt1, i32 1
1287  %bitcast = bitcast <2 x i32> %insert.1 to double
1288  %fneg = fneg double %bitcast
1289  %fmul = fmul double %fneg, %fp.val
1290  ret double %fmul
1291}
1292
; The same fneg value is used by two fmuls, once as the first operand and
; once as the second. Both expected v_mul_f64 instructions carry the negation
; as a -v[0:1] source modifier; no separate xor is emitted.
1293define { double, double } @fneg_f64_bitcast_build_vector_v2i32_to_f64_multi_modifier_user(i32 %elt0, i32 %elt1, double %fp.val0, double %fp.val1) {
1294; GCN-LABEL: fneg_f64_bitcast_build_vector_v2i32_to_f64_multi_modifier_user:
1295; GCN:       ; %bb.0:
1296; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1297; GCN-NEXT:    v_mul_f64 v[6:7], -v[0:1], v[2:3]
1298; GCN-NEXT:    v_mul_f64 v[2:3], v[4:5], -v[0:1]
1299; GCN-NEXT:    v_mov_b32_e32 v0, v6
1300; GCN-NEXT:    v_mov_b32_e32 v1, v7
1301; GCN-NEXT:    s_setpc_b64 s[30:31]
1302;
1303; GFX11-LABEL: fneg_f64_bitcast_build_vector_v2i32_to_f64_multi_modifier_user:
1304; GFX11:       ; %bb.0:
1305; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1306; GFX11-NEXT:    v_mul_f64 v[6:7], -v[0:1], v[2:3]
1307; GFX11-NEXT:    v_mul_f64 v[2:3], v[4:5], -v[0:1]
1308; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
1309; GFX11-NEXT:    v_dual_mov_b32 v0, v6 :: v_dual_mov_b32 v1, v7
1310; GFX11-NEXT:    s_setpc_b64 s[30:31]
1311  %insert.0 = insertelement <2 x i32> poison, i32 %elt0, i32 0
1312  %insert.1 = insertelement <2 x i32> %insert.0, i32 %elt1, i32 1
1313  %bitcast = bitcast <2 x i32> %insert.1 to double
1314  %fneg = fneg double %bitcast
1315  %fmul0 = fmul double %fneg, %fp.val0
1316  %fmul1 = fmul double %fp.val1, %fneg
1317  %ret.0 = insertvalue { double, double } poison, double %fmul0, 0
1318  %ret.1 = insertvalue { double, double } %ret.0, double %fmul1, 1
1319  ret { double, double } %ret.1
1320}
1321
; The high element already has its sign bit flipped by an integer xor before
; the vector is built, and the resulting double is then fneg'd and multiplied.
; The expected output keeps both the v_xor_b32 on v1 and the -v[0:1] source
; modifier on v_mul_f64, i.e. the two sign flips are not cancelled.
1322define double @fneg_f64_bitcast_build_vector_v2i32_to_f64_modifier_user_integer_neg_source(i32 %elt0, i32 %elt1, double %fp.val) {
1323; GCN-LABEL: fneg_f64_bitcast_build_vector_v2i32_to_f64_modifier_user_integer_neg_source:
1324; GCN:       ; %bb.0:
1325; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1326; GCN-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
1327; GCN-NEXT:    v_mul_f64 v[0:1], -v[0:1], v[2:3]
1328; GCN-NEXT:    s_setpc_b64 s[30:31]
1329;
1330; GFX11-LABEL: fneg_f64_bitcast_build_vector_v2i32_to_f64_modifier_user_integer_neg_source:
1331; GFX11:       ; %bb.0:
1332; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1333; GFX11-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
1334; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1335; GFX11-NEXT:    v_mul_f64 v[0:1], -v[0:1], v[2:3]
1336; GFX11-NEXT:    s_setpc_b64 s[30:31]
1337  %neg.elt1 = xor i32 %elt1, -2147483648
1338  %insert.0 = insertelement <2 x i32> poison, i32 %elt0, i32 0
1339  %insert.1 = insertelement <2 x i32> %insert.0, i32 %neg.elt1, i32 1
1340  %bitcast = bitcast <2 x i32> %insert.1 to double
1341  %fneg = fneg double %bitcast
1342  %fmul = fmul double %fneg, %fp.val
1343  ret double %fmul
1344}
1345
; The high element of the build vector is an `fadd nsz nnan %elt1, 2.0`. The
; expected output contains no xor: the negation is absorbed into the add,
; which is emitted as a single `v_sub_f32 v1, -2.0, v1`.
1346define double @fneg_f64_bitcast_build_vector_v2f32_foldable_sources_to_f64(float %elt0, float %elt1) {
1347; GCN-LABEL: fneg_f64_bitcast_build_vector_v2f32_foldable_sources_to_f64:
1348; GCN:       ; %bb.0:
1349; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1350; GCN-NEXT:    v_sub_f32_e32 v1, -2.0, v1
1351; GCN-NEXT:    s_setpc_b64 s[30:31]
1352;
1353; GFX11-LABEL: fneg_f64_bitcast_build_vector_v2f32_foldable_sources_to_f64:
1354; GFX11:       ; %bb.0:
1355; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1356; GFX11-NEXT:    v_sub_f32_e32 v1, -2.0, v1
1357; GFX11-NEXT:    s_setpc_b64 s[30:31]
1358  %fadd = fadd nsz nnan float %elt1, 2.0
1359  %insert.0 = insertelement <2 x float> poison, float %elt0, i32 0
1360  %insert.1 = insertelement <2 x float> %insert.0, float %fadd, i32 1
1361  %bitcast = bitcast <2 x float> %insert.1 to double
1362  %fneg = fneg double %bitcast
1363  ret double %fneg
1364}
1365
; The build vector has a second user besides the bitcast: it is stored to
; %ptr. The expected output stores the un-negated v[0:1] and produces the
; negated result with a v_xor_b32 on the high dword. In the gfx1100 checks
; the xor result goes to v4 and is moved into v1 after the store is issued.
1366define double @fneg_f64_bitcast_build_vector_v2f32_to_f64_bitcast_source_user(float %elt0, float %elt1, ptr addrspace(1) %ptr) {
1367; GFX7-LABEL: fneg_f64_bitcast_build_vector_v2f32_to_f64_bitcast_source_user:
1368; GFX7:       ; %bb.0:
1369; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1370; GFX7-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
1371; GFX7-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
1372; GFX7-NEXT:    s_waitcnt vmcnt(0)
1373; GFX7-NEXT:    s_setpc_b64 s[30:31]
1374;
1375; GFX9-LABEL: fneg_f64_bitcast_build_vector_v2f32_to_f64_bitcast_source_user:
1376; GFX9:       ; %bb.0:
1377; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1378; GFX9-NEXT:    global_store_dwordx2 v[2:3], v[0:1], off
1379; GFX9-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
1380; GFX9-NEXT:    s_waitcnt vmcnt(0)
1381; GFX9-NEXT:    s_setpc_b64 s[30:31]
1382;
1383; GFX11-LABEL: fneg_f64_bitcast_build_vector_v2f32_to_f64_bitcast_source_user:
1384; GFX11:       ; %bb.0:
1385; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1386; GFX11-NEXT:    v_xor_b32_e32 v4, 0x80000000, v1
1387; GFX11-NEXT:    global_store_b64 v[2:3], v[0:1], off
1388; GFX11-NEXT:    v_mov_b32_e32 v1, v4
1389; GFX11-NEXT:    s_setpc_b64 s[30:31]
1390  %insert.0 = insertelement <2 x float> poison, float %elt0, i32 0
1391  %insert.1 = insertelement <2 x float> %insert.0, float %elt1, i32 1
1392  store <2 x float> %insert.1, ptr addrspace(1) %ptr
1393  %bitcast = bitcast <2 x float> %insert.1 to double
1394  %fneg = fneg double %bitcast
1395  ret double %fneg
1396}
1397
; Same build-vector -> bitcast -> fneg f64 pattern, but the extra user of the
; <2 x float> build vector is a vector fadd with %arg.v2f32. Both the negated
; double and the fadd result are returned.
; The expected output keeps both adds reading the original v0/v1 and performs
; the negation as a separate v_xor_b32 with 0x80000000 on v1.
; NOTE(review): "foldable" in the name presumably means the extra user could
; absorb an fneg as a source modifier - confirm against the combine under test.
1398define { double, <2 x float> } @fneg_f64_bitcast_build_vector_v2f32_to_f64_bitcast_source_foldable_user(float %elt0, float %elt1, <2 x float> %arg.v2f32) {
1399; GCN-LABEL: fneg_f64_bitcast_build_vector_v2f32_to_f64_bitcast_source_foldable_user:
1400; GCN:       ; %bb.0:
1401; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1402; GCN-NEXT:    v_add_f32_e32 v2, v0, v2
1403; GCN-NEXT:    v_add_f32_e32 v3, v1, v3
1404; GCN-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
1405; GCN-NEXT:    s_setpc_b64 s[30:31]
1406;
1407; GFX11-LABEL: fneg_f64_bitcast_build_vector_v2f32_to_f64_bitcast_source_foldable_user:
1408; GFX11:       ; %bb.0:
1409; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1410; GFX11-NEXT:    v_xor_b32_e32 v4, 0x80000000, v1
1411; GFX11-NEXT:    v_dual_add_f32 v2, v0, v2 :: v_dual_add_f32 v3, v1, v3
1412; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
1413; GFX11-NEXT:    v_mov_b32_e32 v1, v4
1414; GFX11-NEXT:    s_setpc_b64 s[30:31]
  ; Build <elt0, elt1>.
1415  %insert.0 = insertelement <2 x float> poison, float %elt0, i32 0
1416  %insert.1 = insertelement <2 x float> %insert.0, float %elt1, i32 1
  ; Extra user of the build vector (operates on the un-negated value).
1417  %other.bitcast.source.user = fadd <2 x float> %insert.1, %arg.v2f32
  ; Reinterpret the same vector as f64 and negate it.
1418  %bitcast = bitcast <2 x float> %insert.1 to double
1419  %fneg = fneg double %bitcast
  ; Return { negated double, fadd result }.
1420  %ret.0 = insertvalue { double, <2 x float> } poison, double %fneg, 0
1421  %ret.1 = insertvalue { double, <2 x float> } %ret.0, <2 x float> %other.bitcast.source.user, 1
1422  ret { double, <2 x float> } %ret.1
1423}
1424
; Here the bitcast result itself (not the build vector) has a second user:
; the f64 is returned both negated (field 0) and un-negated (field 1).
; The expected output copies v0/v1 into v2/v3 for the un-negated value and
; writes v1 as the xor of 0x80000000 with the copied high half (v3).
1425define { double, double } @fneg_f64_bitcast_build_vector_v2f32_to_f64_bitcast_user(float %elt0, float %elt1) {
1426; GCN-LABEL: fneg_f64_bitcast_build_vector_v2f32_to_f64_bitcast_user:
1427; GCN:       ; %bb.0:
1428; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1429; GCN-NEXT:    v_mov_b32_e32 v3, v1
1430; GCN-NEXT:    v_xor_b32_e32 v1, 0x80000000, v3
1431; GCN-NEXT:    v_mov_b32_e32 v2, v0
1432; GCN-NEXT:    s_setpc_b64 s[30:31]
1433;
1434; GFX11-LABEL: fneg_f64_bitcast_build_vector_v2f32_to_f64_bitcast_user:
1435; GFX11:       ; %bb.0:
1436; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1437; GFX11-NEXT:    v_dual_mov_b32 v3, v1 :: v_dual_mov_b32 v2, v0
1438; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1439; GFX11-NEXT:    v_xor_b32_e32 v1, 0x80000000, v3
1440; GFX11-NEXT:    s_setpc_b64 s[30:31]
  ; Build <elt0, elt1> and reinterpret it as f64.
1441  %insert.0 = insertelement <2 x float> poison, float %elt0, i32 0
1442  %insert.1 = insertelement <2 x float> %insert.0, float %elt1, i32 1
1443  %bitcast = bitcast <2 x float> %insert.1 to double
1444  %fneg = fneg double %bitcast
  ; Return { negated double, original double }; %bitcast is used twice.
1445  %ret.0 = insertvalue { double, double } poison, double %fneg, 0
1446  %ret.1 = insertvalue { double, double } %ret.0, double %bitcast, 1
1447  ret { double, double } %ret.1
1448}
1449
; The bitcast f64 has two users: the fneg and an fadd with %arg.f64.
; Both results are returned.
; The expected output is identical for all three run lines' prefixes shown
; below: v_add_f64 on the un-negated v[0:1], followed by a v_xor_b32 of
; 0x80000000 on v1 for the negated return value.
; NOTE(review): "foldable" in the name presumably means the fadd could take an
; fneg source modifier - confirm against the combine under test.
1450define { double, double } @fneg_f64_bitcast_build_vector_v2f32_to_f64_bitcast_foldable_user(float %elt0, float %elt1, double %arg.f64) {
1451; GCN-LABEL: fneg_f64_bitcast_build_vector_v2f32_to_f64_bitcast_foldable_user:
1452; GCN:       ; %bb.0:
1453; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1454; GCN-NEXT:    v_add_f64 v[2:3], v[0:1], v[2:3]
1455; GCN-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
1456; GCN-NEXT:    s_setpc_b64 s[30:31]
1457;
1458; GFX11-LABEL: fneg_f64_bitcast_build_vector_v2f32_to_f64_bitcast_foldable_user:
1459; GFX11:       ; %bb.0:
1460; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1461; GFX11-NEXT:    v_add_f64 v[2:3], v[0:1], v[2:3]
1462; GFX11-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
1463; GFX11-NEXT:    s_setpc_b64 s[30:31]
  ; Build <elt0, elt1> and reinterpret it as f64.
1464  %insert.0 = insertelement <2 x float> poison, float %elt0, i32 0
1465  %insert.1 = insertelement <2 x float> %insert.0, float %elt1, i32 1
1466  %bitcast = bitcast <2 x float> %insert.1 to double
  ; Second user of the bitcast (operates on the un-negated value).
1467  %other.bitcast.user = fadd double %bitcast, %arg.f64
1468  %fneg = fneg double %bitcast
  ; Return { negated double, fadd result }.
1469  %ret.0 = insertvalue { double, double } poison, double %fneg, 0
1470  %ret.1 = insertvalue { double, double } %ret.0, double %other.bitcast.user, 1
1471  ret { double, double } %ret.1
1472}
1473
1474; Check for correct bitcasting back when there are multiple uses
; Kernel: %a = select(%z, %x, %y) is used both by an fneg (%b) and directly
; as the true operand of a second select on the same condition (%c); the
; result of that select is negated again (%d) and stored to %dst.
; In the expected output the high dword of the stored value comes from a
; v_cndmask_b32_e64 whose second source carries a neg modifier (-v0), and the
; low dword comes from a scalar s_cselect_b32.
1475define amdgpu_kernel void @multiple_uses_fneg_select_f64(double %x, double %y, i1 %z, ptr addrspace(1) %dst) {
1476; GFX7-LABEL: multiple_uses_fneg_select_f64:
1477; GFX7:       ; %bb.0:
1478; GFX7-NEXT:    s_load_dword s6, s[8:9], 0x4
1479; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
1480; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x6
1481; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
1482; GFX7-NEXT:    s_bitcmp1_b32 s6, 0
1483; GFX7-NEXT:    s_cselect_b64 vcc, -1, 0
1484; GFX7-NEXT:    s_and_b64 s[6:7], vcc, exec
1485; GFX7-NEXT:    v_mov_b32_e32 v0, s3
1486; GFX7-NEXT:    v_mov_b32_e32 v1, s1
1487; GFX7-NEXT:    s_cselect_b32 s1, s1, s3
1488; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1489; GFX7-NEXT:    s_cselect_b32 s0, s0, s2
1490; GFX7-NEXT:    v_mov_b32_e32 v1, s1
1491; GFX7-NEXT:    v_mov_b32_e32 v2, s4
1492; GFX7-NEXT:    v_cndmask_b32_e64 v1, v1, -v0, vcc
1493; GFX7-NEXT:    v_mov_b32_e32 v0, s0
1494; GFX7-NEXT:    v_mov_b32_e32 v3, s5
1495; GFX7-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
1496; GFX7-NEXT:    s_endpgm
1497;
1498; GFX9-LABEL: multiple_uses_fneg_select_f64:
1499; GFX9:       ; %bb.0:
1500; GFX9-NEXT:    s_load_dword s6, s[8:9], 0x10
1501; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
1502; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x18
1503; GFX9-NEXT:    v_mov_b32_e32 v2, 0
1504; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1505; GFX9-NEXT:    s_bitcmp1_b32 s6, 0
1506; GFX9-NEXT:    s_cselect_b64 vcc, -1, 0
1507; GFX9-NEXT:    s_and_b64 s[6:7], vcc, exec
1508; GFX9-NEXT:    v_mov_b32_e32 v0, s3
1509; GFX9-NEXT:    v_mov_b32_e32 v1, s1
1510; GFX9-NEXT:    s_cselect_b32 s1, s1, s3
1511; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1512; GFX9-NEXT:    s_cselect_b32 s0, s0, s2
1513; GFX9-NEXT:    v_mov_b32_e32 v1, s1
1514; GFX9-NEXT:    v_cndmask_b32_e64 v1, v1, -v0, vcc
1515; GFX9-NEXT:    v_mov_b32_e32 v0, s0
1516; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[4:5]
1517; GFX9-NEXT:    s_endpgm
1518;
1519; GFX11-LABEL: multiple_uses_fneg_select_f64:
1520; GFX11:       ; %bb.0:
1521; GFX11-NEXT:    s_clause 0x2
1522; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
1523; GFX11-NEXT:    s_load_b32 s6, s[4:5], 0x10
1524; GFX11-NEXT:    s_load_b64 s[4:5], s[4:5], 0x18
1525; GFX11-NEXT:    v_mov_b32_e32 v2, 0
1526; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1527; GFX11-NEXT:    v_mov_b32_e32 v0, s1
1528; GFX11-NEXT:    s_bitcmp1_b32 s6, 0
1529; GFX11-NEXT:    s_cselect_b32 vcc_lo, -1, 0
1530; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
1531; GFX11-NEXT:    v_cndmask_b32_e32 v0, s3, v0, vcc_lo
1532; GFX11-NEXT:    s_and_b32 s6, vcc_lo, exec_lo
1533; GFX11-NEXT:    s_cselect_b32 s1, s1, s3
1534; GFX11-NEXT:    s_cselect_b32 s0, s0, s2
1535; GFX11-NEXT:    v_cndmask_b32_e64 v1, s1, -v0, vcc_lo
1536; GFX11-NEXT:    v_mov_b32_e32 v0, s0
1537; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[4:5]
1538; GFX11-NEXT:    s_endpgm
  ; %a has two uses: the fneg below and the true operand of the second select.
1539  %a = select i1 %z, double %x, double %y
1540  %b = fneg double %a
  ; Same condition %z picks between %a and its negation.
1541  %c = select i1 %z, double %a, double %b
  ; Negate the selected value once more before storing it.
1542  %d = fneg double %c
1543  store double %d, ptr addrspace(1) %dst
1544  ret void
1545}
1546
; Regression test (per the function name) for an fneg whose operand is a
; bitcast of an integer select that has several other users.
; %.i2379 / %.i0436 feed the fneg, the second fcmp, and the second select.
; In the expected output the fneg shows up as a neg source modifier on the
; multiply (v_mul_f32_e64 v0, -v0, v1), and no instructions for the %bb block
; appear before s_endpgm; the value loaded there (%.i0753) has no users.
; NOTE(review): "fnge" in the name looks like a typo of "fneg"; it is left
; unchanged here because the autogenerated label checks embed the name.
; NOTE(review): the shufflevector mask still uses "i32 undef" - confirm whether
; it should be migrated to poison when the checks are next regenerated.
1547define amdgpu_kernel void @fnge_select_f32_multi_use_regression(float %.i2369) {
1548; GCN-LABEL: fnge_select_f32_multi_use_regression:
1549; GCN:       ; %bb.0: ; %.entry
1550; GCN-NEXT:    s_load_dword s0, s[8:9], 0x0
1551; GCN-NEXT:    s_waitcnt lgkmcnt(0)
1552; GCN-NEXT:    v_cmp_nlt_f32_e64 s[0:1], s0, 0
1553; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
1554; GCN-NEXT:    v_cmp_nge_f32_e32 vcc, 0, v0
1555; GCN-NEXT:    v_cndmask_b32_e32 v1, 0, v0, vcc
1556; GCN-NEXT:    v_mul_f32_e64 v0, -v0, v1
1557; GCN-NEXT:    v_cmp_le_f32_e32 vcc, 0, v0
1558; GCN-NEXT:    s_and_b64 vcc, exec, vcc
1559; GCN-NEXT:    s_endpgm
1560;
1561; GFX11-LABEL: fnge_select_f32_multi_use_regression:
1562; GFX11:       ; %bb.0: ; %.entry
1563; GFX11-NEXT:    s_load_b32 s0, s[4:5], 0x0
1564; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1565; GFX11-NEXT:    v_cmp_nlt_f32_e64 s0, s0, 0
1566; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1567; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
1568; GFX11-NEXT:    v_cmp_nge_f32_e32 vcc_lo, 0, v0
1569; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0, v0, vcc_lo
1570; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1571; GFX11-NEXT:    v_mul_f32_e64 v0, -v0, v1
1572; GFX11-NEXT:    v_cmp_le_f32_e32 vcc_lo, 0, v0
1573; GFX11-NEXT:    s_and_b32 vcc_lo, exec_lo, vcc_lo
1574; GFX11-NEXT:    s_endpgm
1575.entry:
  ; Integer select of 1/0, reinterpreted as a float bit pattern.
1576  %i = fcmp uge float %.i2369, 0.000000e+00
1577  %.i2379 = select i1 %i, i32 1, i32 0
1578  %.i0436 = bitcast i32 %.i2379 to float
  ; First use of the bitcast: negation.
1579  %.i0440 = fneg float %.i0436
  ; Further uses of the same value: a compare and a second integer select.
1580  %i1 = fcmp ugt float %.i0436, 0.000000e+00
1581  %.i2495 = select i1 %i1, i32 %.i2379, i32 0
1582  %.i0552 = bitcast i32 %.i2495 to float
  ; Multiply the negated value by the second select's result and branch on it.
1583  %.i0592 = fmul float %.i0440, %.i0552
1584  %.i0721 = fcmp oge float %.i0592, 0.000000e+00
1585  br i1 %.i0721, label %bb5, label %bb
1586
1587bb:                                               ; preds = %.entry
  ; Buffer load whose result is widened and extracted but never used.
1588  %i2 = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> zeroinitializer, i32 1, i32 0)
1589  %i3 = shufflevector <2 x i32> %i2, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
1590  %i4 = bitcast <4 x i32> %i3 to <4 x float>
1591  %.i0753 = extractelement <4 x float> %i4, i64 0
1592  br label %bb5
1593
1594bb5:                                              ; preds = %bb, %.entry
1595  ret void
1596}
1597
1598
; Intrinsic called from block %bb of @fnge_select_f32_multi_use_regression.
; Its third operand is declared immarg, so call sites must pass a constant.
1599declare <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32>, i32, i32 immarg) #0
1600
; Attribute group #0, applied to the intrinsic declaration above.
1601attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) }
1602