xref: /llvm-project/llvm/test/CodeGen/AMDGPU/bfi_int.ll (revision 09fc311af702e06fbb7a89cdee13a61face102ed)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX7 %s
3; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX8 %s
4; RUN: llc -march=amdgcn -mcpu=gfx1031 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX10 %s
5; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX8-GISEL %s
6; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1031 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX10-GISEL %s
7
8; BFI_INT Definition pattern from ISA docs
9; (y & x) | (z & ~x)
10;
; Kernel form of the BFI definition pattern: stores (%y & %x) | (%z & ~%x)
; to %out. For every run line the expected output uses s_andn2_b32,
; s_and_b32 and s_or_b32; no v_bfi_b32 is expected here.
11define amdgpu_kernel void @s_bfi_def_i32(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
12; GFX7-LABEL: s_bfi_def_i32:
13; GFX7:       ; %bb.0: ; %entry
14; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
15; GFX7-NEXT:    s_load_dword s6, s[0:1], 0xd
16; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
17; GFX7-NEXT:    s_mov_b32 s3, 0xf000
18; GFX7-NEXT:    s_mov_b32 s2, -1
19; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
20; GFX7-NEXT:    s_andn2_b32 s6, s6, s4
21; GFX7-NEXT:    s_and_b32 s4, s5, s4
22; GFX7-NEXT:    s_or_b32 s4, s6, s4
23; GFX7-NEXT:    v_mov_b32_e32 v0, s4
24; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
25; GFX7-NEXT:    s_endpgm
26;
27; GFX8-LABEL: s_bfi_def_i32:
28; GFX8:       ; %bb.0: ; %entry
29; GFX8-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
30; GFX8-NEXT:    s_load_dword s4, s[0:1], 0x34
31; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
32; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
33; GFX8-NEXT:    s_andn2_b32 s4, s4, s2
34; GFX8-NEXT:    s_and_b32 s2, s3, s2
35; GFX8-NEXT:    s_or_b32 s2, s4, s2
36; GFX8-NEXT:    v_mov_b32_e32 v0, s0
37; GFX8-NEXT:    v_mov_b32_e32 v1, s1
38; GFX8-NEXT:    v_mov_b32_e32 v2, s2
39; GFX8-NEXT:    flat_store_dword v[0:1], v2
40; GFX8-NEXT:    s_endpgm
41;
42; GFX10-LABEL: s_bfi_def_i32:
43; GFX10:       ; %bb.0: ; %entry
44; GFX10-NEXT:    s_clause 0x2
45; GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
46; GFX10-NEXT:    s_load_dword s4, s[0:1], 0x34
47; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
48; GFX10-NEXT:    v_mov_b32_e32 v0, 0
49; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
50; GFX10-NEXT:    s_andn2_b32 s4, s4, s2
51; GFX10-NEXT:    s_and_b32 s2, s3, s2
52; GFX10-NEXT:    s_or_b32 s2, s4, s2
53; GFX10-NEXT:    v_mov_b32_e32 v1, s2
54; GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
55; GFX10-NEXT:    s_endpgm
56;
57; GFX8-GISEL-LABEL: s_bfi_def_i32:
58; GFX8-GISEL:       ; %bb.0: ; %entry
59; GFX8-GISEL-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
60; GFX8-GISEL-NEXT:    s_load_dword s4, s[0:1], 0x34
61; GFX8-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
62; GFX8-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
63; GFX8-GISEL-NEXT:    s_andn2_b32 s4, s4, s2
64; GFX8-GISEL-NEXT:    s_and_b32 s2, s3, s2
65; GFX8-GISEL-NEXT:    s_or_b32 s2, s4, s2
66; GFX8-GISEL-NEXT:    v_mov_b32_e32 v0, s0
67; GFX8-GISEL-NEXT:    v_mov_b32_e32 v2, s2
68; GFX8-GISEL-NEXT:    v_mov_b32_e32 v1, s1
69; GFX8-GISEL-NEXT:    flat_store_dword v[0:1], v2
70; GFX8-GISEL-NEXT:    s_endpgm
71;
72; GFX10-GISEL-LABEL: s_bfi_def_i32:
73; GFX10-GISEL:       ; %bb.0: ; %entry
74; GFX10-GISEL-NEXT:    s_clause 0x2
75; GFX10-GISEL-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
76; GFX10-GISEL-NEXT:    s_load_dword s4, s[0:1], 0x34
77; GFX10-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
78; GFX10-GISEL-NEXT:    v_mov_b32_e32 v1, 0
79; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
80; GFX10-GISEL-NEXT:    s_andn2_b32 s4, s4, s2
81; GFX10-GISEL-NEXT:    s_and_b32 s2, s3, s2
82; GFX10-GISEL-NEXT:    s_or_b32 s2, s4, s2
83; GFX10-GISEL-NEXT:    v_mov_b32_e32 v0, s2
84; GFX10-GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
85; GFX10-GISEL-NEXT:    s_endpgm
86entry:
87  %0 = xor i32 %x, -1
88  %1 = and i32 %z, %0
89  %2 = and i32 %y, %x
90  %3 = or i32 %1, %2
91  store i32 %3, i32 addrspace(1)* %out
92  ret void
93}
94
; Callable-function form of (%y & %x) | (%z & ~%x), returning the result.
; Every run line expects the whole expression to be selected as a single
; v_bfi_b32 v0, v0, v1, v2.
95define i32 @v_bfi_def_i32(i32 %x, i32 %y, i32 %z) {
96; GFX7-LABEL: v_bfi_def_i32:
97; GFX7:       ; %bb.0: ; %entry
98; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
99; GFX7-NEXT:    v_bfi_b32 v0, v0, v1, v2
100; GFX7-NEXT:    s_setpc_b64 s[30:31]
101;
102; GFX8-LABEL: v_bfi_def_i32:
103; GFX8:       ; %bb.0: ; %entry
104; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
105; GFX8-NEXT:    v_bfi_b32 v0, v0, v1, v2
106; GFX8-NEXT:    s_setpc_b64 s[30:31]
107;
108; GFX10-LABEL: v_bfi_def_i32:
109; GFX10:       ; %bb.0: ; %entry
110; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
111; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
112; GFX10-NEXT:    v_bfi_b32 v0, v0, v1, v2
113; GFX10-NEXT:    s_setpc_b64 s[30:31]
114;
115; GFX8-GISEL-LABEL: v_bfi_def_i32:
116; GFX8-GISEL:       ; %bb.0: ; %entry
117; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
118; GFX8-GISEL-NEXT:    v_bfi_b32 v0, v0, v1, v2
119; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
120;
121; GFX10-GISEL-LABEL: v_bfi_def_i32:
122; GFX10-GISEL:       ; %bb.0: ; %entry
123; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
124; GFX10-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
125; GFX10-GISEL-NEXT:    v_bfi_b32 v0, v0, v1, v2
126; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
127entry:
128  %0 = xor i32 %x, -1
129  %1 = and i32 %z, %0
130  %2 = and i32 %y, %x
131  %3 = or i32 %1, %2
132  ret i32 %3
133}
134
135; SHA-256 Ch function
136; z ^ (x & (y ^ z))
; Kernel form of %z ^ (%x & (%y ^ %z)); the result is stored to %out.
; For every run line the expected output is s_xor_b32, s_and_b32,
; s_xor_b32; no v_bfi_b32 is expected here.
137define amdgpu_kernel void @s_bfi_sha256_ch(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
138; GFX7-LABEL: s_bfi_sha256_ch:
139; GFX7:       ; %bb.0: ; %entry
140; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
141; GFX7-NEXT:    s_load_dword s6, s[0:1], 0xd
142; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
143; GFX7-NEXT:    s_mov_b32 s3, 0xf000
144; GFX7-NEXT:    s_mov_b32 s2, -1
145; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
146; GFX7-NEXT:    s_xor_b32 s5, s5, s6
147; GFX7-NEXT:    s_and_b32 s4, s4, s5
148; GFX7-NEXT:    s_xor_b32 s4, s6, s4
149; GFX7-NEXT:    v_mov_b32_e32 v0, s4
150; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
151; GFX7-NEXT:    s_endpgm
152;
153; GFX8-LABEL: s_bfi_sha256_ch:
154; GFX8:       ; %bb.0: ; %entry
155; GFX8-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
156; GFX8-NEXT:    s_load_dword s4, s[0:1], 0x34
157; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
158; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
159; GFX8-NEXT:    s_xor_b32 s3, s3, s4
160; GFX8-NEXT:    s_and_b32 s2, s2, s3
161; GFX8-NEXT:    s_xor_b32 s2, s4, s2
162; GFX8-NEXT:    v_mov_b32_e32 v0, s0
163; GFX8-NEXT:    v_mov_b32_e32 v1, s1
164; GFX8-NEXT:    v_mov_b32_e32 v2, s2
165; GFX8-NEXT:    flat_store_dword v[0:1], v2
166; GFX8-NEXT:    s_endpgm
167;
168; GFX10-LABEL: s_bfi_sha256_ch:
169; GFX10:       ; %bb.0: ; %entry
170; GFX10-NEXT:    s_clause 0x2
171; GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
172; GFX10-NEXT:    s_load_dword s4, s[0:1], 0x34
173; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
174; GFX10-NEXT:    v_mov_b32_e32 v0, 0
175; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
176; GFX10-NEXT:    s_xor_b32 s3, s3, s4
177; GFX10-NEXT:    s_and_b32 s2, s2, s3
178; GFX10-NEXT:    s_xor_b32 s2, s4, s2
179; GFX10-NEXT:    v_mov_b32_e32 v1, s2
180; GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
181; GFX10-NEXT:    s_endpgm
182;
183; GFX8-GISEL-LABEL: s_bfi_sha256_ch:
184; GFX8-GISEL:       ; %bb.0: ; %entry
185; GFX8-GISEL-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
186; GFX8-GISEL-NEXT:    s_load_dword s4, s[0:1], 0x34
187; GFX8-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
188; GFX8-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
189; GFX8-GISEL-NEXT:    s_xor_b32 s3, s3, s4
190; GFX8-GISEL-NEXT:    s_and_b32 s2, s2, s3
191; GFX8-GISEL-NEXT:    s_xor_b32 s2, s4, s2
192; GFX8-GISEL-NEXT:    v_mov_b32_e32 v0, s0
193; GFX8-GISEL-NEXT:    v_mov_b32_e32 v2, s2
194; GFX8-GISEL-NEXT:    v_mov_b32_e32 v1, s1
195; GFX8-GISEL-NEXT:    flat_store_dword v[0:1], v2
196; GFX8-GISEL-NEXT:    s_endpgm
197;
198; GFX10-GISEL-LABEL: s_bfi_sha256_ch:
199; GFX10-GISEL:       ; %bb.0: ; %entry
200; GFX10-GISEL-NEXT:    s_clause 0x2
201; GFX10-GISEL-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
202; GFX10-GISEL-NEXT:    s_load_dword s4, s[0:1], 0x34
203; GFX10-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
204; GFX10-GISEL-NEXT:    v_mov_b32_e32 v1, 0
205; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
206; GFX10-GISEL-NEXT:    s_xor_b32 s3, s3, s4
207; GFX10-GISEL-NEXT:    s_and_b32 s2, s2, s3
208; GFX10-GISEL-NEXT:    s_xor_b32 s2, s4, s2
209; GFX10-GISEL-NEXT:    v_mov_b32_e32 v0, s2
210; GFX10-GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
211; GFX10-GISEL-NEXT:    s_endpgm
212entry:
213  %0 = xor i32 %y, %z
214  %1 = and i32 %x, %0
215  %2 = xor i32 %z, %1
216  store i32 %2, i32 addrspace(1)* %out
217  ret void
218}
219
; Callable-function form of %z ^ (%x & (%y ^ %z)), returning the result.
; Every run line expects a single v_bfi_b32 v0, v0, v1, v2.
220define i32 @v_bfi_sha256_ch(i32 %x, i32 %y, i32 %z) {
221; GFX7-LABEL: v_bfi_sha256_ch:
222; GFX7:       ; %bb.0: ; %entry
223; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
224; GFX7-NEXT:    v_bfi_b32 v0, v0, v1, v2
225; GFX7-NEXT:    s_setpc_b64 s[30:31]
226;
227; GFX8-LABEL: v_bfi_sha256_ch:
228; GFX8:       ; %bb.0: ; %entry
229; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
230; GFX8-NEXT:    v_bfi_b32 v0, v0, v1, v2
231; GFX8-NEXT:    s_setpc_b64 s[30:31]
232;
233; GFX10-LABEL: v_bfi_sha256_ch:
234; GFX10:       ; %bb.0: ; %entry
235; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
236; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
237; GFX10-NEXT:    v_bfi_b32 v0, v0, v1, v2
238; GFX10-NEXT:    s_setpc_b64 s[30:31]
239;
240; GFX8-GISEL-LABEL: v_bfi_sha256_ch:
241; GFX8-GISEL:       ; %bb.0: ; %entry
242; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
243; GFX8-GISEL-NEXT:    v_bfi_b32 v0, v0, v1, v2
244; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
245;
246; GFX10-GISEL-LABEL: v_bfi_sha256_ch:
247; GFX10-GISEL:       ; %bb.0: ; %entry
248; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
249; GFX10-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
250; GFX10-GISEL-NEXT:    v_bfi_b32 v0, v0, v1, v2
251; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
252entry:
253  %0 = xor i32 %y, %z
254  %1 = and i32 %x, %0
255  %2 = xor i32 %z, %1
256  ret i32 %2
257}
258
; amdgpu_ps variant of %z ^ (%x & (%y ^ %z)) with %y and %z marked inreg
; (s0 and s1 in the expected output) and %x in v0; the i32 result is
; bitcast to float for the return.
; Without -global-isel a v_bfi_b32 is expected: the tahiti and tonga runs
; first copy s1 into v1, while the gfx1031 run uses both SGPRs directly.
; With -global-isel the expected output is s_xor_b32, v_and_b32, v_xor_b32
; (no v_bfi_b32).
259define amdgpu_ps float @v_s_s_bfi_sha256_ch(i32 %x, i32 inreg %y, i32 inreg %z) {
260; GFX7-LABEL: v_s_s_bfi_sha256_ch:
261; GFX7:       ; %bb.0: ; %entry
262; GFX7-NEXT:    v_mov_b32_e32 v1, s1
263; GFX7-NEXT:    v_bfi_b32 v0, v0, s0, v1
264; GFX7-NEXT:    ; return to shader part epilog
265;
266; GFX8-LABEL: v_s_s_bfi_sha256_ch:
267; GFX8:       ; %bb.0: ; %entry
268; GFX8-NEXT:    v_mov_b32_e32 v1, s1
269; GFX8-NEXT:    v_bfi_b32 v0, v0, s0, v1
270; GFX8-NEXT:    ; return to shader part epilog
271;
272; GFX10-LABEL: v_s_s_bfi_sha256_ch:
273; GFX10:       ; %bb.0: ; %entry
274; GFX10-NEXT:    v_bfi_b32 v0, v0, s0, s1
275; GFX10-NEXT:    ; return to shader part epilog
276;
277; GFX8-GISEL-LABEL: v_s_s_bfi_sha256_ch:
278; GFX8-GISEL:       ; %bb.0: ; %entry
279; GFX8-GISEL-NEXT:    s_xor_b32 s0, s0, s1
280; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
281; GFX8-GISEL-NEXT:    v_xor_b32_e32 v0, s1, v0
282; GFX8-GISEL-NEXT:    ; return to shader part epilog
283;
284; GFX10-GISEL-LABEL: v_s_s_bfi_sha256_ch:
285; GFX10-GISEL:       ; %bb.0: ; %entry
286; GFX10-GISEL-NEXT:    s_xor_b32 s0, s0, s1
287; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
288; GFX10-GISEL-NEXT:    v_xor_b32_e32 v0, s1, v0
289; GFX10-GISEL-NEXT:    ; return to shader part epilog
290entry:
291  %xor0 = xor i32 %y, %z
292  %and = and i32 %x, %xor0
293  %xor1 = xor i32 %z, %and
294  %cast = bitcast i32 %xor1 to float
295  ret float %cast
296}
297
; amdgpu_ps variant of %z ^ (%x & (%y ^ %z)) with %x and %z marked inreg
; (s0 and s1 in the expected output) and %y in v0; the i32 result is
; bitcast to float for the return.
; Without -global-isel a v_bfi_b32 is expected: the tahiti and tonga runs
; first copy s1 into v1, while the gfx1031 run uses both SGPRs directly.
; With -global-isel the expected output is v_xor_b32, v_and_b32, v_xor_b32
; (no v_bfi_b32).
298define amdgpu_ps float @s_v_s_bfi_sha256_ch(i32 inreg %x, i32 %y, i32 inreg %z) {
299; GFX7-LABEL: s_v_s_bfi_sha256_ch:
300; GFX7:       ; %bb.0: ; %entry
301; GFX7-NEXT:    v_mov_b32_e32 v1, s1
302; GFX7-NEXT:    v_bfi_b32 v0, s0, v0, v1
303; GFX7-NEXT:    ; return to shader part epilog
304;
305; GFX8-LABEL: s_v_s_bfi_sha256_ch:
306; GFX8:       ; %bb.0: ; %entry
307; GFX8-NEXT:    v_mov_b32_e32 v1, s1
308; GFX8-NEXT:    v_bfi_b32 v0, s0, v0, v1
309; GFX8-NEXT:    ; return to shader part epilog
310;
311; GFX10-LABEL: s_v_s_bfi_sha256_ch:
312; GFX10:       ; %bb.0: ; %entry
313; GFX10-NEXT:    v_bfi_b32 v0, s0, v0, s1
314; GFX10-NEXT:    ; return to shader part epilog
315;
316; GFX8-GISEL-LABEL: s_v_s_bfi_sha256_ch:
317; GFX8-GISEL:       ; %bb.0: ; %entry
318; GFX8-GISEL-NEXT:    v_xor_b32_e32 v0, s1, v0
319; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
320; GFX8-GISEL-NEXT:    v_xor_b32_e32 v0, s1, v0
321; GFX8-GISEL-NEXT:    ; return to shader part epilog
322;
323; GFX10-GISEL-LABEL: s_v_s_bfi_sha256_ch:
324; GFX10-GISEL:       ; %bb.0: ; %entry
325; GFX10-GISEL-NEXT:    v_xor_b32_e32 v0, s1, v0
326; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
327; GFX10-GISEL-NEXT:    v_xor_b32_e32 v0, s1, v0
328; GFX10-GISEL-NEXT:    ; return to shader part epilog
329entry:
330  %xor0 = xor i32 %y, %z
331  %and = and i32 %x, %xor0
332  %xor1 = xor i32 %z, %and
333  %cast = bitcast i32 %xor1 to float
334  ret float %cast
335}
336
; amdgpu_ps variant of %z ^ (%x & (%y ^ %z)) with %x and %y marked inreg
; (s0 and s1 in the expected output) and %z in v0; the i32 result is
; bitcast to float for the return.
; Every run line expects a v_bfi_b32. Without -global-isel the tahiti and
; tonga runs first copy s1 into v1; with -global-isel the tonga run copies
; s0 into v1 instead. Both gfx1031 runs use the two SGPRs directly.
337define amdgpu_ps float @s_s_v_bfi_sha256_ch(i32 inreg %x, i32 inreg %y, i32 %z) {
338; GFX7-LABEL: s_s_v_bfi_sha256_ch:
339; GFX7:       ; %bb.0: ; %entry
340; GFX7-NEXT:    v_mov_b32_e32 v1, s1
341; GFX7-NEXT:    v_bfi_b32 v0, s0, v1, v0
342; GFX7-NEXT:    ; return to shader part epilog
343;
344; GFX8-LABEL: s_s_v_bfi_sha256_ch:
345; GFX8:       ; %bb.0: ; %entry
346; GFX8-NEXT:    v_mov_b32_e32 v1, s1
347; GFX8-NEXT:    v_bfi_b32 v0, s0, v1, v0
348; GFX8-NEXT:    ; return to shader part epilog
349;
350; GFX10-LABEL: s_s_v_bfi_sha256_ch:
351; GFX10:       ; %bb.0: ; %entry
352; GFX10-NEXT:    v_bfi_b32 v0, s0, s1, v0
353; GFX10-NEXT:    ; return to shader part epilog
354;
355; GFX8-GISEL-LABEL: s_s_v_bfi_sha256_ch:
356; GFX8-GISEL:       ; %bb.0: ; %entry
357; GFX8-GISEL-NEXT:    v_mov_b32_e32 v1, s0
358; GFX8-GISEL-NEXT:    v_bfi_b32 v0, v1, s1, v0
359; GFX8-GISEL-NEXT:    ; return to shader part epilog
360;
361; GFX10-GISEL-LABEL: s_s_v_bfi_sha256_ch:
362; GFX10-GISEL:       ; %bb.0: ; %entry
363; GFX10-GISEL-NEXT:    v_bfi_b32 v0, s0, s1, v0
364; GFX10-GISEL-NEXT:    ; return to shader part epilog
365entry:
366  %xor0 = xor i32 %y, %z
367  %and = and i32 %x, %xor0
368  %xor1 = xor i32 %z, %and
369  %cast = bitcast i32 %xor1 to float
370  ret float %cast
371}
372
; amdgpu_ps variant of %z ^ (%x & (%y ^ %z)) where only %x is marked inreg
; (s0 in the expected output); the i32 result is bitcast to float.
; Every run line expects the same single v_bfi_b32 v0, s0, v0, v1.
373define amdgpu_ps float @s_v_v_bfi_sha256_ch(i32 inreg %x, i32 %y, i32 %z) {
374; GFX7-LABEL: s_v_v_bfi_sha256_ch:
375; GFX7:       ; %bb.0: ; %entry
376; GFX7-NEXT:    v_bfi_b32 v0, s0, v0, v1
377; GFX7-NEXT:    ; return to shader part epilog
378;
379; GFX8-LABEL: s_v_v_bfi_sha256_ch:
380; GFX8:       ; %bb.0: ; %entry
381; GFX8-NEXT:    v_bfi_b32 v0, s0, v0, v1
382; GFX8-NEXT:    ; return to shader part epilog
383;
384; GFX10-LABEL: s_v_v_bfi_sha256_ch:
385; GFX10:       ; %bb.0: ; %entry
386; GFX10-NEXT:    v_bfi_b32 v0, s0, v0, v1
387; GFX10-NEXT:    ; return to shader part epilog
388;
389; GFX8-GISEL-LABEL: s_v_v_bfi_sha256_ch:
390; GFX8-GISEL:       ; %bb.0: ; %entry
391; GFX8-GISEL-NEXT:    v_bfi_b32 v0, s0, v0, v1
392; GFX8-GISEL-NEXT:    ; return to shader part epilog
393;
394; GFX10-GISEL-LABEL: s_v_v_bfi_sha256_ch:
395; GFX10-GISEL:       ; %bb.0: ; %entry
396; GFX10-GISEL-NEXT:    v_bfi_b32 v0, s0, v0, v1
397; GFX10-GISEL-NEXT:    ; return to shader part epilog
398entry:
399  %xor0 = xor i32 %y, %z
400  %and = and i32 %x, %xor0
401  %xor1 = xor i32 %z, %and
402  %cast = bitcast i32 %xor1 to float
403  ret float %cast
404}
405
; amdgpu_ps variant of %z ^ (%x & (%y ^ %z)) where only %y is marked inreg
; (s0 in the expected output); the i32 result is bitcast to float.
; Every run line expects the same single v_bfi_b32 v0, v0, s0, v1.
406define amdgpu_ps float @v_s_v_bfi_sha256_ch(i32 %x, i32 inreg %y, i32 %z) {
407; GFX7-LABEL: v_s_v_bfi_sha256_ch:
408; GFX7:       ; %bb.0: ; %entry
409; GFX7-NEXT:    v_bfi_b32 v0, v0, s0, v1
410; GFX7-NEXT:    ; return to shader part epilog
411;
412; GFX8-LABEL: v_s_v_bfi_sha256_ch:
413; GFX8:       ; %bb.0: ; %entry
414; GFX8-NEXT:    v_bfi_b32 v0, v0, s0, v1
415; GFX8-NEXT:    ; return to shader part epilog
416;
417; GFX10-LABEL: v_s_v_bfi_sha256_ch:
418; GFX10:       ; %bb.0: ; %entry
419; GFX10-NEXT:    v_bfi_b32 v0, v0, s0, v1
420; GFX10-NEXT:    ; return to shader part epilog
421;
422; GFX8-GISEL-LABEL: v_s_v_bfi_sha256_ch:
423; GFX8-GISEL:       ; %bb.0: ; %entry
424; GFX8-GISEL-NEXT:    v_bfi_b32 v0, v0, s0, v1
425; GFX8-GISEL-NEXT:    ; return to shader part epilog
426;
427; GFX10-GISEL-LABEL: v_s_v_bfi_sha256_ch:
428; GFX10-GISEL:       ; %bb.0: ; %entry
429; GFX10-GISEL-NEXT:    v_bfi_b32 v0, v0, s0, v1
430; GFX10-GISEL-NEXT:    ; return to shader part epilog
431entry:
432  %xor0 = xor i32 %y, %z
433  %and = and i32 %x, %xor0
434  %xor1 = xor i32 %z, %and
435  %cast = bitcast i32 %xor1 to float
436  ret float %cast
437}
438
; amdgpu_ps variant of %z ^ (%x & (%y ^ %z)) where only %z is marked inreg
; (s0 in the expected output); the i32 result is bitcast to float.
; Without -global-isel every run expects a single v_bfi_b32 v0, v0, v1, s0.
; With -global-isel the expected output is v_xor_b32, v_and_b32, v_xor_b32
; (no v_bfi_b32).
439define amdgpu_ps float @v_v_s_bfi_sha256_ch(i32 %x, i32 %y, i32 inreg %z) {
440; GFX7-LABEL: v_v_s_bfi_sha256_ch:
441; GFX7:       ; %bb.0: ; %entry
442; GFX7-NEXT:    v_bfi_b32 v0, v0, v1, s0
443; GFX7-NEXT:    ; return to shader part epilog
444;
445; GFX8-LABEL: v_v_s_bfi_sha256_ch:
446; GFX8:       ; %bb.0: ; %entry
447; GFX8-NEXT:    v_bfi_b32 v0, v0, v1, s0
448; GFX8-NEXT:    ; return to shader part epilog
449;
450; GFX10-LABEL: v_v_s_bfi_sha256_ch:
451; GFX10:       ; %bb.0: ; %entry
452; GFX10-NEXT:    v_bfi_b32 v0, v0, v1, s0
453; GFX10-NEXT:    ; return to shader part epilog
454;
455; GFX8-GISEL-LABEL: v_v_s_bfi_sha256_ch:
456; GFX8-GISEL:       ; %bb.0: ; %entry
457; GFX8-GISEL-NEXT:    v_xor_b32_e32 v1, s0, v1
458; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, v0, v1
459; GFX8-GISEL-NEXT:    v_xor_b32_e32 v0, s0, v0
460; GFX8-GISEL-NEXT:    ; return to shader part epilog
461;
462; GFX10-GISEL-LABEL: v_v_s_bfi_sha256_ch:
463; GFX10-GISEL:       ; %bb.0: ; %entry
464; GFX10-GISEL-NEXT:    v_xor_b32_e32 v1, s0, v1
465; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, v0, v1
466; GFX10-GISEL-NEXT:    v_xor_b32_e32 v0, s0, v0
467; GFX10-GISEL-NEXT:    ; return to shader part epilog
468entry:
469  %xor0 = xor i32 %y, %z
470  %and = and i32 %x, %xor0
471  %xor1 = xor i32 %z, %and
472  %cast = bitcast i32 %xor1 to float
473  ret float %cast
474}
475
476; SHA-256 Ma function
477; ((x & z) | (y & (x | z)))
; Kernel form of (%x & %z) | (%y & (%x | %z)); the result is stored to
; %out. For every run line the expected output uses two s_and_b32 and two
; s_or_b32 (the gfx1031 runs emit them in a different order and with a
; different temporary register); no v_bfi_b32 is expected here.
478define amdgpu_kernel void @s_bfi_sha256_ma(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
479; GFX7-LABEL: s_bfi_sha256_ma:
480; GFX7:       ; %bb.0: ; %entry
481; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
482; GFX7-NEXT:    s_load_dword s6, s[0:1], 0xd
483; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
484; GFX7-NEXT:    s_mov_b32 s3, 0xf000
485; GFX7-NEXT:    s_mov_b32 s2, -1
486; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
487; GFX7-NEXT:    s_and_b32 s7, s4, s6
488; GFX7-NEXT:    s_or_b32 s4, s4, s6
489; GFX7-NEXT:    s_and_b32 s4, s5, s4
490; GFX7-NEXT:    s_or_b32 s4, s7, s4
491; GFX7-NEXT:    v_mov_b32_e32 v0, s4
492; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
493; GFX7-NEXT:    s_endpgm
494;
495; GFX8-LABEL: s_bfi_sha256_ma:
496; GFX8:       ; %bb.0: ; %entry
497; GFX8-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
498; GFX8-NEXT:    s_load_dword s4, s[0:1], 0x34
499; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
500; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
501; GFX8-NEXT:    s_and_b32 s5, s2, s4
502; GFX8-NEXT:    s_or_b32 s2, s2, s4
503; GFX8-NEXT:    s_and_b32 s2, s3, s2
504; GFX8-NEXT:    s_or_b32 s2, s5, s2
505; GFX8-NEXT:    v_mov_b32_e32 v0, s0
506; GFX8-NEXT:    v_mov_b32_e32 v1, s1
507; GFX8-NEXT:    v_mov_b32_e32 v2, s2
508; GFX8-NEXT:    flat_store_dword v[0:1], v2
509; GFX8-NEXT:    s_endpgm
510;
511; GFX10-LABEL: s_bfi_sha256_ma:
512; GFX10:       ; %bb.0: ; %entry
513; GFX10-NEXT:    s_clause 0x2
514; GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
515; GFX10-NEXT:    s_load_dword s4, s[0:1], 0x34
516; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
517; GFX10-NEXT:    v_mov_b32_e32 v0, 0
518; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
519; GFX10-NEXT:    s_or_b32 s5, s2, s4
520; GFX10-NEXT:    s_and_b32 s2, s2, s4
521; GFX10-NEXT:    s_and_b32 s3, s3, s5
522; GFX10-NEXT:    s_or_b32 s2, s2, s3
523; GFX10-NEXT:    v_mov_b32_e32 v1, s2
524; GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
525; GFX10-NEXT:    s_endpgm
526;
527; GFX8-GISEL-LABEL: s_bfi_sha256_ma:
528; GFX8-GISEL:       ; %bb.0: ; %entry
529; GFX8-GISEL-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
530; GFX8-GISEL-NEXT:    s_load_dword s4, s[0:1], 0x34
531; GFX8-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
532; GFX8-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
533; GFX8-GISEL-NEXT:    s_and_b32 s5, s2, s4
534; GFX8-GISEL-NEXT:    s_or_b32 s2, s2, s4
535; GFX8-GISEL-NEXT:    s_and_b32 s2, s3, s2
536; GFX8-GISEL-NEXT:    s_or_b32 s2, s5, s2
537; GFX8-GISEL-NEXT:    v_mov_b32_e32 v0, s0
538; GFX8-GISEL-NEXT:    v_mov_b32_e32 v2, s2
539; GFX8-GISEL-NEXT:    v_mov_b32_e32 v1, s1
540; GFX8-GISEL-NEXT:    flat_store_dword v[0:1], v2
541; GFX8-GISEL-NEXT:    s_endpgm
542;
543; GFX10-GISEL-LABEL: s_bfi_sha256_ma:
544; GFX10-GISEL:       ; %bb.0: ; %entry
545; GFX10-GISEL-NEXT:    s_clause 0x2
546; GFX10-GISEL-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
547; GFX10-GISEL-NEXT:    s_load_dword s4, s[0:1], 0x34
548; GFX10-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
549; GFX10-GISEL-NEXT:    v_mov_b32_e32 v1, 0
550; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
551; GFX10-GISEL-NEXT:    s_or_b32 s5, s2, s4
552; GFX10-GISEL-NEXT:    s_and_b32 s2, s2, s4
553; GFX10-GISEL-NEXT:    s_and_b32 s3, s3, s5
554; GFX10-GISEL-NEXT:    s_or_b32 s2, s2, s3
555; GFX10-GISEL-NEXT:    v_mov_b32_e32 v0, s2
556; GFX10-GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
557; GFX10-GISEL-NEXT:    s_endpgm
558entry:
559  %0 = and i32 %x, %z
560  %1 = or i32 %x, %z
561  %2 = and i32 %y, %1
562  %3 = or i32 %0, %2
563  store i32 %3, i32 addrspace(1)* %out
564  ret void
565}
566
; Callable-function form of (%x & %z) | (%y & (%x | %z)), returning the
; result. Every run line expects two instructions: v_xor_b32 v0, v0, v1
; followed by v_bfi_b32 v0, v0, v2, v1.
567define i32 @v_bfi_sha256_ma(i32 %x, i32 %y, i32 %z) {
568; GFX7-LABEL: v_bfi_sha256_ma:
569; GFX7:       ; %bb.0: ; %entry
570; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
571; GFX7-NEXT:    v_xor_b32_e32 v0, v0, v1
572; GFX7-NEXT:    v_bfi_b32 v0, v0, v2, v1
573; GFX7-NEXT:    s_setpc_b64 s[30:31]
574;
575; GFX8-LABEL: v_bfi_sha256_ma:
576; GFX8:       ; %bb.0: ; %entry
577; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
578; GFX8-NEXT:    v_xor_b32_e32 v0, v0, v1
579; GFX8-NEXT:    v_bfi_b32 v0, v0, v2, v1
580; GFX8-NEXT:    s_setpc_b64 s[30:31]
581;
582; GFX10-LABEL: v_bfi_sha256_ma:
583; GFX10:       ; %bb.0: ; %entry
584; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
585; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
586; GFX10-NEXT:    v_xor_b32_e32 v0, v0, v1
587; GFX10-NEXT:    v_bfi_b32 v0, v0, v2, v1
588; GFX10-NEXT:    s_setpc_b64 s[30:31]
589;
590; GFX8-GISEL-LABEL: v_bfi_sha256_ma:
591; GFX8-GISEL:       ; %bb.0: ; %entry
592; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
593; GFX8-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v1
594; GFX8-GISEL-NEXT:    v_bfi_b32 v0, v0, v2, v1
595; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
596;
597; GFX10-GISEL-LABEL: v_bfi_sha256_ma:
598; GFX10-GISEL:       ; %bb.0: ; %entry
599; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
600; GFX10-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
601; GFX10-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v1
602; GFX10-GISEL-NEXT:    v_bfi_b32 v0, v0, v2, v1
603; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
604entry:
605  %0 = and i32 %x, %z
606  %1 = or i32 %x, %z
607  %2 = and i32 %y, %1
608  %3 = or i32 %0, %2
609  ret i32 %3
610}
611
; Vector bitselect written as ((%a ^ %mask) & %b) ^ %mask on <2 x i32>.
; Without -global-isel every run expects one v_bfi_b32 per element
; (v_bfi_b32 v0, v2, v0, v4 and v_bfi_b32 v1, v3, v1, v5).
; With -global-isel the expected output is v_xor_b32, v_and_b32, v_xor_b32
; per element (no v_bfi_b32).
612define <2 x i32> @v_bitselect_v2i32_pat1(<2 x i32> %a, <2 x i32> %b, <2 x i32> %mask) {
613; GFX7-LABEL: v_bitselect_v2i32_pat1:
614; GFX7:       ; %bb.0:
615; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
616; GFX7-NEXT:    v_bfi_b32 v0, v2, v0, v4
617; GFX7-NEXT:    v_bfi_b32 v1, v3, v1, v5
618; GFX7-NEXT:    s_setpc_b64 s[30:31]
619;
620; GFX8-LABEL: v_bitselect_v2i32_pat1:
621; GFX8:       ; %bb.0:
622; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
623; GFX8-NEXT:    v_bfi_b32 v0, v2, v0, v4
624; GFX8-NEXT:    v_bfi_b32 v1, v3, v1, v5
625; GFX8-NEXT:    s_setpc_b64 s[30:31]
626;
627; GFX10-LABEL: v_bitselect_v2i32_pat1:
628; GFX10:       ; %bb.0:
629; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
630; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
631; GFX10-NEXT:    v_bfi_b32 v0, v2, v0, v4
632; GFX10-NEXT:    v_bfi_b32 v1, v3, v1, v5
633; GFX10-NEXT:    s_setpc_b64 s[30:31]
634;
635; GFX8-GISEL-LABEL: v_bitselect_v2i32_pat1:
636; GFX8-GISEL:       ; %bb.0:
637; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
638; GFX8-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
639; GFX8-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
640; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, v0, v2
641; GFX8-GISEL-NEXT:    v_and_b32_e32 v1, v1, v3
642; GFX8-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
643; GFX8-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
644; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
645;
646; GFX10-GISEL-LABEL: v_bitselect_v2i32_pat1:
647; GFX10-GISEL:       ; %bb.0:
648; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
649; GFX10-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
650; GFX10-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
651; GFX10-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
652; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, v0, v2
653; GFX10-GISEL-NEXT:    v_and_b32_e32 v1, v1, v3
654; GFX10-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
655; GFX10-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
656; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
657  %xor.0 = xor <2 x i32> %a, %mask
658  %and = and <2 x i32> %xor.0, %b
659  %bitselect = xor <2 x i32> %and, %mask
660  ret <2 x i32> %bitselect
661}
662
; 64-bit bitselect written as (%a & %b) | (~%a & %mask).
; Without -global-isel every run expects two v_bfi_b32, one writing v0 and
; one writing v1 (the tahiti/tonga runs and the gfx1031 run list them in
; opposite order).
; With -global-isel the expected output has no v_bfi_b32: it is built from
; v_and_b32, v_xor_b32 with -1, v_and_b32 and v_or_b32 for each of v0 and v1.
663define i64 @v_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) {
664; GFX7-LABEL: v_bitselect_i64_pat_0:
665; GFX7:       ; %bb.0:
666; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
667; GFX7-NEXT:    v_bfi_b32 v1, v1, v3, v5
668; GFX7-NEXT:    v_bfi_b32 v0, v0, v2, v4
669; GFX7-NEXT:    s_setpc_b64 s[30:31]
670;
671; GFX8-LABEL: v_bitselect_i64_pat_0:
672; GFX8:       ; %bb.0:
673; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
674; GFX8-NEXT:    v_bfi_b32 v1, v1, v3, v5
675; GFX8-NEXT:    v_bfi_b32 v0, v0, v2, v4
676; GFX8-NEXT:    s_setpc_b64 s[30:31]
677;
678; GFX10-LABEL: v_bitselect_i64_pat_0:
679; GFX10:       ; %bb.0:
680; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
681; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
682; GFX10-NEXT:    v_bfi_b32 v0, v0, v2, v4
683; GFX10-NEXT:    v_bfi_b32 v1, v1, v3, v5
684; GFX10-NEXT:    s_setpc_b64 s[30:31]
685;
686; GFX8-GISEL-LABEL: v_bitselect_i64_pat_0:
687; GFX8-GISEL:       ; %bb.0:
688; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
689; GFX8-GISEL-NEXT:    v_and_b32_e32 v2, v0, v2
690; GFX8-GISEL-NEXT:    v_and_b32_e32 v3, v1, v3
691; GFX8-GISEL-NEXT:    v_xor_b32_e32 v0, -1, v0
692; GFX8-GISEL-NEXT:    v_xor_b32_e32 v1, -1, v1
693; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, v0, v4
694; GFX8-GISEL-NEXT:    v_and_b32_e32 v1, v1, v5
695; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v2, v0
696; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, v3, v1
697; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
698;
699; GFX10-GISEL-LABEL: v_bitselect_i64_pat_0:
700; GFX10-GISEL:       ; %bb.0:
701; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
702; GFX10-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
703; GFX10-GISEL-NEXT:    v_xor_b32_e32 v6, -1, v0
704; GFX10-GISEL-NEXT:    v_xor_b32_e32 v7, -1, v1
705; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, v0, v2
706; GFX10-GISEL-NEXT:    v_and_b32_e32 v1, v1, v3
707; GFX10-GISEL-NEXT:    v_and_b32_e32 v2, v6, v4
708; GFX10-GISEL-NEXT:    v_and_b32_e32 v3, v7, v5
709; GFX10-GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
710; GFX10-GISEL-NEXT:    v_or_b32_e32 v1, v1, v3
711; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
712  %and0 = and i64 %a, %b
713  %not.a = xor i64 %a, -1
714  %and1 = and i64 %not.a, %mask
715  %bitselect = or i64 %and0, %and1
716  ret i64 %bitselect
717}
718
; amdgpu_ps variant of the 64-bit bitselect (%a & %b) | (~%a & %mask) with
; %b and %mask marked inreg (s0-s3 in the expected output) and %a in v0/v1;
; the i64 result is bitcast to <2 x float> for the return.
; Without -global-isel two v_bfi_b32 are expected: the tahiti and tonga
; runs copy s3 and s2 into v2 first, the gfx1031 run uses the SGPRs
; directly.
; With -global-isel the expected output has no v_bfi_b32 and uses
; v_and_b32, v_xor_b32 with -1, v_and_b32 and v_or_b32 instead.
719define amdgpu_ps <2 x float> @v_s_s_bitselect_i64_pat_0(i64 %a, i64 inreg %b, i64 inreg %mask) {
720; GFX7-LABEL: v_s_s_bitselect_i64_pat_0:
721; GFX7:       ; %bb.0:
722; GFX7-NEXT:    v_mov_b32_e32 v2, s3
723; GFX7-NEXT:    v_bfi_b32 v1, v1, s1, v2
724; GFX7-NEXT:    v_mov_b32_e32 v2, s2
725; GFX7-NEXT:    v_bfi_b32 v0, v0, s0, v2
726; GFX7-NEXT:    ; return to shader part epilog
727;
728; GFX8-LABEL: v_s_s_bitselect_i64_pat_0:
729; GFX8:       ; %bb.0:
730; GFX8-NEXT:    v_mov_b32_e32 v2, s3
731; GFX8-NEXT:    v_bfi_b32 v1, v1, s1, v2
732; GFX8-NEXT:    v_mov_b32_e32 v2, s2
733; GFX8-NEXT:    v_bfi_b32 v0, v0, s0, v2
734; GFX8-NEXT:    ; return to shader part epilog
735;
736; GFX10-LABEL: v_s_s_bitselect_i64_pat_0:
737; GFX10:       ; %bb.0:
738; GFX10-NEXT:    v_bfi_b32 v0, v0, s0, s2
739; GFX10-NEXT:    v_bfi_b32 v1, v1, s1, s3
740; GFX10-NEXT:    ; return to shader part epilog
741;
742; GFX8-GISEL-LABEL: v_s_s_bitselect_i64_pat_0:
743; GFX8-GISEL:       ; %bb.0:
744; GFX8-GISEL-NEXT:    v_and_b32_e32 v2, s0, v0
745; GFX8-GISEL-NEXT:    v_and_b32_e32 v3, s1, v1
746; GFX8-GISEL-NEXT:    v_xor_b32_e32 v0, -1, v0
747; GFX8-GISEL-NEXT:    v_xor_b32_e32 v1, -1, v1
748; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, s2, v0
749; GFX8-GISEL-NEXT:    v_and_b32_e32 v1, s3, v1
750; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v2, v0
751; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, v3, v1
752; GFX8-GISEL-NEXT:    ; return to shader part epilog
753;
754; GFX10-GISEL-LABEL: v_s_s_bitselect_i64_pat_0:
755; GFX10-GISEL:       ; %bb.0:
756; GFX10-GISEL-NEXT:    v_xor_b32_e32 v2, -1, v0
757; GFX10-GISEL-NEXT:    v_xor_b32_e32 v3, -1, v1
758; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
759; GFX10-GISEL-NEXT:    v_and_b32_e32 v1, s1, v1
760; GFX10-GISEL-NEXT:    v_and_b32_e32 v2, s2, v2
761; GFX10-GISEL-NEXT:    v_and_b32_e32 v3, s3, v3
762; GFX10-GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
763; GFX10-GISEL-NEXT:    v_or_b32_e32 v1, v1, v3
764; GFX10-GISEL-NEXT:    ; return to shader part epilog
765  %and0 = and i64 %a, %b
766  %not.a = xor i64 %a, -1
767  %and1 = and i64 %not.a, %mask
768  %bitselect = or i64 %and0, %and1
769  %cast = bitcast i64 %bitselect to <2 x float>
770  ret <2 x float> %cast
771}
772
; amdgpu_ps variant of the 64-bit bitselect (%a & %b) | (~%a & %mask) with
; %a and %mask marked inreg (s0-s3 in the expected output) and %b in v0/v1;
; the i64 result is bitcast to <2 x float> for the return.
; Without -global-isel two v_bfi_b32 are expected: the tahiti and tonga
; runs copy s3 and s2 into v2 first, the gfx1031 run uses the SGPRs
; directly.
; With -global-isel the expected output has no v_bfi_b32: two v_and_b32,
; one s_andn2_b64 and two v_or_b32.
773define amdgpu_ps <2 x float> @s_v_s_bitselect_i64_pat_0(i64 inreg %a, i64 %b, i64 inreg %mask) {
774; GFX7-LABEL: s_v_s_bitselect_i64_pat_0:
775; GFX7:       ; %bb.0:
776; GFX7-NEXT:    v_mov_b32_e32 v2, s3
777; GFX7-NEXT:    v_bfi_b32 v1, s1, v1, v2
778; GFX7-NEXT:    v_mov_b32_e32 v2, s2
779; GFX7-NEXT:    v_bfi_b32 v0, s0, v0, v2
780; GFX7-NEXT:    ; return to shader part epilog
781;
782; GFX8-LABEL: s_v_s_bitselect_i64_pat_0:
783; GFX8:       ; %bb.0:
784; GFX8-NEXT:    v_mov_b32_e32 v2, s3
785; GFX8-NEXT:    v_bfi_b32 v1, s1, v1, v2
786; GFX8-NEXT:    v_mov_b32_e32 v2, s2
787; GFX8-NEXT:    v_bfi_b32 v0, s0, v0, v2
788; GFX8-NEXT:    ; return to shader part epilog
789;
790; GFX10-LABEL: s_v_s_bitselect_i64_pat_0:
791; GFX10:       ; %bb.0:
792; GFX10-NEXT:    v_bfi_b32 v0, s0, v0, s2
793; GFX10-NEXT:    v_bfi_b32 v1, s1, v1, s3
794; GFX10-NEXT:    ; return to shader part epilog
795;
796; GFX8-GISEL-LABEL: s_v_s_bitselect_i64_pat_0:
797; GFX8-GISEL:       ; %bb.0:
798; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
799; GFX8-GISEL-NEXT:    v_and_b32_e32 v1, s1, v1
800; GFX8-GISEL-NEXT:    s_andn2_b64 s[0:1], s[2:3], s[0:1]
801; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, s0, v0
802; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, s1, v1
803; GFX8-GISEL-NEXT:    ; return to shader part epilog
804;
805; GFX10-GISEL-LABEL: s_v_s_bitselect_i64_pat_0:
806; GFX10-GISEL:       ; %bb.0:
807; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
808; GFX10-GISEL-NEXT:    v_and_b32_e32 v1, s1, v1
809; GFX10-GISEL-NEXT:    s_andn2_b64 s[0:1], s[2:3], s[0:1]
810; GFX10-GISEL-NEXT:    v_or_b32_e32 v0, s0, v0
811; GFX10-GISEL-NEXT:    v_or_b32_e32 v1, s1, v1
812; GFX10-GISEL-NEXT:    ; return to shader part epilog
813  %and0 = and i64 %a, %b
814  %not.a = xor i64 %a, -1
815  %and1 = and i64 %not.a, %mask
816  %bitselect = or i64 %and0, %and1
817  %cast = bitcast i64 %bitselect to <2 x float>
818  ret <2 x float> %cast
819}
820
; Bit-select pattern 0 on i64: (%a & %b) | (~%a & %mask), returned as <2 x float>.
; Operand placement under test: %a and %b are `inreg`, %mask is not. The checks
; below show %a in s[0:1], %b in s[2:3] and %mask in v[0:1].
; The runs without -global-isel expect one v_bfi_b32 per 32-bit half. The GFX7 and
; GFX8 checks copy each %b half into v2 first, while the GFX10 checks use both
; SGPR operands directly (presumably a constant-bus restriction on the older
; targets -- TODO confirm). The -global-isel checks expect s_and_b64 / s_not_b64
; followed by v_and / v_or, with no v_bfi_b32.
821define amdgpu_ps <2 x float> @s_s_v_bitselect_i64_pat_0(i64 inreg %a, i64 inreg %b, i64 %mask) {
822; GFX7-LABEL: s_s_v_bitselect_i64_pat_0:
823; GFX7:       ; %bb.0:
824; GFX7-NEXT:    v_mov_b32_e32 v2, s3
825; GFX7-NEXT:    v_bfi_b32 v1, s1, v2, v1
826; GFX7-NEXT:    v_mov_b32_e32 v2, s2
827; GFX7-NEXT:    v_bfi_b32 v0, s0, v2, v0
828; GFX7-NEXT:    ; return to shader part epilog
829;
830; GFX8-LABEL: s_s_v_bitselect_i64_pat_0:
831; GFX8:       ; %bb.0:
832; GFX8-NEXT:    v_mov_b32_e32 v2, s3
833; GFX8-NEXT:    v_bfi_b32 v1, s1, v2, v1
834; GFX8-NEXT:    v_mov_b32_e32 v2, s2
835; GFX8-NEXT:    v_bfi_b32 v0, s0, v2, v0
836; GFX8-NEXT:    ; return to shader part epilog
837;
838; GFX10-LABEL: s_s_v_bitselect_i64_pat_0:
839; GFX10:       ; %bb.0:
840; GFX10-NEXT:    v_bfi_b32 v0, s0, s2, v0
841; GFX10-NEXT:    v_bfi_b32 v1, s1, s3, v1
842; GFX10-NEXT:    ; return to shader part epilog
843;
844; GFX8-GISEL-LABEL: s_s_v_bitselect_i64_pat_0:
845; GFX8-GISEL:       ; %bb.0:
846; GFX8-GISEL-NEXT:    s_and_b64 s[2:3], s[0:1], s[2:3]
847; GFX8-GISEL-NEXT:    s_not_b64 s[0:1], s[0:1]
848; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
849; GFX8-GISEL-NEXT:    v_and_b32_e32 v1, s1, v1
850; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, s2, v0
851; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, s3, v1
852; GFX8-GISEL-NEXT:    ; return to shader part epilog
853;
854; GFX10-GISEL-LABEL: s_s_v_bitselect_i64_pat_0:
855; GFX10-GISEL:       ; %bb.0:
856; GFX10-GISEL-NEXT:    s_not_b64 s[4:5], s[0:1]
857; GFX10-GISEL-NEXT:    s_and_b64 s[0:1], s[0:1], s[2:3]
858; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, s4, v0
859; GFX10-GISEL-NEXT:    v_and_b32_e32 v1, s5, v1
860; GFX10-GISEL-NEXT:    v_or_b32_e32 v0, s0, v0
861; GFX10-GISEL-NEXT:    v_or_b32_e32 v1, s1, v1
862; GFX10-GISEL-NEXT:    ; return to shader part epilog
; %and0 keeps the %b bits where %a is set; %and1 keeps the %mask bits where %a is clear.
863  %and0 = and i64 %a, %b
864  %not.a = xor i64 %a, -1
865  %and1 = and i64 %not.a, %mask
866  %bitselect = or i64 %and0, %and1
867  %cast = bitcast i64 %bitselect to <2 x float>
868  ret <2 x float> %cast
869}
870
; Bit-select pattern 0 on i64: (%a & %b) | (~%a & %mask), returned as <2 x float>.
; Operand placement under test: only %mask is `inreg`. The checks below show %a in
; v[0:1], %b in v[2:3] and %mask in s[0:1].
; With a single SGPR operand, every run without -global-isel expects a bare
; v_bfi_b32 per 32-bit half with no preparatory copy. The -global-isel checks
; expect v_and / v_xor with -1 / v_and / v_or and no v_bfi_b32.
871define amdgpu_ps <2 x float> @v_v_s_bitselect_i64_pat_0(i64 %a, i64 %b, i64 inreg %mask) {
872; GFX7-LABEL: v_v_s_bitselect_i64_pat_0:
873; GFX7:       ; %bb.0:
874; GFX7-NEXT:    v_bfi_b32 v1, v1, v3, s1
875; GFX7-NEXT:    v_bfi_b32 v0, v0, v2, s0
876; GFX7-NEXT:    ; return to shader part epilog
877;
878; GFX8-LABEL: v_v_s_bitselect_i64_pat_0:
879; GFX8:       ; %bb.0:
880; GFX8-NEXT:    v_bfi_b32 v1, v1, v3, s1
881; GFX8-NEXT:    v_bfi_b32 v0, v0, v2, s0
882; GFX8-NEXT:    ; return to shader part epilog
883;
884; GFX10-LABEL: v_v_s_bitselect_i64_pat_0:
885; GFX10:       ; %bb.0:
886; GFX10-NEXT:    v_bfi_b32 v0, v0, v2, s0
887; GFX10-NEXT:    v_bfi_b32 v1, v1, v3, s1
888; GFX10-NEXT:    ; return to shader part epilog
889;
890; GFX8-GISEL-LABEL: v_v_s_bitselect_i64_pat_0:
891; GFX8-GISEL:       ; %bb.0:
892; GFX8-GISEL-NEXT:    v_and_b32_e32 v2, v0, v2
893; GFX8-GISEL-NEXT:    v_and_b32_e32 v3, v1, v3
894; GFX8-GISEL-NEXT:    v_xor_b32_e32 v0, -1, v0
895; GFX8-GISEL-NEXT:    v_xor_b32_e32 v1, -1, v1
896; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
897; GFX8-GISEL-NEXT:    v_and_b32_e32 v1, s1, v1
898; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v2, v0
899; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, v3, v1
900; GFX8-GISEL-NEXT:    ; return to shader part epilog
901;
902; GFX10-GISEL-LABEL: v_v_s_bitselect_i64_pat_0:
903; GFX10-GISEL:       ; %bb.0:
904; GFX10-GISEL-NEXT:    v_xor_b32_e32 v4, -1, v0
905; GFX10-GISEL-NEXT:    v_xor_b32_e32 v5, -1, v1
906; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, v0, v2
907; GFX10-GISEL-NEXT:    v_and_b32_e32 v1, v1, v3
908; GFX10-GISEL-NEXT:    v_and_b32_e32 v2, s0, v4
909; GFX10-GISEL-NEXT:    v_and_b32_e32 v3, s1, v5
910; GFX10-GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
911; GFX10-GISEL-NEXT:    v_or_b32_e32 v1, v1, v3
912; GFX10-GISEL-NEXT:    ; return to shader part epilog
; %and0 keeps the %b bits where %a is set; %and1 keeps the %mask bits where %a is clear.
913  %and0 = and i64 %a, %b
914  %not.a = xor i64 %a, -1
915  %and1 = and i64 %not.a, %mask
916  %bitselect = or i64 %and0, %and1
917  %cast = bitcast i64 %bitselect to <2 x float>
918  ret <2 x float> %cast
919}
920
; Bit-select pattern 0 on i64: (%a & %b) | (~%a & %mask), returned as <2 x float>.
; Operand placement under test: only %b is `inreg`. The checks below show %a in
; v[0:1], %b in s[0:1] and %mask in v[2:3].
; With a single SGPR operand, every run without -global-isel expects a bare
; v_bfi_b32 per 32-bit half with no preparatory copy. The -global-isel checks
; expect v_and / v_xor with -1 / v_and / v_or and no v_bfi_b32.
921define amdgpu_ps <2 x float> @v_s_v_bitselect_i64_pat_0(i64 %a, i64 inreg %b, i64 %mask) {
922; GFX7-LABEL: v_s_v_bitselect_i64_pat_0:
923; GFX7:       ; %bb.0:
924; GFX7-NEXT:    v_bfi_b32 v1, v1, s1, v3
925; GFX7-NEXT:    v_bfi_b32 v0, v0, s0, v2
926; GFX7-NEXT:    ; return to shader part epilog
927;
928; GFX8-LABEL: v_s_v_bitselect_i64_pat_0:
929; GFX8:       ; %bb.0:
930; GFX8-NEXT:    v_bfi_b32 v1, v1, s1, v3
931; GFX8-NEXT:    v_bfi_b32 v0, v0, s0, v2
932; GFX8-NEXT:    ; return to shader part epilog
933;
934; GFX10-LABEL: v_s_v_bitselect_i64_pat_0:
935; GFX10:       ; %bb.0:
936; GFX10-NEXT:    v_bfi_b32 v0, v0, s0, v2
937; GFX10-NEXT:    v_bfi_b32 v1, v1, s1, v3
938; GFX10-NEXT:    ; return to shader part epilog
939;
940; GFX8-GISEL-LABEL: v_s_v_bitselect_i64_pat_0:
941; GFX8-GISEL:       ; %bb.0:
942; GFX8-GISEL-NEXT:    v_and_b32_e32 v4, s0, v0
943; GFX8-GISEL-NEXT:    v_and_b32_e32 v5, s1, v1
944; GFX8-GISEL-NEXT:    v_xor_b32_e32 v0, -1, v0
945; GFX8-GISEL-NEXT:    v_xor_b32_e32 v1, -1, v1
946; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, v0, v2
947; GFX8-GISEL-NEXT:    v_and_b32_e32 v1, v1, v3
948; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v4, v0
949; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, v5, v1
950; GFX8-GISEL-NEXT:    ; return to shader part epilog
951;
952; GFX10-GISEL-LABEL: v_s_v_bitselect_i64_pat_0:
953; GFX10-GISEL:       ; %bb.0:
954; GFX10-GISEL-NEXT:    v_xor_b32_e32 v4, -1, v0
955; GFX10-GISEL-NEXT:    v_xor_b32_e32 v5, -1, v1
956; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
957; GFX10-GISEL-NEXT:    v_and_b32_e32 v1, s1, v1
958; GFX10-GISEL-NEXT:    v_and_b32_e32 v2, v4, v2
959; GFX10-GISEL-NEXT:    v_and_b32_e32 v3, v5, v3
960; GFX10-GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
961; GFX10-GISEL-NEXT:    v_or_b32_e32 v1, v1, v3
962; GFX10-GISEL-NEXT:    ; return to shader part epilog
; %and0 keeps the %b bits where %a is set; %and1 keeps the %mask bits where %a is clear.
963  %and0 = and i64 %a, %b
964  %not.a = xor i64 %a, -1
965  %and1 = and i64 %not.a, %mask
966  %bitselect = or i64 %and0, %and1
967  %cast = bitcast i64 %bitselect to <2 x float>
968  ret <2 x float> %cast
969}
970
; Bit-select pattern 0 on i64: (%a & %b) | (~%a & %mask), returned as <2 x float>.
; Operand placement under test: only %a (the selector) is `inreg`. The checks below
; show %a in s[0:1], %b in v[0:1] and %mask in v[2:3].
; With a single SGPR operand, every run without -global-isel expects a bare
; v_bfi_b32 per 32-bit half with no preparatory copy. The -global-isel checks
; invert %a with s_not_b64 and then expect v_and / v_and / v_or, with no v_bfi_b32.
971define amdgpu_ps <2 x float> @s_v_v_bitselect_i64_pat_0(i64 inreg %a, i64 %b, i64 %mask) {
972; GFX7-LABEL: s_v_v_bitselect_i64_pat_0:
973; GFX7:       ; %bb.0:
974; GFX7-NEXT:    v_bfi_b32 v1, s1, v1, v3
975; GFX7-NEXT:    v_bfi_b32 v0, s0, v0, v2
976; GFX7-NEXT:    ; return to shader part epilog
977;
978; GFX8-LABEL: s_v_v_bitselect_i64_pat_0:
979; GFX8:       ; %bb.0:
980; GFX8-NEXT:    v_bfi_b32 v1, s1, v1, v3
981; GFX8-NEXT:    v_bfi_b32 v0, s0, v0, v2
982; GFX8-NEXT:    ; return to shader part epilog
983;
984; GFX10-LABEL: s_v_v_bitselect_i64_pat_0:
985; GFX10:       ; %bb.0:
986; GFX10-NEXT:    v_bfi_b32 v0, s0, v0, v2
987; GFX10-NEXT:    v_bfi_b32 v1, s1, v1, v3
988; GFX10-NEXT:    ; return to shader part epilog
989;
990; GFX8-GISEL-LABEL: s_v_v_bitselect_i64_pat_0:
991; GFX8-GISEL:       ; %bb.0:
992; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
993; GFX8-GISEL-NEXT:    v_and_b32_e32 v1, s1, v1
994; GFX8-GISEL-NEXT:    s_not_b64 s[0:1], s[0:1]
995; GFX8-GISEL-NEXT:    v_and_b32_e32 v2, s0, v2
996; GFX8-GISEL-NEXT:    v_and_b32_e32 v3, s1, v3
997; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
998; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, v1, v3
999; GFX8-GISEL-NEXT:    ; return to shader part epilog
1000;
1001; GFX10-GISEL-LABEL: s_v_v_bitselect_i64_pat_0:
1002; GFX10-GISEL:       ; %bb.0:
1003; GFX10-GISEL-NEXT:    s_not_b64 s[2:3], s[0:1]
1004; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
1005; GFX10-GISEL-NEXT:    v_and_b32_e32 v1, s1, v1
1006; GFX10-GISEL-NEXT:    v_and_b32_e32 v2, s2, v2
1007; GFX10-GISEL-NEXT:    v_and_b32_e32 v3, s3, v3
1008; GFX10-GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
1009; GFX10-GISEL-NEXT:    v_or_b32_e32 v1, v1, v3
1010; GFX10-GISEL-NEXT:    ; return to shader part epilog
; %and0 keeps the %b bits where %a is set; %and1 keeps the %mask bits where %a is clear.
1011  %and0 = and i64 %a, %b
1012  %not.a = xor i64 %a, -1
1013  %and1 = and i64 %not.a, %mask
1014  %bitselect = or i64 %and0, %and1
1015  %cast = bitcast i64 %bitselect to <2 x float>
1016  ret <2 x float> %cast
1017}
1018
; Bit-select pattern 1 on i64: ((%a ^ %mask) & %b) ^ %mask, i.e. %a where a %b bit
; is set and %mask where it is clear. Uses the default calling convention and
; returns i64; the checks show %a in v[0:1], %b in v[2:3] and %mask in v[4:5].
; The runs without -global-isel expect one v_bfi_b32 per 32-bit half with %b as the
; selector operand. The -global-isel checks expect the literal xor/and/xor sequence
; and no v_bfi_b32.
1019define i64 @v_bitselect_i64_pat_1(i64 %a, i64 %b, i64 %mask) {
1020; GFX7-LABEL: v_bitselect_i64_pat_1:
1021; GFX7:       ; %bb.0:
1022; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1023; GFX7-NEXT:    v_bfi_b32 v1, v3, v1, v5
1024; GFX7-NEXT:    v_bfi_b32 v0, v2, v0, v4
1025; GFX7-NEXT:    s_setpc_b64 s[30:31]
1026;
1027; GFX8-LABEL: v_bitselect_i64_pat_1:
1028; GFX8:       ; %bb.0:
1029; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1030; GFX8-NEXT:    v_bfi_b32 v1, v3, v1, v5
1031; GFX8-NEXT:    v_bfi_b32 v0, v2, v0, v4
1032; GFX8-NEXT:    s_setpc_b64 s[30:31]
1033;
1034; GFX10-LABEL: v_bitselect_i64_pat_1:
1035; GFX10:       ; %bb.0:
1036; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1037; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1038; GFX10-NEXT:    v_bfi_b32 v0, v2, v0, v4
1039; GFX10-NEXT:    v_bfi_b32 v1, v3, v1, v5
1040; GFX10-NEXT:    s_setpc_b64 s[30:31]
1041;
1042; GFX8-GISEL-LABEL: v_bitselect_i64_pat_1:
1043; GFX8-GISEL:       ; %bb.0:
1044; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1045; GFX8-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
1046; GFX8-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
1047; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, v0, v2
1048; GFX8-GISEL-NEXT:    v_and_b32_e32 v1, v1, v3
1049; GFX8-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
1050; GFX8-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
1051; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
1052;
1053; GFX10-GISEL-LABEL: v_bitselect_i64_pat_1:
1054; GFX10-GISEL:       ; %bb.0:
1055; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1056; GFX10-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
1057; GFX10-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
1058; GFX10-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
1059; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, v0, v2
1060; GFX10-GISEL-NEXT:    v_and_b32_e32 v1, v1, v3
1061; GFX10-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
1062; GFX10-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
1063; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
; Masking the xor with %b and xor-ing %mask back in leaves %mask untouched wherever %b is 0.
1064  %xor.0 = xor i64 %a, %mask
1065  %and = and i64 %xor.0, %b
1066  %bitselect = xor i64 %and, %mask
1067  ret i64 %bitselect
1068}
1069
; Bit-select pattern 1 on i64: ((%a ^ %mask) & %b) ^ %mask, returned as <2 x float>.
; Operand placement under test: %b and %mask are `inreg`, %a is not. The checks
; below show %a in v[0:1], %b in s[0:1] and %mask in s[2:3].
; The runs without -global-isel expect one v_bfi_b32 per 32-bit half with %b as the
; selector. The GFX7 and GFX8 checks copy each %mask half into v2 first, while the
; GFX10 checks use both SGPR operands directly (presumably a constant-bus
; restriction on the older targets -- TODO confirm). The -global-isel checks
; expect v_xor / v_and / v_xor and no v_bfi_b32.
1070define amdgpu_ps <2 x float> @v_s_s_bitselect_i64_pat_1(i64 %a, i64 inreg %b, i64 inreg %mask) {
1071; GFX7-LABEL: v_s_s_bitselect_i64_pat_1:
1072; GFX7:       ; %bb.0:
1073; GFX7-NEXT:    v_mov_b32_e32 v2, s3
1074; GFX7-NEXT:    v_bfi_b32 v1, s1, v1, v2
1075; GFX7-NEXT:    v_mov_b32_e32 v2, s2
1076; GFX7-NEXT:    v_bfi_b32 v0, s0, v0, v2
1077; GFX7-NEXT:    ; return to shader part epilog
1078;
1079; GFX8-LABEL: v_s_s_bitselect_i64_pat_1:
1080; GFX8:       ; %bb.0:
1081; GFX8-NEXT:    v_mov_b32_e32 v2, s3
1082; GFX8-NEXT:    v_bfi_b32 v1, s1, v1, v2
1083; GFX8-NEXT:    v_mov_b32_e32 v2, s2
1084; GFX8-NEXT:    v_bfi_b32 v0, s0, v0, v2
1085; GFX8-NEXT:    ; return to shader part epilog
1086;
1087; GFX10-LABEL: v_s_s_bitselect_i64_pat_1:
1088; GFX10:       ; %bb.0:
1089; GFX10-NEXT:    v_bfi_b32 v0, s0, v0, s2
1090; GFX10-NEXT:    v_bfi_b32 v1, s1, v1, s3
1091; GFX10-NEXT:    ; return to shader part epilog
1092;
1093; GFX8-GISEL-LABEL: v_s_s_bitselect_i64_pat_1:
1094; GFX8-GISEL:       ; %bb.0:
1095; GFX8-GISEL-NEXT:    v_xor_b32_e32 v0, s2, v0
1096; GFX8-GISEL-NEXT:    v_xor_b32_e32 v1, s3, v1
1097; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
1098; GFX8-GISEL-NEXT:    v_and_b32_e32 v1, s1, v1
1099; GFX8-GISEL-NEXT:    v_xor_b32_e32 v0, s2, v0
1100; GFX8-GISEL-NEXT:    v_xor_b32_e32 v1, s3, v1
1101; GFX8-GISEL-NEXT:    ; return to shader part epilog
1102;
1103; GFX10-GISEL-LABEL: v_s_s_bitselect_i64_pat_1:
1104; GFX10-GISEL:       ; %bb.0:
1105; GFX10-GISEL-NEXT:    v_xor_b32_e32 v0, s2, v0
1106; GFX10-GISEL-NEXT:    v_xor_b32_e32 v1, s3, v1
1107; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
1108; GFX10-GISEL-NEXT:    v_and_b32_e32 v1, s1, v1
1109; GFX10-GISEL-NEXT:    v_xor_b32_e32 v0, s2, v0
1110; GFX10-GISEL-NEXT:    v_xor_b32_e32 v1, s3, v1
1111; GFX10-GISEL-NEXT:    ; return to shader part epilog
; Masking the xor with %b and xor-ing %mask back in leaves %mask untouched wherever %b is 0.
1112  %xor.0 = xor i64 %a, %mask
1113  %and = and i64 %xor.0, %b
1114  %bitselect = xor i64 %and, %mask
1115  %cast = bitcast i64 %bitselect to <2 x float>
1116  ret <2 x float> %cast
1117}
1118
; Bit-select pattern 1 on i64: ((%a ^ %mask) & %b) ^ %mask, returned as <2 x float>.
; Operand placement under test: %a and %b are `inreg`, %mask is not. The checks
; below show %a in s[0:1], %b in s[2:3] and %mask in v[0:1].
; The runs without -global-isel expect one v_bfi_b32 per 32-bit half with %b as the
; selector. The GFX7 and GFX8 checks copy each %a half into v2 first, while the
; GFX10 checks use both SGPR operands directly (presumably a constant-bus
; restriction on the older targets -- TODO confirm). The -global-isel checks
; expect v_xor / v_and / v_xor and no v_bfi_b32.
1119define amdgpu_ps <2 x float> @s_s_v_bitselect_i64_pat_1(i64 inreg %a, i64 inreg %b, i64 %mask) {
1120; GFX7-LABEL: s_s_v_bitselect_i64_pat_1:
1121; GFX7:       ; %bb.0:
1122; GFX7-NEXT:    v_mov_b32_e32 v2, s1
1123; GFX7-NEXT:    v_bfi_b32 v1, s3, v2, v1
1124; GFX7-NEXT:    v_mov_b32_e32 v2, s0
1125; GFX7-NEXT:    v_bfi_b32 v0, s2, v2, v0
1126; GFX7-NEXT:    ; return to shader part epilog
1127;
1128; GFX8-LABEL: s_s_v_bitselect_i64_pat_1:
1129; GFX8:       ; %bb.0:
1130; GFX8-NEXT:    v_mov_b32_e32 v2, s1
1131; GFX8-NEXT:    v_bfi_b32 v1, s3, v2, v1
1132; GFX8-NEXT:    v_mov_b32_e32 v2, s0
1133; GFX8-NEXT:    v_bfi_b32 v0, s2, v2, v0
1134; GFX8-NEXT:    ; return to shader part epilog
1135;
1136; GFX10-LABEL: s_s_v_bitselect_i64_pat_1:
1137; GFX10:       ; %bb.0:
1138; GFX10-NEXT:    v_bfi_b32 v0, s2, s0, v0
1139; GFX10-NEXT:    v_bfi_b32 v1, s3, s1, v1
1140; GFX10-NEXT:    ; return to shader part epilog
1141;
1142; GFX8-GISEL-LABEL: s_s_v_bitselect_i64_pat_1:
1143; GFX8-GISEL:       ; %bb.0:
1144; GFX8-GISEL-NEXT:    v_xor_b32_e32 v2, s0, v0
1145; GFX8-GISEL-NEXT:    v_xor_b32_e32 v3, s1, v1
1146; GFX8-GISEL-NEXT:    v_and_b32_e32 v2, s2, v2
1147; GFX8-GISEL-NEXT:    v_and_b32_e32 v3, s3, v3
1148; GFX8-GISEL-NEXT:    v_xor_b32_e32 v0, v2, v0
1149; GFX8-GISEL-NEXT:    v_xor_b32_e32 v1, v3, v1
1150; GFX8-GISEL-NEXT:    ; return to shader part epilog
1151;
1152; GFX10-GISEL-LABEL: s_s_v_bitselect_i64_pat_1:
1153; GFX10-GISEL:       ; %bb.0:
1154; GFX10-GISEL-NEXT:    v_xor_b32_e32 v2, s0, v0
1155; GFX10-GISEL-NEXT:    v_xor_b32_e32 v3, s1, v1
1156; GFX10-GISEL-NEXT:    v_and_b32_e32 v2, s2, v2
1157; GFX10-GISEL-NEXT:    v_and_b32_e32 v3, s3, v3
1158; GFX10-GISEL-NEXT:    v_xor_b32_e32 v0, v2, v0
1159; GFX10-GISEL-NEXT:    v_xor_b32_e32 v1, v3, v1
1160; GFX10-GISEL-NEXT:    ; return to shader part epilog
; Masking the xor with %b and xor-ing %mask back in leaves %mask untouched wherever %b is 0.
1161  %xor.0 = xor i64 %a, %mask
1162  %and = and i64 %xor.0, %b
1163  %bitselect = xor i64 %and, %mask
1164  %cast = bitcast i64 %bitselect to <2 x float>
1165  ret <2 x float> %cast
1166}
1167
; Bit-select pattern 1 on i64: ((%a ^ %mask) & %b) ^ %mask, returned as <2 x float>.
; Operand placement under test: %a and %mask are `inreg`, %b is not. The checks
; below show %a in s[0:1], %b in v[0:1] and %mask in s[2:3].
; The runs without -global-isel expect one v_bfi_b32 per 32-bit half with %b as the
; selector. The GFX7 and GFX8 checks copy each %mask half into v2 first, while the
; GFX10 checks use both SGPR operands directly (presumably a constant-bus
; restriction on the older targets -- TODO confirm). The -global-isel checks
; do the first xor on the scalar unit (s_xor_b64), then v_and / v_xor.
1168define amdgpu_ps <2 x float> @s_v_s_bitselect_i64_pat_1(i64 inreg %a, i64 %b, i64 inreg %mask) {
1169; GFX7-LABEL: s_v_s_bitselect_i64_pat_1:
1170; GFX7:       ; %bb.0:
1171; GFX7-NEXT:    v_mov_b32_e32 v2, s3
1172; GFX7-NEXT:    v_bfi_b32 v1, v1, s1, v2
1173; GFX7-NEXT:    v_mov_b32_e32 v2, s2
1174; GFX7-NEXT:    v_bfi_b32 v0, v0, s0, v2
1175; GFX7-NEXT:    ; return to shader part epilog
1176;
1177; GFX8-LABEL: s_v_s_bitselect_i64_pat_1:
1178; GFX8:       ; %bb.0:
1179; GFX8-NEXT:    v_mov_b32_e32 v2, s3
1180; GFX8-NEXT:    v_bfi_b32 v1, v1, s1, v2
1181; GFX8-NEXT:    v_mov_b32_e32 v2, s2
1182; GFX8-NEXT:    v_bfi_b32 v0, v0, s0, v2
1183; GFX8-NEXT:    ; return to shader part epilog
1184;
1185; GFX10-LABEL: s_v_s_bitselect_i64_pat_1:
1186; GFX10:       ; %bb.0:
1187; GFX10-NEXT:    v_bfi_b32 v0, v0, s0, s2
1188; GFX10-NEXT:    v_bfi_b32 v1, v1, s1, s3
1189; GFX10-NEXT:    ; return to shader part epilog
1190;
1191; GFX8-GISEL-LABEL: s_v_s_bitselect_i64_pat_1:
1192; GFX8-GISEL:       ; %bb.0:
1193; GFX8-GISEL-NEXT:    s_xor_b64 s[0:1], s[0:1], s[2:3]
1194; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
1195; GFX8-GISEL-NEXT:    v_and_b32_e32 v1, s1, v1
1196; GFX8-GISEL-NEXT:    v_xor_b32_e32 v0, s2, v0
1197; GFX8-GISEL-NEXT:    v_xor_b32_e32 v1, s3, v1
1198; GFX8-GISEL-NEXT:    ; return to shader part epilog
1199;
1200; GFX10-GISEL-LABEL: s_v_s_bitselect_i64_pat_1:
1201; GFX10-GISEL:       ; %bb.0:
1202; GFX10-GISEL-NEXT:    s_xor_b64 s[0:1], s[0:1], s[2:3]
1203; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
1204; GFX10-GISEL-NEXT:    v_and_b32_e32 v1, s1, v1
1205; GFX10-GISEL-NEXT:    v_xor_b32_e32 v0, s2, v0
1206; GFX10-GISEL-NEXT:    v_xor_b32_e32 v1, s3, v1
1207; GFX10-GISEL-NEXT:    ; return to shader part epilog
; Masking the xor with %b and xor-ing %mask back in leaves %mask untouched wherever %b is 0.
1208  %xor.0 = xor i64 %a, %mask
1209  %and = and i64 %xor.0, %b
1210  %bitselect = xor i64 %and, %mask
1211  %cast = bitcast i64 %bitselect to <2 x float>
1212  ret <2 x float> %cast
1213}
1214
; Bit-select "pattern 2" on i64, all operands in VGPRs, default calling convention.
; As written the body computes ((%a ^ %mask) & %b) ^ %mask; the checks show %a in
; v[0:1], %b in v[2:3] and %mask in v[4:5]. The runs without -global-isel expect one
; v_bfi_b32 per 32-bit half; the -global-isel checks expect xor/and/xor.
; NOTE(review): this IR body and its expected output are identical to
; @v_bitselect_i64_pat_1, so the test adds no extra coverage as written. Confirm
; whether a distinct expression was intended for pattern 2; if the IR is changed,
; regenerate the assertions with utils/update_llc_test_checks.py.
1215define i64 @v_bitselect_i64_pat_2(i64 %a, i64 %b, i64 %mask) {
1216; GFX7-LABEL: v_bitselect_i64_pat_2:
1217; GFX7:       ; %bb.0:
1218; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1219; GFX7-NEXT:    v_bfi_b32 v1, v3, v1, v5
1220; GFX7-NEXT:    v_bfi_b32 v0, v2, v0, v4
1221; GFX7-NEXT:    s_setpc_b64 s[30:31]
1222;
1223; GFX8-LABEL: v_bitselect_i64_pat_2:
1224; GFX8:       ; %bb.0:
1225; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1226; GFX8-NEXT:    v_bfi_b32 v1, v3, v1, v5
1227; GFX8-NEXT:    v_bfi_b32 v0, v2, v0, v4
1228; GFX8-NEXT:    s_setpc_b64 s[30:31]
1229;
1230; GFX10-LABEL: v_bitselect_i64_pat_2:
1231; GFX10:       ; %bb.0:
1232; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1233; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1234; GFX10-NEXT:    v_bfi_b32 v0, v2, v0, v4
1235; GFX10-NEXT:    v_bfi_b32 v1, v3, v1, v5
1236; GFX10-NEXT:    s_setpc_b64 s[30:31]
1237;
1238; GFX8-GISEL-LABEL: v_bitselect_i64_pat_2:
1239; GFX8-GISEL:       ; %bb.0:
1240; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1241; GFX8-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
1242; GFX8-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
1243; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, v0, v2
1244; GFX8-GISEL-NEXT:    v_and_b32_e32 v1, v1, v3
1245; GFX8-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
1246; GFX8-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
1247; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
1248;
1249; GFX10-GISEL-LABEL: v_bitselect_i64_pat_2:
1250; GFX10-GISEL:       ; %bb.0:
1251; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1252; GFX10-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
1253; GFX10-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
1254; GFX10-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
1255; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, v0, v2
1256; GFX10-GISEL-NEXT:    v_and_b32_e32 v1, v1, v3
1257; GFX10-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
1258; GFX10-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
1259; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
; Masking the xor with %b and xor-ing %mask back in leaves %mask untouched wherever %b is 0.
1260  %xor.0 = xor i64 %a, %mask
1261  %and = and i64 %xor.0, %b
1262  %bitselect = xor i64 %and, %mask
1263  ret i64 %bitselect
1264}
1265
; Bitwise majority of three i64 values: (%x & %z) | (%y & (%x | %z)). The name
; suggests this models the SHA-256 "Ma" function. Default calling convention,
; returns i64; the checks show %x in v[0:1], %y in v[2:3] and %z in v[4:5].
; The runs without -global-isel expect, per 32-bit half, a v_xor of %x and %y
; followed by a v_bfi_b32 that picks %z where the two differ and %y where they
; agree. The -global-isel checks expect the literal and/or/and/or sequence and no
; v_bfi_b32.
1266define i64 @v_bfi_sha256_ma_i64(i64 %x, i64 %y, i64 %z) {
1267; GFX7-LABEL: v_bfi_sha256_ma_i64:
1268; GFX7:       ; %bb.0: ; %entry
1269; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1270; GFX7-NEXT:    v_xor_b32_e32 v1, v1, v3
1271; GFX7-NEXT:    v_xor_b32_e32 v0, v0, v2
1272; GFX7-NEXT:    v_bfi_b32 v1, v1, v5, v3
1273; GFX7-NEXT:    v_bfi_b32 v0, v0, v4, v2
1274; GFX7-NEXT:    s_setpc_b64 s[30:31]
1275;
1276; GFX8-LABEL: v_bfi_sha256_ma_i64:
1277; GFX8:       ; %bb.0: ; %entry
1278; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1279; GFX8-NEXT:    v_xor_b32_e32 v1, v1, v3
1280; GFX8-NEXT:    v_xor_b32_e32 v0, v0, v2
1281; GFX8-NEXT:    v_bfi_b32 v1, v1, v5, v3
1282; GFX8-NEXT:    v_bfi_b32 v0, v0, v4, v2
1283; GFX8-NEXT:    s_setpc_b64 s[30:31]
1284;
1285; GFX10-LABEL: v_bfi_sha256_ma_i64:
1286; GFX10:       ; %bb.0: ; %entry
1287; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1288; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1289; GFX10-NEXT:    v_xor_b32_e32 v0, v0, v2
1290; GFX10-NEXT:    v_xor_b32_e32 v1, v1, v3
1291; GFX10-NEXT:    v_bfi_b32 v0, v0, v4, v2
1292; GFX10-NEXT:    v_bfi_b32 v1, v1, v5, v3
1293; GFX10-NEXT:    s_setpc_b64 s[30:31]
1294;
1295; GFX8-GISEL-LABEL: v_bfi_sha256_ma_i64:
1296; GFX8-GISEL:       ; %bb.0: ; %entry
1297; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1298; GFX8-GISEL-NEXT:    v_and_b32_e32 v6, v0, v4
1299; GFX8-GISEL-NEXT:    v_and_b32_e32 v7, v1, v5
1300; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v0, v4
1301; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, v1, v5
1302; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, v2, v0
1303; GFX8-GISEL-NEXT:    v_and_b32_e32 v1, v3, v1
1304; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v6, v0
1305; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, v7, v1
1306; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
1307;
1308; GFX10-GISEL-LABEL: v_bfi_sha256_ma_i64:
1309; GFX10-GISEL:       ; %bb.0: ; %entry
1310; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1311; GFX10-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
1312; GFX10-GISEL-NEXT:    v_or_b32_e32 v6, v0, v4
1313; GFX10-GISEL-NEXT:    v_or_b32_e32 v7, v1, v5
1314; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, v0, v4
1315; GFX10-GISEL-NEXT:    v_and_b32_e32 v1, v1, v5
1316; GFX10-GISEL-NEXT:    v_and_b32_e32 v2, v2, v6
1317; GFX10-GISEL-NEXT:    v_and_b32_e32 v3, v3, v7
1318; GFX10-GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
1319; GFX10-GISEL-NEXT:    v_or_b32_e32 v1, v1, v3
1320; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
; A result bit is set when %x and %z are both set, or when %y is set along with either of them.
1321entry:
1322  %and0 = and i64 %x, %z
1323  %or0 = or i64 %x, %z
1324  %and1 = and i64 %y, %or0
1325  %or1 = or i64 %and0, %and1
1326  ret i64 %or1
1327}
1328
; Bitwise majority (%x & %z) | (%y & (%x | %z)) on i64, returned as <2 x float>.
; Operand placement under test: %y and %z are `inreg`, %x is not. The checks below
; show %x in v[0:1], %y in s[0:1] and %z in s[2:3].
; The runs without -global-isel expect, per 32-bit half, a v_xor of %x and %y
; followed by a v_bfi_b32 selecting %z where they differ and %y otherwise. The GFX7
; and GFX8 checks copy each %y half into v2 first, while the GFX10 checks use both
; SGPR operands directly (presumably a constant-bus restriction on the older
; targets -- TODO confirm). The -global-isel checks expect and/or/and/or.
1329define amdgpu_ps <2 x float> @v_s_s_bfi_sha256_ma_i64(i64 %x, i64 inreg %y, i64 inreg %z) {
1330; GFX7-LABEL: v_s_s_bfi_sha256_ma_i64:
1331; GFX7:       ; %bb.0: ; %entry
1332; GFX7-NEXT:    v_xor_b32_e32 v1, s1, v1
1333; GFX7-NEXT:    v_mov_b32_e32 v2, s1
1334; GFX7-NEXT:    v_bfi_b32 v1, v1, s3, v2
1335; GFX7-NEXT:    v_xor_b32_e32 v0, s0, v0
1336; GFX7-NEXT:    v_mov_b32_e32 v2, s0
1337; GFX7-NEXT:    v_bfi_b32 v0, v0, s2, v2
1338; GFX7-NEXT:    ; return to shader part epilog
1339;
1340; GFX8-LABEL: v_s_s_bfi_sha256_ma_i64:
1341; GFX8:       ; %bb.0: ; %entry
1342; GFX8-NEXT:    v_xor_b32_e32 v1, s1, v1
1343; GFX8-NEXT:    v_mov_b32_e32 v2, s1
1344; GFX8-NEXT:    v_bfi_b32 v1, v1, s3, v2
1345; GFX8-NEXT:    v_xor_b32_e32 v0, s0, v0
1346; GFX8-NEXT:    v_mov_b32_e32 v2, s0
1347; GFX8-NEXT:    v_bfi_b32 v0, v0, s2, v2
1348; GFX8-NEXT:    ; return to shader part epilog
1349;
1350; GFX10-LABEL: v_s_s_bfi_sha256_ma_i64:
1351; GFX10:       ; %bb.0: ; %entry
1352; GFX10-NEXT:    v_xor_b32_e32 v0, s0, v0
1353; GFX10-NEXT:    v_xor_b32_e32 v1, s1, v1
1354; GFX10-NEXT:    v_bfi_b32 v0, v0, s2, s0
1355; GFX10-NEXT:    v_bfi_b32 v1, v1, s3, s1
1356; GFX10-NEXT:    ; return to shader part epilog
1357;
1358; GFX8-GISEL-LABEL: v_s_s_bfi_sha256_ma_i64:
1359; GFX8-GISEL:       ; %bb.0: ; %entry
1360; GFX8-GISEL-NEXT:    v_and_b32_e32 v2, s2, v0
1361; GFX8-GISEL-NEXT:    v_and_b32_e32 v3, s3, v1
1362; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, s2, v0
1363; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, s3, v1
1364; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
1365; GFX8-GISEL-NEXT:    v_and_b32_e32 v1, s1, v1
1366; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v2, v0
1367; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, v3, v1
1368; GFX8-GISEL-NEXT:    ; return to shader part epilog
1369;
1370; GFX10-GISEL-LABEL: v_s_s_bfi_sha256_ma_i64:
1371; GFX10-GISEL:       ; %bb.0: ; %entry
1372; GFX10-GISEL-NEXT:    v_or_b32_e32 v2, s2, v0
1373; GFX10-GISEL-NEXT:    v_or_b32_e32 v3, s3, v1
1374; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, s2, v0
1375; GFX10-GISEL-NEXT:    v_and_b32_e32 v1, s3, v1
1376; GFX10-GISEL-NEXT:    v_and_b32_e32 v2, s0, v2
1377; GFX10-GISEL-NEXT:    v_and_b32_e32 v3, s1, v3
1378; GFX10-GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
1379; GFX10-GISEL-NEXT:    v_or_b32_e32 v1, v1, v3
1380; GFX10-GISEL-NEXT:    ; return to shader part epilog
; A result bit is set when %x and %z are both set, or when %y is set along with either of them.
1381entry:
1382  %and0 = and i64 %x, %z
1383  %or0 = or i64 %x, %z
1384  %and1 = and i64 %y, %or0
1385  %or1 = or i64 %and0, %and1
1386  %cast = bitcast i64 %or1 to <2 x float>
1387  ret <2 x float> %cast
1388}
1389
; Bitwise majority (%x & %z) | (%y & (%x | %z)) on i64, returned as <2 x float>.
; Operand placement under test: %x and %z are `inreg`, %y is not. The checks below
; show %x in s[0:1], %y in v[0:1] and %z in s[2:3].
; The runs without -global-isel expect, per 32-bit half, a v_xor of %x and %y into
; a temporary followed by a v_bfi_b32 selecting %z where they differ and %y
; otherwise; no SGPR-to-VGPR copy appears in any of those checks. The -global-isel
; checks do the and/or of %x and %z on the scalar unit (s_and_b64 / s_or_b64) and
; finish with v_and / v_or.
1390define amdgpu_ps <2 x float> @s_v_s_bfi_sha256_ma_i64(i64 inreg %x, i64 %y, i64 inreg %z) {
1391; GFX7-LABEL: s_v_s_bfi_sha256_ma_i64:
1392; GFX7:       ; %bb.0: ; %entry
1393; GFX7-NEXT:    v_xor_b32_e32 v2, s1, v1
1394; GFX7-NEXT:    v_bfi_b32 v1, v2, s3, v1
1395; GFX7-NEXT:    v_xor_b32_e32 v2, s0, v0
1396; GFX7-NEXT:    v_bfi_b32 v0, v2, s2, v0
1397; GFX7-NEXT:    ; return to shader part epilog
1398;
1399; GFX8-LABEL: s_v_s_bfi_sha256_ma_i64:
1400; GFX8:       ; %bb.0: ; %entry
1401; GFX8-NEXT:    v_xor_b32_e32 v2, s1, v1
1402; GFX8-NEXT:    v_bfi_b32 v1, v2, s3, v1
1403; GFX8-NEXT:    v_xor_b32_e32 v2, s0, v0
1404; GFX8-NEXT:    v_bfi_b32 v0, v2, s2, v0
1405; GFX8-NEXT:    ; return to shader part epilog
1406;
1407; GFX10-LABEL: s_v_s_bfi_sha256_ma_i64:
1408; GFX10:       ; %bb.0: ; %entry
1409; GFX10-NEXT:    v_xor_b32_e32 v2, s0, v0
1410; GFX10-NEXT:    v_xor_b32_e32 v3, s1, v1
1411; GFX10-NEXT:    v_bfi_b32 v0, v2, s2, v0
1412; GFX10-NEXT:    v_bfi_b32 v1, v3, s3, v1
1413; GFX10-NEXT:    ; return to shader part epilog
1414;
1415; GFX8-GISEL-LABEL: s_v_s_bfi_sha256_ma_i64:
1416; GFX8-GISEL:       ; %bb.0: ; %entry
1417; GFX8-GISEL-NEXT:    s_and_b64 s[4:5], s[0:1], s[2:3]
1418; GFX8-GISEL-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
1419; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
1420; GFX8-GISEL-NEXT:    v_and_b32_e32 v1, s1, v1
1421; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, s4, v0
1422; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, s5, v1
1423; GFX8-GISEL-NEXT:    ; return to shader part epilog
1424;
1425; GFX10-GISEL-LABEL: s_v_s_bfi_sha256_ma_i64:
1426; GFX10-GISEL:       ; %bb.0: ; %entry
1427; GFX10-GISEL-NEXT:    s_or_b64 s[4:5], s[0:1], s[2:3]
1428; GFX10-GISEL-NEXT:    s_and_b64 s[0:1], s[0:1], s[2:3]
1429; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, s4, v0
1430; GFX10-GISEL-NEXT:    v_and_b32_e32 v1, s5, v1
1431; GFX10-GISEL-NEXT:    v_or_b32_e32 v0, s0, v0
1432; GFX10-GISEL-NEXT:    v_or_b32_e32 v1, s1, v1
1433; GFX10-GISEL-NEXT:    ; return to shader part epilog
; A result bit is set when %x and %z are both set, or when %y is set along with either of them.
1434entry:
1435  %and0 = and i64 %x, %z
1436  %or0 = or i64 %x, %z
1437  %and1 = and i64 %y, %or0
1438  %or1 = or i64 %and0, %and1
1439  %cast = bitcast i64 %or1 to <2 x float>
1440  ret <2 x float> %cast
1441}
1442
; Bitwise majority (%x & %z) | (%y & (%x | %z)) on i64, returned as <2 x float>.
; Operand placement under test: %x and %y are `inreg`, %z is not. The checks below
; show %x in s[0:1], %y in s[2:3] and %z in v[0:1].
; The runs without -global-isel expect, per 32-bit half, an xor of %x and %y into a
; VGPR followed by a v_bfi_b32 selecting %z where they differ and %y otherwise. The
; GFX7 and GFX8 checks copy %y into v2 before the v_xor, while the GFX10 checks
; xor the two SGPRs directly with the _e64 encoding (presumably a constant-bus
; restriction on the older targets -- TODO confirm). The -global-isel checks
; expect and/or/and/or on the vector unit.
1443define amdgpu_ps <2 x float> @s_s_v_bfi_sha256_ma_i64(i64 inreg %x, i64 inreg %y, i64 %z) {
1444; GFX7-LABEL: s_s_v_bfi_sha256_ma_i64:
1445; GFX7:       ; %bb.0: ; %entry
1446; GFX7-NEXT:    v_mov_b32_e32 v2, s3
1447; GFX7-NEXT:    v_xor_b32_e32 v2, s1, v2
1448; GFX7-NEXT:    v_bfi_b32 v1, v2, v1, s3
1449; GFX7-NEXT:    v_mov_b32_e32 v2, s2
1450; GFX7-NEXT:    v_xor_b32_e32 v2, s0, v2
1451; GFX7-NEXT:    v_bfi_b32 v0, v2, v0, s2
1452; GFX7-NEXT:    ; return to shader part epilog
1453;
1454; GFX8-LABEL: s_s_v_bfi_sha256_ma_i64:
1455; GFX8:       ; %bb.0: ; %entry
1456; GFX8-NEXT:    v_mov_b32_e32 v2, s3
1457; GFX8-NEXT:    v_xor_b32_e32 v2, s1, v2
1458; GFX8-NEXT:    v_bfi_b32 v1, v2, v1, s3
1459; GFX8-NEXT:    v_mov_b32_e32 v2, s2
1460; GFX8-NEXT:    v_xor_b32_e32 v2, s0, v2
1461; GFX8-NEXT:    v_bfi_b32 v0, v2, v0, s2
1462; GFX8-NEXT:    ; return to shader part epilog
1463;
1464; GFX10-LABEL: s_s_v_bfi_sha256_ma_i64:
1465; GFX10:       ; %bb.0: ; %entry
1466; GFX10-NEXT:    v_xor_b32_e64 v2, s0, s2
1467; GFX10-NEXT:    v_xor_b32_e64 v3, s1, s3
1468; GFX10-NEXT:    v_bfi_b32 v0, v2, v0, s2
1469; GFX10-NEXT:    v_bfi_b32 v1, v3, v1, s3
1470; GFX10-NEXT:    ; return to shader part epilog
1471;
1472; GFX8-GISEL-LABEL: s_s_v_bfi_sha256_ma_i64:
1473; GFX8-GISEL:       ; %bb.0: ; %entry
1474; GFX8-GISEL-NEXT:    v_and_b32_e32 v2, s0, v0
1475; GFX8-GISEL-NEXT:    v_and_b32_e32 v3, s1, v1
1476; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, s0, v0
1477; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, s1, v1
1478; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, s2, v0
1479; GFX8-GISEL-NEXT:    v_and_b32_e32 v1, s3, v1
1480; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v2, v0
1481; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, v3, v1
1482; GFX8-GISEL-NEXT:    ; return to shader part epilog
1483;
1484; GFX10-GISEL-LABEL: s_s_v_bfi_sha256_ma_i64:
1485; GFX10-GISEL:       ; %bb.0: ; %entry
1486; GFX10-GISEL-NEXT:    v_or_b32_e32 v2, s0, v0
1487; GFX10-GISEL-NEXT:    v_or_b32_e32 v3, s1, v1
1488; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
1489; GFX10-GISEL-NEXT:    v_and_b32_e32 v1, s1, v1
1490; GFX10-GISEL-NEXT:    v_and_b32_e32 v2, s2, v2
1491; GFX10-GISEL-NEXT:    v_and_b32_e32 v3, s3, v3
1492; GFX10-GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
1493; GFX10-GISEL-NEXT:    v_or_b32_e32 v1, v1, v3
1494; GFX10-GISEL-NEXT:    ; return to shader part epilog
; A result bit is set when %x and %z are both set, or when %y is set along with either of them.
1495entry:
1496  %and0 = and i64 %x, %z
1497  %or0 = or i64 %x, %z
1498  %and1 = and i64 %y, %or0
1499  %or1 = or i64 %and0, %and1
1500  %cast = bitcast i64 %or1 to <2 x float>
1501  ret <2 x float> %cast
1502}
1503
; Bitwise majority (%x & %z) | (%y & (%x | %z)) on i64, returned as <2 x float>.
; Operand placement under test: only %y is `inreg`. The checks below show %x in
; v[0:1], %y in s[0:1] and %z in v[2:3].
; The runs without -global-isel expect, per 32-bit half, a v_xor of %x and %y
; followed by a v_bfi_b32 selecting %z where they differ and %y otherwise, with the
; %y SGPR used directly and no preparatory copy. The -global-isel checks expect
; and/or/and/or and no v_bfi_b32.
1504define amdgpu_ps <2 x float> @v_s_v_bfi_sha256_ma_i64(i64 %x, i64 inreg %y, i64 %z) {
1505; GFX7-LABEL: v_s_v_bfi_sha256_ma_i64:
1506; GFX7:       ; %bb.0: ; %entry
1507; GFX7-NEXT:    v_xor_b32_e32 v1, s1, v1
1508; GFX7-NEXT:    v_xor_b32_e32 v0, s0, v0
1509; GFX7-NEXT:    v_bfi_b32 v1, v1, v3, s1
1510; GFX7-NEXT:    v_bfi_b32 v0, v0, v2, s0
1511; GFX7-NEXT:    ; return to shader part epilog
1512;
1513; GFX8-LABEL: v_s_v_bfi_sha256_ma_i64:
1514; GFX8:       ; %bb.0: ; %entry
1515; GFX8-NEXT:    v_xor_b32_e32 v1, s1, v1
1516; GFX8-NEXT:    v_xor_b32_e32 v0, s0, v0
1517; GFX8-NEXT:    v_bfi_b32 v1, v1, v3, s1
1518; GFX8-NEXT:    v_bfi_b32 v0, v0, v2, s0
1519; GFX8-NEXT:    ; return to shader part epilog
1520;
1521; GFX10-LABEL: v_s_v_bfi_sha256_ma_i64:
1522; GFX10:       ; %bb.0: ; %entry
1523; GFX10-NEXT:    v_xor_b32_e32 v0, s0, v0
1524; GFX10-NEXT:    v_xor_b32_e32 v1, s1, v1
1525; GFX10-NEXT:    v_bfi_b32 v0, v0, v2, s0
1526; GFX10-NEXT:    v_bfi_b32 v1, v1, v3, s1
1527; GFX10-NEXT:    ; return to shader part epilog
1528;
1529; GFX8-GISEL-LABEL: v_s_v_bfi_sha256_ma_i64:
1530; GFX8-GISEL:       ; %bb.0: ; %entry
1531; GFX8-GISEL-NEXT:    v_and_b32_e32 v4, v0, v2
1532; GFX8-GISEL-NEXT:    v_and_b32_e32 v5, v1, v3
1533; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
1534; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, v1, v3
1535; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
1536; GFX8-GISEL-NEXT:    v_and_b32_e32 v1, s1, v1
1537; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v4, v0
1538; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, v5, v1
1539; GFX8-GISEL-NEXT:    ; return to shader part epilog
1540;
1541; GFX10-GISEL-LABEL: v_s_v_bfi_sha256_ma_i64:
1542; GFX10-GISEL:       ; %bb.0: ; %entry
1543; GFX10-GISEL-NEXT:    v_or_b32_e32 v4, v0, v2
1544; GFX10-GISEL-NEXT:    v_or_b32_e32 v5, v1, v3
1545; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, v0, v2
1546; GFX10-GISEL-NEXT:    v_and_b32_e32 v1, v1, v3
1547; GFX10-GISEL-NEXT:    v_and_b32_e32 v2, s0, v4
1548; GFX10-GISEL-NEXT:    v_and_b32_e32 v3, s1, v5
1549; GFX10-GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
1550; GFX10-GISEL-NEXT:    v_or_b32_e32 v1, v1, v3
1551; GFX10-GISEL-NEXT:    ; return to shader part epilog
; A result bit is set when %x and %z are both set, or when %y is set along with either of them.
1552entry:
1553  %and0 = and i64 %x, %z
1554  %or0 = or i64 %x, %z
1555  %and1 = and i64 %y, %or0
1556  %or1 = or i64 %and0, %and1
1557  %cast = bitcast i64 %or1 to <2 x float>
1558  ret <2 x float> %cast
1559}
1560
; Scalar 64-bit bitselect, form 0: (%a & %b) | (~%a & %mask).
; %a is the selector: the result takes the bits of %b where %a is set and the
; bits of %mask where %a is clear.
; All three operands are kernel arguments. Every check prefix expects the
; select to be done with s_and_b64 / s_andn2_b64 / s_or_b64 and the +10 with
; s_add_u32 / s_addc_u32; the two -GISEL prefixes additionally expect an
; s_cselect_b32 / s_and_b32 / s_cmp_lg_u32 sequence between the two adds.
1561define amdgpu_kernel void @s_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) {
1562; GFX7-LABEL: s_bitselect_i64_pat_0:
1563; GFX7:       ; %bb.0:
1564; GFX7-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
1565; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xd
1566; GFX7-NEXT:    s_mov_b32 s3, 0xf000
1567; GFX7-NEXT:    s_mov_b32 s2, -1
1568; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
1569; GFX7-NEXT:    s_and_b64 s[6:7], s[4:5], s[6:7]
1570; GFX7-NEXT:    s_andn2_b64 s[0:1], s[0:1], s[4:5]
1571; GFX7-NEXT:    s_or_b64 s[0:1], s[6:7], s[0:1]
1572; GFX7-NEXT:    s_add_u32 s0, s0, 10
1573; GFX7-NEXT:    s_addc_u32 s1, s1, 0
1574; GFX7-NEXT:    v_mov_b32_e32 v0, s0
1575; GFX7-NEXT:    v_mov_b32_e32 v1, s1
1576; GFX7-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1577; GFX7-NEXT:    s_endpgm
1578;
1579; GFX8-LABEL: s_bitselect_i64_pat_0:
1580; GFX8:       ; %bb.0:
1581; GFX8-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
1582; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
1583; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
1584; GFX8-NEXT:    s_and_b64 s[2:3], s[4:5], s[6:7]
1585; GFX8-NEXT:    s_andn2_b64 s[0:1], s[0:1], s[4:5]
1586; GFX8-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
1587; GFX8-NEXT:    s_add_u32 s0, s0, 10
1588; GFX8-NEXT:    s_addc_u32 s1, s1, 0
1589; GFX8-NEXT:    v_mov_b32_e32 v0, s0
1590; GFX8-NEXT:    v_mov_b32_e32 v1, s1
1591; GFX8-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
1592; GFX8-NEXT:    s_endpgm
1593;
1594; GFX10-LABEL: s_bitselect_i64_pat_0:
1595; GFX10:       ; %bb.0:
1596; GFX10-NEXT:    s_clause 0x1
1597; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
1598; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
1599; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1600; GFX10-NEXT:    s_and_b64 s[2:3], s[4:5], s[6:7]
1601; GFX10-NEXT:    s_andn2_b64 s[0:1], s[0:1], s[4:5]
1602; GFX10-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
1603; GFX10-NEXT:    s_add_u32 s0, s0, 10
1604; GFX10-NEXT:    s_addc_u32 s1, s1, 0
1605; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1606; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1607; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
1608; GFX10-NEXT:    s_endpgm
1609;
1610; GFX8-GISEL-LABEL: s_bitselect_i64_pat_0:
1611; GFX8-GISEL:       ; %bb.0:
1612; GFX8-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
1613; GFX8-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
1614; GFX8-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1615; GFX8-GISEL-NEXT:    s_and_b64 s[2:3], s[4:5], s[6:7]
1616; GFX8-GISEL-NEXT:    s_andn2_b64 s[0:1], s[0:1], s[4:5]
1617; GFX8-GISEL-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
1618; GFX8-GISEL-NEXT:    s_add_u32 s0, s0, 10
1619; GFX8-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
1620; GFX8-GISEL-NEXT:    s_and_b32 s2, s2, 1
1621; GFX8-GISEL-NEXT:    s_cmp_lg_u32 s2, 0
1622; GFX8-GISEL-NEXT:    s_addc_u32 s1, s1, 0
1623; GFX8-GISEL-NEXT:    v_mov_b32_e32 v0, s0
1624; GFX8-GISEL-NEXT:    v_mov_b32_e32 v1, s1
1625; GFX8-GISEL-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
1626; GFX8-GISEL-NEXT:    s_endpgm
1627;
1628; GFX10-GISEL-LABEL: s_bitselect_i64_pat_0:
1629; GFX10-GISEL:       ; %bb.0:
1630; GFX10-GISEL-NEXT:    s_clause 0x1
1631; GFX10-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
1632; GFX10-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
1633; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1634; GFX10-GISEL-NEXT:    s_and_b64 s[2:3], s[4:5], s[6:7]
1635; GFX10-GISEL-NEXT:    s_andn2_b64 s[0:1], s[0:1], s[4:5]
1636; GFX10-GISEL-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
1637; GFX10-GISEL-NEXT:    s_add_u32 s0, s0, 10
1638; GFX10-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
1639; GFX10-GISEL-NEXT:    s_and_b32 s2, s2, 1
1640; GFX10-GISEL-NEXT:    s_cmp_lg_u32 s2, 0
1641; GFX10-GISEL-NEXT:    s_addc_u32 s1, s1, 0
1642; GFX10-GISEL-NEXT:    v_mov_b32_e32 v0, s0
1643; GFX10-GISEL-NEXT:    v_mov_b32_e32 v1, s1
1644; GFX10-GISEL-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
1645; GFX10-GISEL-NEXT:    s_endpgm
1646  %and0 = and i64 %a, %b
1647  %not.a = xor i64 %a, -1
1648  %and1 = and i64 %not.a, %mask
1649  %bitselect = or i64 %and0, %and1
  ; Feed the select result into an integer add and store the sum; the store
  ; destination is an undef global pointer.
  ; NOTE(review): presumably the add exists to give the select a scalar (SALU)
  ; user -- confirm against the commit that added this test.
1650  %scalar.use = add i64 %bitselect, 10
1651  store i64 %scalar.use, i64 addrspace(1)* undef
1652  ret void
1653}
1654
; Scalar 64-bit bitselect, xor form: ((%a ^ %mask) & %b) ^ %mask.
; %b is the selector here: where a bit of %b is set the two xors with %mask
; cancel and the result bit comes from %a; where it is clear the result bit
; comes from %mask.
; Every check prefix expects the three operations to stay as
; s_xor_b64 / s_and_b64 / s_xor_b64, followed by the +10 as
; s_add_u32 / s_addc_u32; the two -GISEL prefixes additionally expect an
; s_cselect_b32 / s_and_b32 / s_cmp_lg_u32 sequence between the two adds.
1655define amdgpu_kernel void @s_bitselect_i64_pat_1(i64 %a, i64 %b, i64 %mask) {
1656; GFX7-LABEL: s_bitselect_i64_pat_1:
1657; GFX7:       ; %bb.0:
1658; GFX7-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
1659; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xd
1660; GFX7-NEXT:    s_mov_b32 s3, 0xf000
1661; GFX7-NEXT:    s_mov_b32 s2, -1
1662; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
1663; GFX7-NEXT:    s_xor_b64 s[4:5], s[4:5], s[0:1]
1664; GFX7-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
1665; GFX7-NEXT:    s_xor_b64 s[0:1], s[4:5], s[0:1]
1666; GFX7-NEXT:    s_add_u32 s0, s0, 10
1667; GFX7-NEXT:    s_addc_u32 s1, s1, 0
1668; GFX7-NEXT:    v_mov_b32_e32 v0, s0
1669; GFX7-NEXT:    v_mov_b32_e32 v1, s1
1670; GFX7-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1671; GFX7-NEXT:    s_endpgm
1672;
1673; GFX8-LABEL: s_bitselect_i64_pat_1:
1674; GFX8:       ; %bb.0:
1675; GFX8-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
1676; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
1677; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
1678; GFX8-NEXT:    s_xor_b64 s[2:3], s[4:5], s[0:1]
1679; GFX8-NEXT:    s_and_b64 s[2:3], s[2:3], s[6:7]
1680; GFX8-NEXT:    s_xor_b64 s[0:1], s[2:3], s[0:1]
1681; GFX8-NEXT:    s_add_u32 s0, s0, 10
1682; GFX8-NEXT:    s_addc_u32 s1, s1, 0
1683; GFX8-NEXT:    v_mov_b32_e32 v0, s0
1684; GFX8-NEXT:    v_mov_b32_e32 v1, s1
1685; GFX8-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
1686; GFX8-NEXT:    s_endpgm
1687;
1688; GFX10-LABEL: s_bitselect_i64_pat_1:
1689; GFX10:       ; %bb.0:
1690; GFX10-NEXT:    s_clause 0x1
1691; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
1692; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
1693; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1694; GFX10-NEXT:    s_xor_b64 s[2:3], s[4:5], s[0:1]
1695; GFX10-NEXT:    s_and_b64 s[2:3], s[2:3], s[6:7]
1696; GFX10-NEXT:    s_xor_b64 s[0:1], s[2:3], s[0:1]
1697; GFX10-NEXT:    s_add_u32 s0, s0, 10
1698; GFX10-NEXT:    s_addc_u32 s1, s1, 0
1699; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1700; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1701; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
1702; GFX10-NEXT:    s_endpgm
1703;
1704; GFX8-GISEL-LABEL: s_bitselect_i64_pat_1:
1705; GFX8-GISEL:       ; %bb.0:
1706; GFX8-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
1707; GFX8-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
1708; GFX8-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1709; GFX8-GISEL-NEXT:    s_xor_b64 s[2:3], s[4:5], s[0:1]
1710; GFX8-GISEL-NEXT:    s_and_b64 s[2:3], s[2:3], s[6:7]
1711; GFX8-GISEL-NEXT:    s_xor_b64 s[0:1], s[2:3], s[0:1]
1712; GFX8-GISEL-NEXT:    s_add_u32 s0, s0, 10
1713; GFX8-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
1714; GFX8-GISEL-NEXT:    s_and_b32 s2, s2, 1
1715; GFX8-GISEL-NEXT:    s_cmp_lg_u32 s2, 0
1716; GFX8-GISEL-NEXT:    s_addc_u32 s1, s1, 0
1717; GFX8-GISEL-NEXT:    v_mov_b32_e32 v0, s0
1718; GFX8-GISEL-NEXT:    v_mov_b32_e32 v1, s1
1719; GFX8-GISEL-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
1720; GFX8-GISEL-NEXT:    s_endpgm
1721;
1722; GFX10-GISEL-LABEL: s_bitselect_i64_pat_1:
1723; GFX10-GISEL:       ; %bb.0:
1724; GFX10-GISEL-NEXT:    s_clause 0x1
1725; GFX10-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
1726; GFX10-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
1727; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1728; GFX10-GISEL-NEXT:    s_xor_b64 s[2:3], s[4:5], s[0:1]
1729; GFX10-GISEL-NEXT:    s_and_b64 s[2:3], s[2:3], s[6:7]
1730; GFX10-GISEL-NEXT:    s_xor_b64 s[0:1], s[2:3], s[0:1]
1731; GFX10-GISEL-NEXT:    s_add_u32 s0, s0, 10
1732; GFX10-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
1733; GFX10-GISEL-NEXT:    s_and_b32 s2, s2, 1
1734; GFX10-GISEL-NEXT:    s_cmp_lg_u32 s2, 0
1735; GFX10-GISEL-NEXT:    s_addc_u32 s1, s1, 0
1736; GFX10-GISEL-NEXT:    v_mov_b32_e32 v0, s0
1737; GFX10-GISEL-NEXT:    v_mov_b32_e32 v1, s1
1738; GFX10-GISEL-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
1739; GFX10-GISEL-NEXT:    s_endpgm
1740  %xor.0 = xor i64 %a, %mask
1741  %and = and i64 %xor.0, %b
1742  %bitselect = xor i64 %and, %mask
1743
  ; Feed the select result into an integer add and store the sum; the store
  ; destination is an undef global pointer.
  ; NOTE(review): presumably the add exists to give the select a scalar (SALU)
  ; user -- confirm against the commit that added this test.
1744  %scalar.use = add i64 %bitselect, 10
1745  store i64 %scalar.use, i64 addrspace(1)* undef
1746  ret void
1747}
1748
; Scalar 64-bit bitselect, xor form: ((%a ^ %mask) & %b) ^ %mask.
; %b is the selector: the result takes the bits of %a where %b is set and the
; bits of %mask where %b is clear.
; Every check prefix expects s_xor_b64 / s_and_b64 / s_xor_b64 followed by
; s_add_u32 / s_addc_u32; the two -GISEL prefixes additionally expect an
; s_cselect_b32 / s_and_b32 / s_cmp_lg_u32 sequence between the two adds.
; NOTE(review): the IR body below is instruction-for-instruction the same as
; @s_bitselect_i64_pat_1, so this function does not exercise a distinct
; pattern. Presumably a different operand order or commuted form was
; intended -- confirm, and regenerate the checks with
; utils/update_llc_test_checks.py if the IR is changed.
1749define amdgpu_kernel void @s_bitselect_i64_pat_2(i64 %a, i64 %b, i64 %mask) {
1750; GFX7-LABEL: s_bitselect_i64_pat_2:
1751; GFX7:       ; %bb.0:
1752; GFX7-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
1753; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xd
1754; GFX7-NEXT:    s_mov_b32 s3, 0xf000
1755; GFX7-NEXT:    s_mov_b32 s2, -1
1756; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
1757; GFX7-NEXT:    s_xor_b64 s[4:5], s[4:5], s[0:1]
1758; GFX7-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
1759; GFX7-NEXT:    s_xor_b64 s[0:1], s[4:5], s[0:1]
1760; GFX7-NEXT:    s_add_u32 s0, s0, 10
1761; GFX7-NEXT:    s_addc_u32 s1, s1, 0
1762; GFX7-NEXT:    v_mov_b32_e32 v0, s0
1763; GFX7-NEXT:    v_mov_b32_e32 v1, s1
1764; GFX7-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1765; GFX7-NEXT:    s_endpgm
1766;
1767; GFX8-LABEL: s_bitselect_i64_pat_2:
1768; GFX8:       ; %bb.0:
1769; GFX8-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
1770; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
1771; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
1772; GFX8-NEXT:    s_xor_b64 s[2:3], s[4:5], s[0:1]
1773; GFX8-NEXT:    s_and_b64 s[2:3], s[2:3], s[6:7]
1774; GFX8-NEXT:    s_xor_b64 s[0:1], s[2:3], s[0:1]
1775; GFX8-NEXT:    s_add_u32 s0, s0, 10
1776; GFX8-NEXT:    s_addc_u32 s1, s1, 0
1777; GFX8-NEXT:    v_mov_b32_e32 v0, s0
1778; GFX8-NEXT:    v_mov_b32_e32 v1, s1
1779; GFX8-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
1780; GFX8-NEXT:    s_endpgm
1781;
1782; GFX10-LABEL: s_bitselect_i64_pat_2:
1783; GFX10:       ; %bb.0:
1784; GFX10-NEXT:    s_clause 0x1
1785; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
1786; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
1787; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1788; GFX10-NEXT:    s_xor_b64 s[2:3], s[4:5], s[0:1]
1789; GFX10-NEXT:    s_and_b64 s[2:3], s[2:3], s[6:7]
1790; GFX10-NEXT:    s_xor_b64 s[0:1], s[2:3], s[0:1]
1791; GFX10-NEXT:    s_add_u32 s0, s0, 10
1792; GFX10-NEXT:    s_addc_u32 s1, s1, 0
1793; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1794; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1795; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
1796; GFX10-NEXT:    s_endpgm
1797;
1798; GFX8-GISEL-LABEL: s_bitselect_i64_pat_2:
1799; GFX8-GISEL:       ; %bb.0:
1800; GFX8-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
1801; GFX8-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
1802; GFX8-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1803; GFX8-GISEL-NEXT:    s_xor_b64 s[2:3], s[4:5], s[0:1]
1804; GFX8-GISEL-NEXT:    s_and_b64 s[2:3], s[2:3], s[6:7]
1805; GFX8-GISEL-NEXT:    s_xor_b64 s[0:1], s[2:3], s[0:1]
1806; GFX8-GISEL-NEXT:    s_add_u32 s0, s0, 10
1807; GFX8-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
1808; GFX8-GISEL-NEXT:    s_and_b32 s2, s2, 1
1809; GFX8-GISEL-NEXT:    s_cmp_lg_u32 s2, 0
1810; GFX8-GISEL-NEXT:    s_addc_u32 s1, s1, 0
1811; GFX8-GISEL-NEXT:    v_mov_b32_e32 v0, s0
1812; GFX8-GISEL-NEXT:    v_mov_b32_e32 v1, s1
1813; GFX8-GISEL-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
1814; GFX8-GISEL-NEXT:    s_endpgm
1815;
1816; GFX10-GISEL-LABEL: s_bitselect_i64_pat_2:
1817; GFX10-GISEL:       ; %bb.0:
1818; GFX10-GISEL-NEXT:    s_clause 0x1
1819; GFX10-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
1820; GFX10-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
1821; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1822; GFX10-GISEL-NEXT:    s_xor_b64 s[2:3], s[4:5], s[0:1]
1823; GFX10-GISEL-NEXT:    s_and_b64 s[2:3], s[2:3], s[6:7]
1824; GFX10-GISEL-NEXT:    s_xor_b64 s[0:1], s[2:3], s[0:1]
1825; GFX10-GISEL-NEXT:    s_add_u32 s0, s0, 10
1826; GFX10-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
1827; GFX10-GISEL-NEXT:    s_and_b32 s2, s2, 1
1828; GFX10-GISEL-NEXT:    s_cmp_lg_u32 s2, 0
1829; GFX10-GISEL-NEXT:    s_addc_u32 s1, s1, 0
1830; GFX10-GISEL-NEXT:    v_mov_b32_e32 v0, s0
1831; GFX10-GISEL-NEXT:    v_mov_b32_e32 v1, s1
1832; GFX10-GISEL-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
1833; GFX10-GISEL-NEXT:    s_endpgm
1834  %xor.0 = xor i64 %a, %mask
1835  %and = and i64 %xor.0, %b
1836  %bitselect = xor i64 %and, %mask
1837
  ; Feed the select result into an integer add and store the sum; the store
  ; destination is an undef global pointer.
  ; NOTE(review): presumably the add exists to give the select a scalar (SALU)
  ; user -- confirm against the commit that added this test.
1838  %scalar.use = add i64 %bitselect, 10
1839  store i64 %scalar.use, i64 addrspace(1)* undef
1840  ret void
1841}
1842
; Scalar 64-bit version of the and/or pattern (%x & %z) | (%y & (%x | %z)).
; Each result bit is set when at least two of the corresponding bits of
; %x, %y and %z are set (bitwise majority).
; NOTE(review): the "sha256_ma" in the name presumably refers to the SHA-256
; Ma() function -- confirm.
; All three operands are kernel arguments. Every check prefix expects two
; s_and_b64 and two s_or_b64 followed by s_add_u32 / s_addc_u32 for the +10.
; The GFX7 and both GFX8 prefixes expect the order and, or, and, or; both
; GFX10 prefixes expect or, and, and, or. The two -GISEL prefixes additionally
; expect an s_cselect_b32 / s_and_b32 / s_cmp_lg_u32 sequence between the adds.
1843define amdgpu_kernel void @s_bfi_sha256_ma_i64(i64 %x, i64 %y, i64 %z) {
1844; GFX7-LABEL: s_bfi_sha256_ma_i64:
1845; GFX7:       ; %bb.0: ; %entry
1846; GFX7-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
1847; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xd
1848; GFX7-NEXT:    s_mov_b32 s3, 0xf000
1849; GFX7-NEXT:    s_mov_b32 s2, -1
1850; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
1851; GFX7-NEXT:    s_and_b64 s[8:9], s[4:5], s[0:1]
1852; GFX7-NEXT:    s_or_b64 s[0:1], s[4:5], s[0:1]
1853; GFX7-NEXT:    s_and_b64 s[0:1], s[6:7], s[0:1]
1854; GFX7-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
1855; GFX7-NEXT:    s_add_u32 s0, s0, 10
1856; GFX7-NEXT:    s_addc_u32 s1, s1, 0
1857; GFX7-NEXT:    v_mov_b32_e32 v0, s0
1858; GFX7-NEXT:    v_mov_b32_e32 v1, s1
1859; GFX7-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1860; GFX7-NEXT:    s_endpgm
1861;
1862; GFX8-LABEL: s_bfi_sha256_ma_i64:
1863; GFX8:       ; %bb.0: ; %entry
1864; GFX8-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
1865; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
1866; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
1867; GFX8-NEXT:    s_and_b64 s[2:3], s[4:5], s[0:1]
1868; GFX8-NEXT:    s_or_b64 s[0:1], s[4:5], s[0:1]
1869; GFX8-NEXT:    s_and_b64 s[0:1], s[6:7], s[0:1]
1870; GFX8-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
1871; GFX8-NEXT:    s_add_u32 s0, s0, 10
1872; GFX8-NEXT:    s_addc_u32 s1, s1, 0
1873; GFX8-NEXT:    v_mov_b32_e32 v0, s0
1874; GFX8-NEXT:    v_mov_b32_e32 v1, s1
1875; GFX8-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
1876; GFX8-NEXT:    s_endpgm
1877;
1878; GFX10-LABEL: s_bfi_sha256_ma_i64:
1879; GFX10:       ; %bb.0: ; %entry
1880; GFX10-NEXT:    s_clause 0x1
1881; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
1882; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
1883; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1884; GFX10-NEXT:    s_or_b64 s[2:3], s[4:5], s[0:1]
1885; GFX10-NEXT:    s_and_b64 s[0:1], s[4:5], s[0:1]
1886; GFX10-NEXT:    s_and_b64 s[2:3], s[6:7], s[2:3]
1887; GFX10-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
1888; GFX10-NEXT:    s_add_u32 s0, s0, 10
1889; GFX10-NEXT:    s_addc_u32 s1, s1, 0
1890; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1891; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1892; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
1893; GFX10-NEXT:    s_endpgm
1894;
1895; GFX8-GISEL-LABEL: s_bfi_sha256_ma_i64:
1896; GFX8-GISEL:       ; %bb.0: ; %entry
1897; GFX8-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
1898; GFX8-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
1899; GFX8-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1900; GFX8-GISEL-NEXT:    s_and_b64 s[2:3], s[4:5], s[0:1]
1901; GFX8-GISEL-NEXT:    s_or_b64 s[0:1], s[4:5], s[0:1]
1902; GFX8-GISEL-NEXT:    s_and_b64 s[0:1], s[6:7], s[0:1]
1903; GFX8-GISEL-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
1904; GFX8-GISEL-NEXT:    s_add_u32 s0, s0, 10
1905; GFX8-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
1906; GFX8-GISEL-NEXT:    s_and_b32 s2, s2, 1
1907; GFX8-GISEL-NEXT:    s_cmp_lg_u32 s2, 0
1908; GFX8-GISEL-NEXT:    s_addc_u32 s1, s1, 0
1909; GFX8-GISEL-NEXT:    v_mov_b32_e32 v0, s0
1910; GFX8-GISEL-NEXT:    v_mov_b32_e32 v1, s1
1911; GFX8-GISEL-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
1912; GFX8-GISEL-NEXT:    s_endpgm
1913;
1914; GFX10-GISEL-LABEL: s_bfi_sha256_ma_i64:
1915; GFX10-GISEL:       ; %bb.0: ; %entry
1916; GFX10-GISEL-NEXT:    s_clause 0x1
1917; GFX10-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
1918; GFX10-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
1919; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1920; GFX10-GISEL-NEXT:    s_or_b64 s[2:3], s[4:5], s[0:1]
1921; GFX10-GISEL-NEXT:    s_and_b64 s[0:1], s[4:5], s[0:1]
1922; GFX10-GISEL-NEXT:    s_and_b64 s[2:3], s[6:7], s[2:3]
1923; GFX10-GISEL-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
1924; GFX10-GISEL-NEXT:    s_add_u32 s0, s0, 10
1925; GFX10-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
1926; GFX10-GISEL-NEXT:    s_and_b32 s2, s2, 1
1927; GFX10-GISEL-NEXT:    s_cmp_lg_u32 s2, 0
1928; GFX10-GISEL-NEXT:    s_addc_u32 s1, s1, 0
1929; GFX10-GISEL-NEXT:    v_mov_b32_e32 v0, s0
1930; GFX10-GISEL-NEXT:    v_mov_b32_e32 v1, s1
1931; GFX10-GISEL-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
1932; GFX10-GISEL-NEXT:    s_endpgm
1933entry:
1934  %and0 = and i64 %x, %z
1935  %or0 = or i64 %x, %z
1936  %and1 = and i64 %y, %or0
1937  %or1 = or i64 %and0, %and1
1938
  ; Feed the result into an integer add and store the sum; the store
  ; destination is an undef global pointer.
  ; NOTE(review): presumably the add exists to give the pattern a scalar (SALU)
  ; user -- confirm against the commit that added this test.
1939  %scalar.use = add i64 %or1, 10
1940  store i64 %scalar.use, i64 addrspace(1)* undef
1941  ret void
1942}
1943