; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=verde < %s | FileCheck -enable-var-scope -check-prefixes=SI,GCN %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -enable-var-scope -check-prefixes=VI,GCN %s

define amdgpu_kernel void @xor_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) {
; SI-LABEL: xor_v2i32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0xd
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s12, s2
; SI-NEXT:    s_mov_b32 s13, s3
; SI-NEXT:    s_mov_b32 s14, s6
; SI-NEXT:    s_mov_b32 s15, s7
; SI-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
; SI-NEXT:    buffer_load_dwordx2 v[2:3], off, s[12:15], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_xor_b32_e32 v1, v3, v1
; SI-NEXT:    v_xor_b32_e32 v0, v2, v0
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: xor_v2i32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x34
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v0, s2
; VI-NEXT:    v_mov_b32_e32 v1, s3
; VI-NEXT:    v_mov_b32_e32 v2, s4
; VI-NEXT:    v_mov_b32_e32 v3, s5
; VI-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
; VI-NEXT:    flat_load_dwordx2 v[2:3], v[2:3]
; VI-NEXT:    v_mov_b32_e32 v4, s0
; VI-NEXT:    v_mov_b32_e32 v5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_xor_b32_e32 v1, v1, v3
; VI-NEXT:    v_xor_b32_e32 v0, v0, v2
; VI-NEXT:    flat_store_dwordx2 v[4:5], v[0:1]
; VI-NEXT:    s_endpgm
  %a = load <2 x i32>, ptr addrspace(1) %in0
  %b = load <2 x i32>, ptr addrspace(1) %in1
  %result = xor <2 x i32> %a, %b
  store <2 x i32> %result, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @xor_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) {
; SI-LABEL: xor_v4i32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0xd
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s12, s2
; SI-NEXT:    s_mov_b32 s13, s3
; SI-NEXT:    s_mov_b32 s14, s6
; SI-NEXT:    s_mov_b32 s15, s7
; SI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
; SI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[12:15], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_xor_b32_e32 v3, v7, v3
; SI-NEXT:    v_xor_b32_e32 v2, v6, v2
; SI-NEXT:    v_xor_b32_e32 v1, v5, v1
; SI-NEXT:    v_xor_b32_e32 v0, v4, v0
; SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: xor_v4i32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x34
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v0, s2
; VI-NEXT:    v_mov_b32_e32 v1, s3
; VI-NEXT:    v_mov_b32_e32 v4, s4
; VI-NEXT:    v_mov_b32_e32 v5, s5
; VI-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
; VI-NEXT:    flat_load_dwordx4 v[4:7], v[4:5]
; VI-NEXT:    v_mov_b32_e32 v8, s0
; VI-NEXT:    v_mov_b32_e32 v9, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_xor_b32_e32 v3, v3, v7
; VI-NEXT:    v_xor_b32_e32 v2, v2, v6
; VI-NEXT:    v_xor_b32_e32 v1, v1, v5
; VI-NEXT:    v_xor_b32_e32 v0, v0, v4
; VI-NEXT:    flat_store_dwordx4 v[8:9], v[0:3]
; VI-NEXT:    s_endpgm
  %a = load <4 x i32>, ptr addrspace(1) %in0
  %b = load <4 x i32>, ptr addrspace(1) %in1
  %result = xor <4 x i32> %a, %b
  store <4 x i32> %result, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @xor_i1(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) {
; SI-LABEL: xor_i1:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0xd
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s12, s2
; SI-NEXT:    s_mov_b32 s13, s3
; SI-NEXT:    s_mov_b32 s14, s6
; SI-NEXT:    s_mov_b32 s15, s7
; SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; SI-NEXT:    buffer_load_dword v1, off, s[12:15], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(1)
; SI-NEXT:    v_cmp_le_f32_e32 vcc, 1.0, v0
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_cmp_le_f32_e64 s[0:1], 0, v1
; SI-NEXT:    s_xor_b64 vcc, s[0:1], vcc
; SI-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: xor_i1:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x34
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v0, s2
; VI-NEXT:    v_mov_b32_e32 v1, s3
; VI-NEXT:    v_mov_b32_e32 v2, s4
; VI-NEXT:    v_mov_b32_e32 v3, s5
; VI-NEXT:    flat_load_dword v4, v[0:1]
; VI-NEXT:    flat_load_dword v2, v[2:3]
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    s_waitcnt vmcnt(1)
; VI-NEXT:    v_cmp_le_f32_e32 vcc, 0, v4
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_cmp_le_f32_e64 s[0:1], 1.0, v2
; VI-NEXT:    s_xor_b64 vcc, vcc, s[0:1]
; VI-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_endpgm
  %a = load float, ptr addrspace(1) %in0
  %b = load float, ptr addrspace(1) %in1
  %acmp = fcmp oge float %a, 0.000000e+00
  %bcmp = fcmp oge float %b, 1.000000e+00
  %xor = xor i1 %acmp, %bcmp
  %result = select i1 %xor, float %a, float %b
  store float %result, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @v_xor_i1(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) {
; SI-LABEL: v_xor_i1:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0xd
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s14, s6
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s12, s2
; SI-NEXT:    s_mov_b32 s13, s3
; SI-NEXT:    s_mov_b32 s15, s7
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    buffer_load_ubyte v0, off, s[12:15], 0 glc
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    buffer_load_ubyte v1, off, s[8:11], 0 glc
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    v_xor_b32_e32 v0, v0, v1
; SI-NEXT:    v_and_b32_e32 v0, 1, v0
; SI-NEXT:    buffer_store_byte v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: v_xor_i1:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x34
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v0, s2
; VI-NEXT:    v_mov_b32_e32 v1, s3
; VI-NEXT:    v_mov_b32_e32 v2, s4
; VI-NEXT:    v_mov_b32_e32 v3, s5
; VI-NEXT:    flat_load_ubyte v4, v[0:1] glc
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    flat_load_ubyte v2, v[2:3] glc
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    v_xor_b32_e32 v2, v4, v2
; VI-NEXT:    v_and_b32_e32 v2, 1, v2
; VI-NEXT:    flat_store_byte v[0:1], v2
; VI-NEXT:    s_endpgm
  %a = load volatile i1, ptr addrspace(1) %in0
  %b = load volatile i1, ptr addrspace(1) %in1
  %xor = xor i1 %a, %b
  store i1 %xor, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @vector_xor_i32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) {
; SI-LABEL: vector_xor_i32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0xd
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s14, s6
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s12, s2
; SI-NEXT:    s_mov_b32 s13, s3
; SI-NEXT:    s_mov_b32 s15, s7
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    buffer_load_dword v0, off, s[12:15], 0
; SI-NEXT:    buffer_load_dword v1, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_xor_b32_e32 v0, v0, v1
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: vector_xor_i32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x34
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v0, s2
; VI-NEXT:    v_mov_b32_e32 v1, s3
; VI-NEXT:    v_mov_b32_e32 v2, s4
; VI-NEXT:    v_mov_b32_e32 v3, s5
; VI-NEXT:    flat_load_dword v4, v[0:1]
; VI-NEXT:    flat_load_dword v2, v[2:3]
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_xor_b32_e32 v2, v4, v2
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_endpgm
  %a = load i32, ptr addrspace(1) %in0
  %b = load i32, ptr addrspace(1) %in1
  %result = xor i32 %a, %b
  store i32 %result, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @scalar_xor_i32(ptr addrspace(1) %out, i32 %a, i32 %b) {
; SI-LABEL: scalar_xor_i32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_xor_b32 s0, s2, s3
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    v_mov_b32_e32 v0, s0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: scalar_xor_i32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_xor_b32 s2, s2, s3
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    v_mov_b32_e32 v2, s2
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_endpgm
  %result = xor i32 %a, %b
  store i32 %result, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @scalar_not_i32(ptr addrspace(1) %out, i32 %a) {
; SI-LABEL: scalar_not_i32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dword s6, s[4:5], 0xb
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_not_b32 s4, s6
; SI-NEXT:    v_mov_b32_e32 v0, s4
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: scalar_not_i32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s2, s[4:5], 0x2c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_not_b32 s2, s2
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    v_mov_b32_e32 v2, s2
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_endpgm
  %result = xor i32 %a, -1
  store i32 %result, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @vector_not_i32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) {
; SI-LABEL: vector_not_i32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_not_b32_e32 v0, v0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: vector_not_i32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v0, s2
; VI-NEXT:    v_mov_b32_e32 v1, s3
; VI-NEXT:    flat_load_dword v2, v[0:1]
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_not_b32_e32 v2, v2
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_endpgm
  %a = load i32, ptr addrspace(1) %in0
  %b = load i32, ptr addrspace(1) %in1
  %result = xor i32 %a, -1
  store i32 %result, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @vector_xor_i64(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) {
; SI-LABEL: vector_xor_i64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0xd
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s12, s2
; SI-NEXT:    s_mov_b32 s13, s3
; SI-NEXT:    s_mov_b32 s14, s6
; SI-NEXT:    s_mov_b32 s15, s7
; SI-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
; SI-NEXT:    buffer_load_dwordx2 v[2:3], off, s[12:15], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_xor_b32_e32 v0, v2, v0
; SI-NEXT:    v_xor_b32_e32 v1, v3, v1
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: vector_xor_i64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x34
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v0, s2
; VI-NEXT:    v_mov_b32_e32 v1, s3
; VI-NEXT:    v_mov_b32_e32 v2, s4
; VI-NEXT:    v_mov_b32_e32 v3, s5
; VI-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
; VI-NEXT:    flat_load_dwordx2 v[2:3], v[2:3]
; VI-NEXT:    v_mov_b32_e32 v4, s0
; VI-NEXT:    v_mov_b32_e32 v5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_xor_b32_e32 v0, v0, v2
; VI-NEXT:    v_xor_b32_e32 v1, v1, v3
; VI-NEXT:    flat_store_dwordx2 v[4:5], v[0:1]
; VI-NEXT:    s_endpgm
  %a = load i64, ptr addrspace(1) %in0
  %b = load i64, ptr addrspace(1) %in1
  %result = xor i64 %a, %b
  store i64 %result, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @scalar_xor_i64(ptr addrspace(1) %out, i64 %a, i64 %b) {
; SI-LABEL: scalar_xor_i64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0xd
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_xor_b64 s[0:1], s[2:3], s[8:9]
; SI-NEXT:    v_mov_b32_e32 v0, s0
; SI-NEXT:    v_mov_b32_e32 v1, s1
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: scalar_xor_i64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x34
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    s_xor_b64 s[0:1], s[2:3], s[4:5]
; VI-NEXT:    v_mov_b32_e32 v3, s1
; VI-NEXT:    v_mov_b32_e32 v2, s0
; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
; VI-NEXT:    s_endpgm
  %result = xor i64 %a, %b
  store i64 %result, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @scalar_not_i64(ptr addrspace(1) %out, i64 %a) {
; SI-LABEL: scalar_not_i64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_not_b64 s[0:1], s[2:3]
; SI-NEXT:    v_mov_b32_e32 v0, s0
; SI-NEXT:    v_mov_b32_e32 v1, s1
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: scalar_not_i64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    s_not_b64 s[0:1], s[2:3]
; VI-NEXT:    v_mov_b32_e32 v3, s1
; VI-NEXT:    v_mov_b32_e32 v2, s0
; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
; VI-NEXT:    s_endpgm
  %result = xor i64 %a, -1
  store i64 %result, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @vector_not_i64(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) {
; SI-LABEL: vector_not_i64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_not_b32_e32 v0, v0
; SI-NEXT:    v_not_b32_e32 v1, v1
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: vector_not_i64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v0, s2
; VI-NEXT:    v_mov_b32_e32 v1, s3
; VI-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
; VI-NEXT:    v_mov_b32_e32 v2, s0
; VI-NEXT:    v_mov_b32_e32 v3, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_not_b32_e32 v0, v0
; VI-NEXT:    v_not_b32_e32 v1, v1
; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT:    s_endpgm
  %a = load i64, ptr addrspace(1) %in0
  %b = load i64, ptr addrspace(1) %in1
  %result = xor i64 %a, -1
  store i64 %result, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @xor_cf(ptr addrspace(1) %out, ptr addrspace(1) %in, i64 %a, i64 %b) {
; SI-LABEL: xor_cf:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
; SI-NEXT:    s_mov_b64 s[8:9], 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cmp_ne_u64_e64 s[10:11], s[4:5], 0
; SI-NEXT:    s_and_b64 vcc, exec, s[10:11]
; SI-NEXT:    s_cbranch_vccz .LBB12_4
; SI-NEXT:  ; %bb.1: ; %else
; SI-NEXT:    s_mov_b32 s15, 0xf000
; SI-NEXT:    s_mov_b32 s14, -1
; SI-NEXT:    s_mov_b32 s12, s2
; SI-NEXT:    s_mov_b32 s13, s3
; SI-NEXT:    buffer_load_dwordx2 v[0:1], off, s[12:15], 0
; SI-NEXT:    s_andn2_b64 vcc, exec, s[8:9]
; SI-NEXT:    s_cbranch_vccnz .LBB12_3
; SI-NEXT:  .LBB12_2: ; %if
; SI-NEXT:    s_xor_b64 s[2:3], s[4:5], s[6:7]
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_mov_b32_e32 v0, s2
; SI-NEXT:    v_mov_b32_e32 v1, s3
; SI-NEXT:  .LBB12_3: ; %endif
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
; SI-NEXT:  .LBB12_4:
; SI-NEXT:    ; implicit-def: $vgpr0_vgpr1
; SI-NEXT:    s_branch .LBB12_2
;
; VI-LABEL: xor_cf:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
; VI-NEXT:    s_mov_b64 s[8:9], 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_cmp_lg_u64 s[4:5], 0
; VI-NEXT:    s_cbranch_scc0 .LBB12_4
; VI-NEXT:  ; %bb.1: ; %else
; VI-NEXT:    v_mov_b32_e32 v0, s2
; VI-NEXT:    v_mov_b32_e32 v1, s3
; VI-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
; VI-NEXT:    s_andn2_b64 vcc, exec, s[8:9]
; VI-NEXT:    s_cbranch_vccnz .LBB12_3
; VI-NEXT:  .LBB12_2: ; %if
; VI-NEXT:    s_xor_b64 s[2:3], s[4:5], s[6:7]
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v0, s2
; VI-NEXT:    v_mov_b32_e32 v1, s3
; VI-NEXT:  .LBB12_3: ; %endif
; VI-NEXT:    v_mov_b32_e32 v2, s0
; VI-NEXT:    v_mov_b32_e32 v3, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT:    s_endpgm
; VI-NEXT:  .LBB12_4:
; VI-NEXT:    ; implicit-def: $vgpr0_vgpr1
; VI-NEXT:    s_branch .LBB12_2
entry:
  %0 = icmp eq i64 %a, 0
  br i1 %0, label %if, label %else

if:
  %1 = xor i64 %a, %b
  br label %endif

else:
  %2 = load i64, ptr addrspace(1) %in
  br label %endif

endif:
  %3 = phi i64 [%1, %if], [%2, %else]
  store i64 %3, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @scalar_xor_literal_i64(ptr addrspace(1) %out, [8 x i32], i64 %a) {
; SI-LABEL: scalar_xor_literal_i64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x13
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_xor_b32 s4, s7, 0xf237b
; SI-NEXT:    s_xor_b32 s5, s6, 0x3039
; SI-NEXT:    v_mov_b32_e32 v0, s5
; SI-NEXT:    v_mov_b32_e32 v1, s4
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: scalar_xor_literal_i64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x4c
; VI-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_xor_b32 s1, s1, 0xf237b
; VI-NEXT:    s_xor_b32 s0, s0, 0x3039
; VI-NEXT:    v_mov_b32_e32 v2, s2
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    v_mov_b32_e32 v3, s3
; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT:    s_endpgm
  %or = xor i64 %a, 4261135838621753
  store i64 %or, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @scalar_xor_literal_multi_use_i64(ptr addrspace(1) %out, [8 x i32], i64 %a, i64 %b) {
; SI-LABEL: scalar_xor_literal_multi_use_i64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x13
; SI-NEXT:    s_movk_i32 s8, 0x3039
; SI-NEXT:    s_mov_b32 s9, 0xf237b
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_xor_b64 s[4:5], s[4:5], s[8:9]
; SI-NEXT:    v_mov_b32_e32 v0, s4
; SI-NEXT:    v_mov_b32_e32 v1, s5
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_add_u32 s0, s6, 0x3039
; SI-NEXT:    s_addc_u32 s1, s7, 0xf237b
; SI-NEXT:    s_waitcnt expcnt(0)
; SI-NEXT:    v_mov_b32_e32 v0, s0
; SI-NEXT:    v_mov_b32_e32 v1, s1
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    s_endpgm
;
; VI-LABEL: scalar_xor_literal_multi_use_i64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x4c
; VI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x24
; VI-NEXT:    s_movk_i32 s6, 0x3039
; VI-NEXT:    s_mov_b32 s7, 0xf237b
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_xor_b64 s[0:1], s[0:1], s[6:7]
; VI-NEXT:    v_mov_b32_e32 v0, s4
; VI-NEXT:    v_mov_b32_e32 v3, s1
; VI-NEXT:    v_mov_b32_e32 v1, s5
; VI-NEXT:    v_mov_b32_e32 v2, s0
; VI-NEXT:    s_add_u32 s0, s2, 0x3039
; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
; VI-NEXT:    s_addc_u32 s1, s3, 0xf237b
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    s_endpgm
  %or = xor i64 %a, 4261135838621753
  store i64 %or, ptr addrspace(1) %out

  %foo = add i64 %b, 4261135838621753
  store volatile i64 %foo, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @scalar_xor_inline_imm_i64(ptr addrspace(1) %out, [8 x i32], i64 %a) {
; SI-LABEL: scalar_xor_inline_imm_i64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x13
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_xor_b32 s4, s6, 63
; SI-NEXT:    v_mov_b32_e32 v0, s4
; SI-NEXT:    v_mov_b32_e32 v1, s7
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: scalar_xor_inline_imm_i64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x4c
; VI-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_xor_b32 s0, s0, 63
; VI-NEXT:    v_mov_b32_e32 v2, s2
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_mov_b32_e32 v3, s3
; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT:    s_endpgm
  %or = xor i64 %a, 63
  store i64 %or, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @scalar_xor_neg_inline_imm_i64(ptr addrspace(1) %out, [8 x i32], i64 %a) {
; SI-LABEL: scalar_xor_neg_inline_imm_i64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x13
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_xor_b64 s[4:5], s[6:7], -8
; SI-NEXT:    v_mov_b32_e32 v0, s4
; SI-NEXT:    v_mov_b32_e32 v1, s5
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: scalar_xor_neg_inline_imm_i64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x4c
; VI-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_xor_b64 s[0:1], s[0:1], -8
; VI-NEXT:    v_mov_b32_e32 v0, s2
; VI-NEXT:    v_mov_b32_e32 v3, s1
; VI-NEXT:    v_mov_b32_e32 v1, s3
; VI-NEXT:    v_mov_b32_e32 v2, s0
; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
; VI-NEXT:    s_endpgm
  %or = xor i64 %a, -8
  store i64 %or, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @vector_xor_i64_neg_inline_imm(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) {
; SI-LABEL: vector_xor_i64_neg_inline_imm:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_xor_b32_e32 v0, -8, v0
; SI-NEXT:    v_xor_b32_e32 v1, -1, v1
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: vector_xor_i64_neg_inline_imm:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v0, s2
; VI-NEXT:    v_mov_b32_e32 v1, s3
; VI-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
; VI-NEXT:    v_mov_b32_e32 v2, s0
; VI-NEXT:    v_mov_b32_e32 v3, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_xor_b32_e32 v0, -8, v0
; VI-NEXT:    v_xor_b32_e32 v1, -1, v1
; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT:    s_endpgm
  %loada = load i64, ptr addrspace(1) %a, align 8
  %or = xor i64 %loada, -8
  store i64 %or, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @vector_xor_literal_i64(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) {
; SI-LABEL: vector_xor_literal_i64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_xor_b32_e32 v1, 0x146f, v1
; SI-NEXT:    v_xor_b32_e32 v0, 0xdf77987f, v0
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: vector_xor_literal_i64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v0, s2
; VI-NEXT:    v_mov_b32_e32 v1, s3
; VI-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
; VI-NEXT:    v_mov_b32_e32 v2, s0
; VI-NEXT:    v_mov_b32_e32 v3, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_xor_b32_e32 v1, 0x146f, v1
; VI-NEXT:    v_xor_b32_e32 v0, 0xdf77987f, v0
; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT:    s_endpgm
  %loada = load i64, ptr addrspace(1) %a, align 8
  %or = xor i64 %loada, 22470723082367
  store i64 %or, ptr addrspace(1) %out
  ret void
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GCN: {{.*}}