xref: /llvm-project/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i64.ll (revision ceb68eea8c5cfd7893d55afaef93932762bbebaa)
1; RUN: llc -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
2; RUN: llc -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
3
4; Regression test: replacing i64 stores with v2i32 stores used to break
5; other users of the bitcast if they already existed.
6
; Bitcasts the i64 kernel argument %val to <2 x i32>, extracts both 32-bit
; elements, and stores each of them to %out. The original i64 %val is then
; stored to %in, so the i64 value and its bitcast both have users.
; All three stores are volatile. The checks below expect two dword stores
; followed by one dwordx2 store.
7; GCN-LABEL: {{^}}extract_vector_elt_select_error:
8; GCN: buffer_store_dword
9; GCN: buffer_store_dword
10; GCN: buffer_store_dwordx2
11define amdgpu_kernel void @extract_vector_elt_select_error(ptr addrspace(1) %out, ptr addrspace(1) %in, i64 %val) #0 {
12  %vec = bitcast i64 %val to <2 x i32>
13  %elt0 = extractelement <2 x i32> %vec, i32 0
14  %elt1 = extractelement <2 x i32> %vec, i32 1
15
16  store volatile i32 %elt0, ptr addrspace(1) %out
17  store volatile i32 %elt1, ptr addrspace(1) %out
18  store volatile i64 %val, ptr addrspace(1) %in
19  ret void
20}
21
; Extracts both elements of the <2 x i64> kernel argument %foo with constant
; indices and stores them in swapped order: element 1 goes to %out and
; element 0 goes to %out1, which is one i64 past %out.
; Both stores are volatile i64 stores to addrspace(1), the same kind of store
; that is checked as buffer_store_dwordx2 in extract_vector_elt_select_error,
; so one such store is expected per extracted element.
22; GCN-LABEL: {{^}}extract_vector_elt_v2i64:
; GCN: buffer_store_dwordx2
; GCN: buffer_store_dwordx2
23define amdgpu_kernel void @extract_vector_elt_v2i64(ptr addrspace(1) %out, <2 x i64> %foo) #0 {
24  %p0 = extractelement <2 x i64> %foo, i32 0
25  %p1 = extractelement <2 x i64> %foo, i32 1
26  %out1 = getelementptr i64, ptr addrspace(1) %out, i32 1
27  store volatile i64 %p1, ptr addrspace(1) %out
28  store volatile i64 %p0, ptr addrspace(1) %out1
29  ret void
30}
31
; Extracts one element of the <2 x i64> kernel argument %foo using the
; runtime index %elt and stores it (volatile) to %out.
; The checks below expect no buffer_load ahead of the selection sequence.
; The selection itself is a scalar compare of a register against 1 plus two
; 32-bit scalar selects, matched in any order, followed by a dwordx2 store.
32; GCN-LABEL: {{^}}dyn_extract_vector_elt_v2i64:
33; GCN-NOT: buffer_load
34; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 1
35; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
36; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
37; GCN: store_dwordx2 v[{{[0-9:]+}}]
38define amdgpu_kernel void @dyn_extract_vector_elt_v2i64(ptr addrspace(1) %out, <2 x i64> %foo, i32 %elt) #0 {
39  %dynelt = extractelement <2 x i64> %foo, i32 %elt
40  store volatile i64 %dynelt, ptr addrspace(1) %out
41  ret void
42}
43
; Variant of the dynamic v2i64 extract where the vector is not a plain kernel
; argument: it is a volatile load from %foo OR'ed with the argument %arst.
; The element selected by the runtime index %elt is stored (volatile) to %out.
; The checks below expect exactly one buffer_load_dwordx4 and no further
; buffer_load. The index is compared against 1, a 64-bit scalar select of -1/0
; produces a mask (captured as C1), and two v_cndmask_b32 use that mask.
; A dwordx2 store follows.
; NOTE(review): the IDX capture is never referenced again in the checks
; visible here.
44; GCN-LABEL: {{^}}dyn_extract_vector_elt_v2i64_2:
45; GCN:     buffer_load_dwordx4
46; GCN-NOT: buffer_load
47; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1
48; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
49; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]]
50; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]]
51; GCN: store_dwordx2 v[{{[0-9:]+}}]
52define amdgpu_kernel void @dyn_extract_vector_elt_v2i64_2(ptr addrspace(1) %out, ptr addrspace(1) %foo, i32 %elt, <2 x i64> %arst) #0 {
53  %load = load volatile <2 x i64>, ptr addrspace(1) %foo
54  %or = or <2 x i64> %load, %arst
55  %dynelt = extractelement <2 x i64> %or, i32 %elt
56  store volatile i64 %dynelt, ptr addrspace(1) %out
57  ret void
58}
59
; Extracts one element of the <3 x i64> kernel argument %foo using the
; runtime index %elt and stores it (volatile) to %out.
; The checks below expect no buffer_load ahead of the selection sequence.
; The selection is scalar compares against 1 and 2 with two 32-bit scalar
; selects listed after each compare, all matched in any order, followed by a
; dwordx2 store.
60; GCN-LABEL: {{^}}dyn_extract_vector_elt_v3i64:
61; GCN-NOT: buffer_load
62; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 1
63; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
64; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
65; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 2
66; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
67; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
68; GCN: store_dwordx2 v[{{[0-9:]+}}]
69define amdgpu_kernel void @dyn_extract_vector_elt_v3i64(ptr addrspace(1) %out, <3 x i64> %foo, i32 %elt) #0 {
70  %dynelt = extractelement <3 x i64> %foo, i32 %elt
71  store volatile i64 %dynelt, ptr addrspace(1) %out
72  ret void
73}
74
; Extracts one element of the <4 x i64> kernel argument %foo using the
; runtime index %elt and stores it (volatile) to %out.
; The checks below expect no buffer_load ahead of the selection sequence.
; The selection is scalar compares against 1, 2 and 3 with two 32-bit scalar
; selects listed after each compare, all matched in any order, followed by a
; dwordx2 store.
75; GCN-LABEL: {{^}}dyn_extract_vector_elt_v4i64:
76; GCN-NOT: buffer_load
77; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 1
78; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
79; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
80; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 2
81; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
82; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
83; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 3
84; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
85; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
86; GCN: store_dwordx2 v[{{[0-9:]+}}]
87define amdgpu_kernel void @dyn_extract_vector_elt_v4i64(ptr addrspace(1) %out, <4 x i64> %foo, i32 %elt) #0 {
88  %dynelt = extractelement <4 x i64> %foo, i32 %elt
89  store volatile i64 %dynelt, ptr addrspace(1) %out
90  ret void
91}
92
; Attribute group #0, attached to every kernel defined in this file: nounwind.
93attributes #0 = { nounwind }
94