xref: /llvm-project/llvm/test/CodeGen/AMDGPU/bitcast-vector-extract.ll (revision 404071b059818ba51b6a832e5e09a93a4a8b579e)
1; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
2; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
3
4; The bitcast should be pushed through its constant vector operand so the
5; vectors can be broken down and the shared components can be CSEd.
6
7; GCN-LABEL: {{^}}store_bitcast_constant_v8i32_to_v8f32:
8; GCN: buffer_store_dwordx4
9; GCN: buffer_store_dwordx4
10; GCN-NOT: v_mov_b32
11; GCN: buffer_store_dwordx4
12; GCN-NOT: v_mov_b32
13; GCN: buffer_store_dwordx4
; Volatile-stores two constant <8 x i32> vectors to %out, each bitcast to
; <8 x float>. The two constants are identical except for the last element
; (8 vs. 9), so the leading seven elements are shared between the stores.
; NOTE(review): %vec is never referenced in the body; presumably it only
; shapes the kernel argument list - confirm.
14define amdgpu_kernel void @store_bitcast_constant_v8i32_to_v8f32(ptr addrspace(1) %out, <8 x i32> %vec) {
15  %vec0.bc = bitcast <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8> to <8 x float>
16  store volatile <8 x float> %vec0.bc, ptr addrspace(1) %out
17
; Same constant as above except that the last element is 9 instead of 8.
18  %vec1.bc = bitcast <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 9> to <8 x float>
19  store volatile <8 x float> %vec1.bc, ptr addrspace(1) %out
20  ret void
21}
22
23; GCN-LABEL: {{^}}store_bitcast_constant_v4i64_to_v8f32:
24; GCN: buffer_store_dwordx4
25; GCN: buffer_store_dwordx4
26; GCN-NOT: v_mov_b32
27; GCN: buffer_store_dwordx4
28; GCN-NOT: v_mov_b32
29; GCN: buffer_store_dwordx4
; Volatile-stores two constant <4 x i64> vectors to %out, each bitcast to
; <8 x float>. The two constants are identical except for the last element
; (8 vs. 9).
; NOTE(review): %vec is never referenced in the body; presumably it only
; shapes the kernel argument list - confirm.
30define amdgpu_kernel void @store_bitcast_constant_v4i64_to_v8f32(ptr addrspace(1) %out, <4 x i64> %vec) {
31  %vec0.bc = bitcast <4 x i64> <i64 7, i64 7, i64 7, i64 8> to <8 x float>
32  store volatile <8 x float> %vec0.bc, ptr addrspace(1) %out
33
; Same constant as above except that the last element is 9 instead of 8.
34  %vec1.bc = bitcast <4 x i64> <i64 7, i64 7, i64 7, i64 9> to <8 x float>
35  store volatile <8 x float> %vec1.bc, ptr addrspace(1) %out
36  ret void
37}
38
39; GCN-LABEL: {{^}}store_bitcast_constant_v4i64_to_v4f64:
40; GCN: buffer_store_dwordx4
41; GCN: buffer_store_dwordx4
42; GCN-NOT: v_mov_b32
43; GCN: buffer_store_dwordx4
44; GCN-NOT: v_mov_b32
45; GCN: buffer_store_dwordx4
; Volatile-stores two constant <4 x i64> vectors to %out, each bitcast to
; <4 x double> (element count and width are unchanged by the cast). The two
; constants are identical except for the last element (8 vs. 9).
; NOTE(review): %vec is never referenced in the body; presumably it only
; shapes the kernel argument list - confirm.
46define amdgpu_kernel void @store_bitcast_constant_v4i64_to_v4f64(ptr addrspace(1) %out, <4 x i64> %vec) {
47  %vec0.bc = bitcast <4 x i64> <i64 7, i64 7, i64 7, i64 8> to <4 x double>
48  store volatile <4 x double> %vec0.bc, ptr addrspace(1) %out
49
; Same constant as above except that the last element is 9 instead of 8.
50  %vec1.bc = bitcast <4 x i64> <i64 7, i64 7, i64 7, i64 9> to <4 x double>
51  store volatile <4 x double> %vec1.bc, ptr addrspace(1) %out
52  ret void
53}
54
55; GCN-LABEL: {{^}}store_bitcast_constant_v8i32_to_v16i16:
56; GCN: buffer_store_dwordx4
57; GCN: buffer_store_dwordx4
58; GCN-NOT: v_mov_b32
59; GCN: buffer_store_dwordx4
60; GCN-NOT: v_mov_b32
61; GCN: buffer_store_dwordx4
; Volatile-stores two constant <16 x i16> vectors to %out, each bitcast to
; <8 x float>. The two constants are identical except for the last element
; (8 vs. 9).
; NOTE(review): the function name says "v8i32_to_v16i16", but the body casts
; <16 x i16> to <8 x float>; the name does not describe what is tested.
; Renaming would also require updating the matching label directive.
; NOTE(review): %vec is never referenced in the body; presumably it only
; shapes the kernel argument list - confirm.
62define amdgpu_kernel void @store_bitcast_constant_v8i32_to_v16i16(ptr addrspace(1) %out, <16 x i16> %vec) {
63  %vec0.bc = bitcast <16 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 8> to <8 x float>
64  store volatile <8 x float> %vec0.bc, ptr addrspace(1) %out
65
; Same constant as above except that the last element is 9 instead of 8.
66  %vec1.bc = bitcast <16 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 9> to <8 x float>
67  store volatile <8 x float> %vec1.bc, ptr addrspace(1) %out
68  ret void
69}
70
71; GCN-LABEL: {{^}}store_value_lowered_to_undef_bitcast_source:
72; GCN-NOT: store_dword
; Stores to %out the <2 x i32> bitcast of an llvm.amdgcn.icmp.i64 result.
; The FileCheck lines for this function require that no store_dword
; instruction is emitted.
73define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source(ptr addrspace(1) %out, i64 %a, i64 %b) #0 {
; Going by the test name, this call is expected to be lowered to undef;
; presumably 999 is not a valid comparison code for the intrinsic - confirm.
74  %undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 999) #1
75  %bc = bitcast i64 %undef to <2 x i32>
76  store <2 x i32> %bc, ptr addrspace(1) %out
77  ret void
78}
79
80; GCN-LABEL: {{^}}store_value_lowered_to_undef_bitcast_source_extractelt:
81; GCN-NOT: store_dword
; Variant of the undef-bitcast-source test that stores a single i32 to %out:
; element 1 of the <2 x i32> bitcast of an llvm.amdgcn.icmp.i64 result.
; The FileCheck lines for this function require that no store_dword
; instruction is emitted.
82define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source_extractelt(ptr addrspace(1) %out, i64 %a, i64 %b) #0 {
; Going by the test name, this call is expected to be lowered to undef;
; presumably 9999 is not a valid comparison code for the intrinsic - confirm.
83  %undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 9999) #1
84  %bc = bitcast i64 %undef to <2 x i32>
; Extract the second element of the bitcast vector and store only that.
85  %elt1 = extractelement <2 x i32> %bc, i32 1
86  store i32 %elt1, ptr addrspace(1) %out
87  ret void
88}
89
; Intrinsic called by the two store_value_lowered_to_undef_* kernels.
90declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) #1
91
; #0 is attached to the two store_value_lowered_to_undef_* kernel definitions;
; #1 is attached to the intrinsic declaration and to both of its call sites.
92attributes #0 = { nounwind }
93attributes #1 = { nounwind readnone convergent }
94