xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.ll (revision 109cd11dc4aea6b3596f8b2cb5a719f35b190cfa)
; Test drivers. Each pipeline below runs the amdgpu-attributor pass with opt,
; pipes the resulting module into llc, and verifies the llc output with
; FileCheck:
;   - the gfx900 pipeline is checked with the GCN and RW-FLAT prefixes;
;   - the gfx900 +architected-flat-scratch pipeline and the gfx940 pipeline are
;     checked with the GCN and RO-FLAT prefixes.
; NOTE(review): only the first pipeline passes -mcpu to llc; the other two
; invoke llc with no -mcpu/-mattr, so the subtarget seen by llc depends on what
; opt leaves in the module - confirm this is intended. Also, none of the
; pipelines passes -global-isel although the file lives under GlobalISel/ -
; confirm.
1; RUN: opt -passes=amdgpu-attributor -mcpu=gfx900 < %s | llc -mcpu=gfx900 | FileCheck -check-prefixes=GCN,RW-FLAT %s
2; RUN: opt -passes=amdgpu-attributor -mcpu=gfx900 -mattr=+architected-flat-scratch < %s | llc | FileCheck -check-prefixes=GCN,RO-FLAT %s
3; RUN: opt -passes=amdgpu-attributor -mcpu=gfx940 < %s | llc | FileCheck -check-prefixes=GCN,RO-FLAT %s
4
; The triple is set in the module itself, so no -mtriple is given on the
; command lines above.
5target triple = "amdgcn-amd-amdhsa"
6
7; Make sure flat_scratch_init is set
8
; Kernel with one addrspace(5) stack object that is accessed through a pointer
; cast to the default address space, with no calls.
;
; Expectations encoded by the checks below:
;   - RW-FLAT prefix: flat_scratch_lo/hi are initialized with s_add_u32 /
;     s_addc_u32 before the store, the flat_scratch_init user SGPR is enabled
;     (value 1), the private segment wavefront offset directive is present,
;     .amdhsa_enable_private_segment is absent, and USER_SGPR is 6.
;   - RO-FLAT prefix: no mention of flat_scratch before the store, none of the
;     private_segment_buffer / flat_scratch_init / wavefront_offset directives,
;     .amdhsa_enable_private_segment is 1, and USER_SGPR is 0.
;   - Both: the store is emitted as flat_store_dword and SCRATCH_EN is 1.
9; GCN-LABEL: {{^}}stack_object_addrspacecast_in_kernel_no_calls:
10; RW-FLAT:     s_add_u32 flat_scratch_lo, s4, s7
11; RW-FLAT:     s_addc_u32 flat_scratch_hi, s5, 0
12; RO-FLAT-NOT: flat_scratch
13; GCN:         flat_store_dword
14; RO-FLAT-NOT: .amdhsa_user_sgpr_private_segment_buffer
15; RW-FLAT:     .amdhsa_user_sgpr_flat_scratch_init 1
16; RO-FLAT-NOT: .amdhsa_user_sgpr_flat_scratch_init
17; RW-FLAT:     .amdhsa_system_sgpr_private_segment_wavefront_offset
18; RW-FLAT-NOT: .amdhsa_enable_private_segment
19; RO-FLAT-NOT: .amdhsa_system_sgpr_private_segment_wavefront_offset
20; RO-FLAT:     .amdhsa_enable_private_segment 1
21; GCN:         COMPUTE_PGM_RSRC2:SCRATCH_EN: 1
22; RW-FLAT:     COMPUTE_PGM_RSRC2:USER_SGPR: 6
23; RO-FLAT:     COMPUTE_PGM_RSRC2:USER_SGPR: 0
24define amdgpu_kernel void @stack_object_addrspacecast_in_kernel_no_calls() {
  ; Single i32 stack slot in addrspace(5).
25  %alloca = alloca i32, addrspace(5)
  ; Cast to an unqualified ptr so the store below does not go through an
  ; addrspace(5) pointer; the checks above expect it to become a
  ; flat_store_dword.
26  %cast = addrspacecast ptr addrspace(5) %alloca to ptr
  ; volatile keeps the otherwise-dead store from being removed.
27  store volatile i32 0, ptr %cast
28  ret void
29}
30
; Kernel with one addrspace(5) stack object that is stored to directly (no
; addrspacecast) and no calls.
;
; Expectations encoded by the checks below:
;   - RW-FLAT prefix: the store is a buffer_store_dword; the private segment
;     buffer, flat_scratch_init and wavefront offset directives are all 1;
;     .amdhsa_enable_private_segment is absent; .amdhsa_reserve_flat_scratch
;     is 0; USER_SGPR is 6.
;   - RO-FLAT prefix: no mention of flat_scratch before the store, which is a
;     scratch_store_dword; the private_segment_buffer / flat_scratch_init /
;     wavefront_offset directives are absent; .amdhsa_enable_private_segment
;     is 1; USER_SGPR is 0.
;   - Both: SCRATCH_EN is 1.
;
; NOTE(review): the TODO below presumably refers to flat_scratch_init still
; being enabled for RW-FLAT even though the store is a buffer_store_dword and
; .amdhsa_reserve_flat_scratch is 0 - confirm.
31; TODO: Could optimize out in this case
32; GCN-LABEL: {{^}}stack_object_in_kernel_no_calls:
33; RO-FLAT-NOT: flat_scratch
34; RW-FLAT:     buffer_store_dword
35; RO-FLAT:     scratch_store_dword
36; RW-FLAT:     .amdhsa_user_sgpr_private_segment_buffer 1
37; RO-FLAT-NOT: .amdhsa_user_sgpr_private_segment_buffer
38; RW-FLAT:     .amdhsa_user_sgpr_flat_scratch_init 1
39; RO-FLAT-NOT: .amdhsa_user_sgpr_flat_scratch_init
40; RW-FLAT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 1
41; RW-FLAT-NOT: .amdhsa_enable_private_segment
42; RO-FLAT-NOT: .amdhsa_system_sgpr_private_segment_wavefront_offset
43; RO-FLAT:     .amdhsa_enable_private_segment 1
44; RW-FLAT:     .amdhsa_reserve_flat_scratch 0
45; GCN:         COMPUTE_PGM_RSRC2:SCRATCH_EN: 1
46; RW-FLAT:     COMPUTE_PGM_RSRC2:USER_SGPR: 6
47; RO-FLAT:     COMPUTE_PGM_RSRC2:USER_SGPR: 0
48define amdgpu_kernel void @stack_object_in_kernel_no_calls() {
  ; Single i32 stack slot in addrspace(5).
49  %alloca = alloca i32, addrspace(5)
  ; Store straight through the addrspace(5) pointer; volatile keeps the
  ; otherwise-dead store from being removed.
50  store volatile i32 0, ptr addrspace(5) %alloca
51  ret void
52}
53
; Empty kernel: no stack objects and no calls.
;
; Expectations encoded by the checks below:
;   - Both prefixes: no mention of flat_scratch after the label and before the
;     next matched directive, and SCRATCH_EN is 0.
;   - RW-FLAT prefix: the private segment buffer directive is still 1, while
;     flat_scratch_init and the wavefront offset are 0;
;     .amdhsa_enable_private_segment is absent; .amdhsa_reserve_flat_scratch
;     is 0; USER_SGPR is 4.
;   - RO-FLAT prefix: the private_segment_buffer / flat_scratch_init /
;     wavefront_offset directives are absent; .amdhsa_enable_private_segment
;     is 0; USER_SGPR is 0.
54; GCN-LABEL: {{^}}kernel_no_calls_no_stack:
55; GCN-NOT:    flat_scratch
56; RW-FLAT:     .amdhsa_user_sgpr_private_segment_buffer 1
57; RO-FLAT-NOT: .amdhsa_user_sgpr_private_segment_buffer
58; RW-FLAT:     .amdhsa_user_sgpr_flat_scratch_init 0
59; RO-FLAT-NOT: .amdhsa_user_sgpr_flat_scratch_init
60; RW-FLAT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 0
61; RW-FLAT-NOT: .amdhsa_enable_private_segment
62; RO-FLAT-NOT: .amdhsa_system_sgpr_private_segment_wavefront_offset
63; RO-FLAT:     .amdhsa_enable_private_segment 0
64; RW-FLAT:     .amdhsa_reserve_flat_scratch 0
65; GCN:         COMPUTE_PGM_RSRC2:SCRATCH_EN: 0
66; RW-FLAT:     COMPUTE_PGM_RSRC2:USER_SGPR: 4
67; RO-FLAT:     COMPUTE_PGM_RSRC2:USER_SGPR: 0
68define amdgpu_kernel void @kernel_no_calls_no_stack() {
  ; Nothing but a return: no alloca, no memory access, no call.
69  ret void
70}
71