; RUN: opt -passes=amdgpu-attributor -mcpu=gfx900 < %s | llc -mcpu=gfx900 | FileCheck -check-prefixes=GCN,RW-FLAT %s
; RUN: opt -passes=amdgpu-attributor -mcpu=gfx900 -mattr=+architected-flat-scratch < %s | llc | FileCheck -check-prefixes=GCN,RO-FLAT %s
; RUN: opt -passes=amdgpu-attributor -mcpu=gfx940 < %s | llc | FileCheck -check-prefixes=GCN,RO-FLAT %s

; Three configurations are exercised. The plain gfx900 run is verified with
; the RW-FLAT prefix; the gfx900 +architected-flat-scratch run and the gfx940
; run share the RO-FLAT prefix. Expectations common to every run use the GCN
; prefix.
; NOTE(review): only the first llc invocation repeats -mcpu. The other two
; presumably pick up the subtarget from what opt wrote into the module;
; confirm before changing the command lines.

target triple = "amdgcn-amd-amdhsa"

; Make sure flat_scratch_init is set

; Kernel without calls that stores through a flat pointer produced by an
; addrspacecast of an addrspace(5) alloca.
; GCN-LABEL: {{^}}stack_object_addrspacecast_in_kernel_no_calls:
; RW-FLAT: s_add_u32 flat_scratch_lo, s4, s7
; RW-FLAT: s_addc_u32 flat_scratch_hi, s5, 0
; RO-FLAT-NOT: flat_scratch
; GCN: flat_store_dword
; RO-FLAT-NOT: .amdhsa_user_sgpr_private_segment_buffer
; RW-FLAT: .amdhsa_user_sgpr_flat_scratch_init 1
; RO-FLAT-NOT: .amdhsa_user_sgpr_flat_scratch_init
; RW-FLAT: .amdhsa_system_sgpr_private_segment_wavefront_offset
; RW-FLAT-NOT: .amdhsa_enable_private_segment
; RO-FLAT-NOT: .amdhsa_system_sgpr_private_segment_wavefront_offset
; RO-FLAT: .amdhsa_enable_private_segment 1
; GCN: COMPUTE_PGM_RSRC2:SCRATCH_EN: 1
; RW-FLAT: COMPUTE_PGM_RSRC2:USER_SGPR: 6
; RO-FLAT: COMPUTE_PGM_RSRC2:USER_SGPR: 0
define amdgpu_kernel void @stack_object_addrspacecast_in_kernel_no_calls() {
  %alloca = alloca i32, addrspace(5)
  %cast = addrspacecast ptr addrspace(5) %alloca to ptr
  store volatile i32 0, ptr %cast
  ret void
}

; Kernel without calls that stores directly to an addrspace(5) alloca; no
; addrspacecast is involved.
; TODO: Could optimize out in this case
; GCN-LABEL: {{^}}stack_object_in_kernel_no_calls:
; RO-FLAT-NOT: flat_scratch
; RW-FLAT: buffer_store_dword
; RO-FLAT: scratch_store_dword
; RW-FLAT: .amdhsa_user_sgpr_private_segment_buffer 1
; RO-FLAT-NOT: .amdhsa_user_sgpr_private_segment_buffer
; RW-FLAT: .amdhsa_user_sgpr_flat_scratch_init 1
; RO-FLAT-NOT: .amdhsa_user_sgpr_flat_scratch_init
; RW-FLAT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
; RW-FLAT-NOT: .amdhsa_enable_private_segment
; RO-FLAT-NOT: .amdhsa_system_sgpr_private_segment_wavefront_offset
; RO-FLAT: .amdhsa_enable_private_segment 1
; RW-FLAT: .amdhsa_reserve_flat_scratch 0
; GCN: COMPUTE_PGM_RSRC2:SCRATCH_EN: 1
; RW-FLAT: COMPUTE_PGM_RSRC2:USER_SGPR: 6
; RO-FLAT: COMPUTE_PGM_RSRC2:USER_SGPR: 0
define amdgpu_kernel void @stack_object_in_kernel_no_calls() {
  %alloca = alloca i32, addrspace(5)
  store volatile i32 0, ptr addrspace(5) %alloca
  ret void
}

; Empty kernel with neither a stack object nor a call; every run expects no
; flat_scratch reference and the scratch-enable bit to be reported as 0.
; GCN-LABEL: {{^}}kernel_no_calls_no_stack:
; GCN-NOT: flat_scratch
; RW-FLAT: .amdhsa_user_sgpr_private_segment_buffer 1
; RO-FLAT-NOT: .amdhsa_user_sgpr_private_segment_buffer
; RW-FLAT: .amdhsa_user_sgpr_flat_scratch_init 0
; RO-FLAT-NOT: .amdhsa_user_sgpr_flat_scratch_init
; RW-FLAT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0
; RW-FLAT-NOT: .amdhsa_enable_private_segment
; RO-FLAT-NOT: .amdhsa_system_sgpr_private_segment_wavefront_offset
; RO-FLAT: .amdhsa_enable_private_segment 0
; RW-FLAT: .amdhsa_reserve_flat_scratch 0
; GCN: COMPUTE_PGM_RSRC2:SCRATCH_EN: 0
; RW-FLAT: COMPUTE_PGM_RSRC2:USER_SGPR: 4
; RO-FLAT: COMPUTE_PGM_RSRC2:USER_SGPR: 0
define amdgpu_kernel void @kernel_no_calls_no_stack() {
  ret void
}