xref: /llvm-project/llvm/test/CodeGen/AMDGPU/fix-frame-reg-in-custom-csr-spills.ll (revision 11b040192640ef3b1f481124c440f464ed6ec86a)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
3
4; The custom CSR spills inserted during frame lowering previously used SP as the frame base.
5; The offsets allocated for the CS objects go wrong when any local stack object has a higher
6; alignment requirement than the default stack alignment for AMDGPU (either 4 or 16). The offsets
7; in such cases should be relative to the newly aligned FP. Adjusting the offsets from the SP value
8; at function entry does not work either: FP-SP can't be statically determined with dynamic stack
9; realignment. To fix the problem, use FP as the frame base in the spills whenever the function has FP.
10
; Test function: combines an over-aligned stack object, two clobbered VGPRs and
; an external call, so that the prologue/epilogue must both realign the frame
; and spill/reload registers. The assertions below expect every spill, reload
; and local store to be addressed relative to s33, which the prologue derives
; from s32 by adding 0xfc0 and masking with 0xfffff000.
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of editing by hand.
11define void @test_stack_realign(<8 x i32> %val, i32 %idx) #0 {
12; GCN-LABEL: test_stack_realign:
13; GCN:       ; %bb.0:
14; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15; GCN-NEXT:    s_mov_b32 s16, s33
16; GCN-NEXT:    s_add_i32 s33, s32, 0xfc0
17; GCN-NEXT:    s_and_b32 s33, s33, 0xfffff000
18; GCN-NEXT:    s_or_saveexec_b64 s[18:19], -1
19; GCN-NEXT:    buffer_store_dword v42, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill
20; GCN-NEXT:    s_mov_b64 exec, s[18:19]
21; GCN-NEXT:    v_writelane_b32 v42, s16, 2
22; GCN-NEXT:    v_writelane_b32 v42, s34, 3
23; GCN-NEXT:    s_mov_b32 s34, s32
24; GCN-NEXT:    s_addk_i32 s32, 0x3000
25; GCN-NEXT:    s_getpc_b64 s[16:17]
26; GCN-NEXT:    s_add_u32 s16, s16, extern_func@gotpcrel32@lo+4
27; GCN-NEXT:    s_addc_u32 s17, s17, extern_func@gotpcrel32@hi+12
28; GCN-NEXT:    s_load_dwordx2 s[16:17], s[16:17], 0x0
29; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
30; GCN-NEXT:    buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
31; GCN-NEXT:    v_writelane_b32 v42, s30, 0
32; GCN-NEXT:    buffer_store_dword v7, off, s[0:3], s33 offset:92
33; GCN-NEXT:    s_waitcnt vmcnt(0)
34; GCN-NEXT:    buffer_store_dword v6, off, s[0:3], s33 offset:88
35; GCN-NEXT:    s_waitcnt vmcnt(0)
36; GCN-NEXT:    buffer_store_dword v5, off, s[0:3], s33 offset:84
37; GCN-NEXT:    s_waitcnt vmcnt(0)
38; GCN-NEXT:    buffer_store_dword v4, off, s[0:3], s33 offset:80
39; GCN-NEXT:    s_waitcnt vmcnt(0)
40; GCN-NEXT:    buffer_store_dword v3, off, s[0:3], s33 offset:76
41; GCN-NEXT:    s_waitcnt vmcnt(0)
42; GCN-NEXT:    buffer_store_dword v2, off, s[0:3], s33 offset:72
43; GCN-NEXT:    s_waitcnt vmcnt(0)
44; GCN-NEXT:    buffer_store_dword v1, off, s[0:3], s33 offset:68
45; GCN-NEXT:    s_waitcnt vmcnt(0)
46; GCN-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:64
47; GCN-NEXT:    s_waitcnt vmcnt(0)
48; GCN-NEXT:    v_mov_b32_e32 v0, v8
49; GCN-NEXT:    v_writelane_b32 v42, s31, 1
50; GCN-NEXT:    ;;#ASMSTART
51; GCN-NEXT:    ;;#ASMEND
52; GCN-NEXT:    ;;#ASMSTART
53; GCN-NEXT:    ;;#ASMEND
54; GCN-NEXT:    s_waitcnt lgkmcnt(0)
55; GCN-NEXT:    s_swappc_b64 s[30:31], s[16:17]
56; GCN-NEXT:    buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
57; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
58; GCN-NEXT:    v_readlane_b32 s31, v42, 1
59; GCN-NEXT:    v_readlane_b32 s30, v42, 0
60; GCN-NEXT:    s_mov_b32 s32, s34
61; GCN-NEXT:    v_readlane_b32 s4, v42, 2
62; GCN-NEXT:    v_readlane_b32 s34, v42, 3
63; GCN-NEXT:    s_or_saveexec_b64 s[6:7], -1
64; GCN-NEXT:    buffer_load_dword v42, off, s[0:3], s33 offset:96 ; 4-byte Folded Reload
65; GCN-NEXT:    s_mov_b64 exec, s[6:7]
66; GCN-NEXT:    s_mov_b32 s33, s4
67; GCN-NEXT:    s_waitcnt vmcnt(0)
68; GCN-NEXT:    s_setpc_b64 s[30:31]
; A 64-byte-aligned alloca: its alignment is above the default stack alignment
; quoted in the file header (4 or 16), which is the condition that requires the
; frame to be realigned.
69  %alloca.val = alloca <8 x i32>, align 64, addrspace(5)
; The store is volatile so it cannot be dropped; the assertions expect it as
; eight dword stores at s33 offsets 64..92.
70  store volatile <8 x i32> %val, ptr addrspace(5) %alloca.val, align 64
; Empty inline asm whose only effect is to clobber v40 and v41. The assertions
; expect both to be spilled before the call and reloaded after it, at s33
; offsets 4 and 0 (presumably because they are callee-saved; confirm against
; the AMDGPU calling convention).
71  call void asm sideeffect "", "~{v40}" ()
72  call void asm sideeffect "", "~{v41}" ()
; A real call to an external function; the assertions expect it as an
; s_swappc_b64 through an address loaded via extern_func@gotpcrel32.
73  call void @extern_func(i32 %idx)
74  ret void
75}
76
; Callee used by @test_stack_realign. It is only declared here (no body), so the
; call to it has to remain an actual call in the generated code.
77declare void @extern_func(i32) #0
78
; Attribute group #0 is shared by @test_stack_realign and @extern_func.
79attributes #0 = { noinline nounwind }
80