; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 --mattr=+enable-flat-scratch < %s | FileCheck -check-prefixes=GCN,FLAT_SCR_OPT %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 --mattr=+architected-flat-scratch < %s | FileCheck -check-prefixes=GCN,FLAT_SCR_ARCH %s

; Compares the kernel prologue emitted on gfx1010 under two subtarget features,
; +enable-flat-scratch and +architected-flat-scratch. Where the expected output
; differs, the first configuration contains an s_setreg_b32 sequence writing
; HW_REG_FLAT_SCR_LO/HI and the second configuration does not.

declare void @extern_func() #0

; A private (addrspace 5) stack slot is cast to a generic pointer and written
; with a volatile store, so the expected access is a flat_store_dword whose high
; address half is taken from src_private_base.
define amdgpu_kernel void @stack_object_addrspacecast_in_kernel_no_calls() {
; FLAT_SCR_OPT-LABEL: stack_object_addrspacecast_in_kernel_no_calls:
; FLAT_SCR_OPT:       ; %bb.0:
; FLAT_SCR_OPT-NEXT:    s_add_u32 s8, s8, s13
; FLAT_SCR_OPT-NEXT:    s_addc_u32 s9, s9, 0
; FLAT_SCR_OPT-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8
; FLAT_SCR_OPT-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
; FLAT_SCR_OPT-NEXT:    s_mov_b64 s[0:1], src_private_base
; FLAT_SCR_OPT-NEXT:    v_mov_b32_e32 v0, 0
; FLAT_SCR_OPT-NEXT:    v_mov_b32_e32 v1, s1
; FLAT_SCR_OPT-NEXT:    v_mov_b32_e32 v2, 0
; FLAT_SCR_OPT-NEXT:    flat_store_dword v[0:1], v2
; FLAT_SCR_OPT-NEXT:    s_waitcnt_vscnt null, 0x0
; FLAT_SCR_OPT-NEXT:    s_endpgm
;
; FLAT_SCR_ARCH-LABEL: stack_object_addrspacecast_in_kernel_no_calls:
; FLAT_SCR_ARCH:       ; %bb.0:
; FLAT_SCR_ARCH-NEXT:    s_mov_b64 s[0:1], src_private_base
; FLAT_SCR_ARCH-NEXT:    v_mov_b32_e32 v0, 0
; FLAT_SCR_ARCH-NEXT:    v_mov_b32_e32 v1, s1
; FLAT_SCR_ARCH-NEXT:    v_mov_b32_e32 v2, 0
; FLAT_SCR_ARCH-NEXT:    flat_store_dword v[0:1], v2
; FLAT_SCR_ARCH-NEXT:    s_waitcnt_vscnt null, 0x0
; FLAT_SCR_ARCH-NEXT:    s_endpgm
  %alloca = alloca i32, addrspace(5)
  %cast = addrspacecast ptr addrspace(5) %alloca to ptr
  store volatile i32 0, ptr %cast
  ret void
}

; A private stack slot is written directly through its addrspace(5) pointer,
; so the expected access is a scratch_store_dword in both configurations.
define amdgpu_kernel void @stack_object_in_kernel_no_calls() {
; FLAT_SCR_OPT-LABEL: stack_object_in_kernel_no_calls:
; FLAT_SCR_OPT:       ; %bb.0:
; FLAT_SCR_OPT-NEXT:    s_add_u32 s8, s8, s13
; FLAT_SCR_OPT-NEXT:    s_addc_u32 s9, s9, 0
; FLAT_SCR_OPT-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8
; FLAT_SCR_OPT-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
; FLAT_SCR_OPT-NEXT:    v_mov_b32_e32 v0, 0
; FLAT_SCR_OPT-NEXT:    s_mov_b32 s0, 0
; FLAT_SCR_OPT-NEXT:    scratch_store_dword off, v0, s0
; FLAT_SCR_OPT-NEXT:    s_waitcnt_vscnt null, 0x0
; FLAT_SCR_OPT-NEXT:    s_endpgm
;
; FLAT_SCR_ARCH-LABEL: stack_object_in_kernel_no_calls:
; FLAT_SCR_ARCH:       ; %bb.0:
; FLAT_SCR_ARCH-NEXT:    v_mov_b32_e32 v0, 0
; FLAT_SCR_ARCH-NEXT:    s_mov_b32 s0, 0
; FLAT_SCR_ARCH-NEXT:    scratch_store_dword off, v0, s0
; FLAT_SCR_ARCH-NEXT:    s_waitcnt_vscnt null, 0x0
; FLAT_SCR_ARCH-NEXT:    s_endpgm
  %alloca = alloca i32, addrspace(5)
  store volatile i32 0, ptr addrspace(5) %alloca
  ret void
}

; The kernel has no stack object of its own but calls an external function.
; The +enable-flat-scratch output is still expected to set up FLAT_SCR_LO/HI
; before the call, and both configurations zero s32 ahead of s_swappc_b64.
define amdgpu_kernel void @kernel_calls_no_stack() {
; FLAT_SCR_OPT-LABEL: kernel_calls_no_stack:
; FLAT_SCR_OPT:       ; %bb.0:
; FLAT_SCR_OPT-NEXT:    s_add_u32 s8, s8, s13
; FLAT_SCR_OPT-NEXT:    s_mov_b32 s32, 0
; FLAT_SCR_OPT-NEXT:    s_addc_u32 s9, s9, 0
; FLAT_SCR_OPT-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8
; FLAT_SCR_OPT-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
; FLAT_SCR_OPT-NEXT:    s_mov_b64 s[8:9], s[4:5]
; FLAT_SCR_OPT-NEXT:    s_getpc_b64 s[4:5]
; FLAT_SCR_OPT-NEXT:    s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4
; FLAT_SCR_OPT-NEXT:    s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12
; FLAT_SCR_OPT-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
; FLAT_SCR_OPT-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
; FLAT_SCR_OPT-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
; FLAT_SCR_OPT-NEXT:    s_mov_b32 s14, s12
; FLAT_SCR_OPT-NEXT:    s_mov_b32 s13, s11
; FLAT_SCR_OPT-NEXT:    s_mov_b32 s12, s10
; FLAT_SCR_OPT-NEXT:    s_mov_b64 s[10:11], s[6:7]
; FLAT_SCR_OPT-NEXT:    v_or3_b32 v31, v0, v1, v2
; FLAT_SCR_OPT-NEXT:    s_mov_b64 s[4:5], s[0:1]
; FLAT_SCR_OPT-NEXT:    s_mov_b64 s[6:7], s[2:3]
; FLAT_SCR_OPT-NEXT:    s_waitcnt lgkmcnt(0)
; FLAT_SCR_OPT-NEXT:    s_swappc_b64 s[30:31], s[16:17]
; FLAT_SCR_OPT-NEXT:    s_endpgm
;
; FLAT_SCR_ARCH-LABEL: kernel_calls_no_stack:
; FLAT_SCR_ARCH:       ; %bb.0:
; FLAT_SCR_ARCH-NEXT:    s_mov_b32 s13, s9
; FLAT_SCR_ARCH-NEXT:    s_mov_b32 s12, s8
; FLAT_SCR_ARCH-NEXT:    s_mov_b64 s[8:9], s[4:5]
; FLAT_SCR_ARCH-NEXT:    s_getpc_b64 s[4:5]
; FLAT_SCR_ARCH-NEXT:    s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4
; FLAT_SCR_ARCH-NEXT:    s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12
; FLAT_SCR_ARCH-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
; FLAT_SCR_ARCH-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
; FLAT_SCR_ARCH-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
; FLAT_SCR_ARCH-NEXT:    s_mov_b32 s14, s10
; FLAT_SCR_ARCH-NEXT:    s_mov_b64 s[10:11], s[6:7]
; FLAT_SCR_ARCH-NEXT:    s_mov_b64 s[4:5], s[0:1]
; FLAT_SCR_ARCH-NEXT:    s_mov_b64 s[6:7], s[2:3]
; FLAT_SCR_ARCH-NEXT:    v_or3_b32 v31, v0, v1, v2
; FLAT_SCR_ARCH-NEXT:    s_mov_b32 s32, 0
; FLAT_SCR_ARCH-NEXT:    s_waitcnt lgkmcnt(0)
; FLAT_SCR_ARCH-NEXT:    s_swappc_b64 s[30:31], s[16:17]
; FLAT_SCR_ARCH-NEXT:    s_endpgm
  call void @extern_func()
  ret void
}

; Empty inline asm statements clobber s0-s105 and v1-v255 (v0 is the only VGPR
; left untouched) before %in is stored to %out. The expected output is shared
; by both configurations. It keeps the s[0:1] output pointer alive across the
; clobbers in lanes 0 and 1 of v0 (v_writelane_b32 / v_readlane_b32) and
; contains no FLAT_SCR setup.
define amdgpu_kernel void @test(ptr addrspace(1) %out, i32 %in) {
; GCN-LABEL: test:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_clause 0x1
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GCN-NEXT:    s_load_dword vcc_lo, s[4:5], 0x8
; GCN-NEXT:    ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
; GCN-NEXT:    ; kill: killed $sgpr4_sgpr5
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_writelane_b32 v0, s0, 0
; GCN-NEXT:    v_writelane_b32 v0, s1, 1
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    v_readlane_b32 s0, v0, 0
; GCN-NEXT:    v_mov_b32_e32 v1, vcc_lo
; GCN-NEXT:    v_readlane_b32 s1, v0, 1
; GCN-NEXT:    v_mov_b32_e32 v2, 0
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    global_store_dword v2, v1, s[0:1]
; GCN-NEXT:    s_endpgm
  call void asm sideeffect "", "~{s[0:7]}" ()
  call void asm sideeffect "", "~{s[8:15]}" ()
  call void asm sideeffect "", "~{s[16:23]}" ()
  call void asm sideeffect "", "~{s[24:31]}" ()
  call void asm sideeffect "", "~{s[32:39]}" ()
  call void asm sideeffect "", "~{s[40:47]}" ()
  call void asm sideeffect "", "~{s[48:55]}" ()
  call void asm sideeffect "", "~{s[56:63]}" ()
  call void asm sideeffect "", "~{s[64:71]}" ()
  call void asm sideeffect "", "~{s[72:79]}" ()
  call void asm sideeffect "", "~{s[80:87]}" ()
  call void asm sideeffect "", "~{s[88:95]}" ()
  call void asm sideeffect "", "~{s[96:103]}" ()
  call void asm sideeffect "", "~{s[104:105]}" ()
  call void asm sideeffect "", "~{v[1:7]}" ()
  call void asm sideeffect "", "~{v[8:15]}" ()
  call void asm sideeffect "", "~{v[16:23]}" ()
  call void asm sideeffect "", "~{v[24:31]}" ()
  call void asm sideeffect "", "~{v[32:39]}" ()
  call void asm sideeffect "", "~{v[40:47]}" ()
  call void asm sideeffect "", "~{v[48:55]}" ()
  call void asm sideeffect "", "~{v[56:63]}" ()
  call void asm sideeffect "", "~{v[64:71]}" ()
  call void asm sideeffect "", "~{v[72:79]}" ()
  call void asm sideeffect "", "~{v[80:87]}" ()
  call void asm sideeffect "", "~{v[88:95]}" ()
  call void asm sideeffect "", "~{v[96:103]}" ()
  call void asm sideeffect "", "~{v[104:111]}" ()
  call void asm sideeffect "", "~{v[112:119]}" ()
  call void asm sideeffect "", "~{v[120:127]}" ()
  call void asm sideeffect "", "~{v[128:135]}" ()
  call void asm sideeffect "", "~{v[136:143]}" ()
  call void asm sideeffect "", "~{v[144:151]}" ()
  call void asm sideeffect "", "~{v[152:159]}" ()
  call void asm sideeffect "", "~{v[160:167]}" ()
  call void asm sideeffect "", "~{v[168:175]}" ()
  call void asm sideeffect "", "~{v[176:183]}" ()
  call void asm sideeffect "", "~{v[184:191]}" ()
  call void asm sideeffect "", "~{v[192:199]}" ()
  call void asm sideeffect "", "~{v[200:207]}" ()
  call void asm sideeffect "", "~{v[208:215]}" ()
  call void asm sideeffect "", "~{v[216:223]}" ()
  call void asm sideeffect "", "~{v[224:231]}" ()
  call void asm sideeffect "", "~{v[232:239]}" ()
  call void asm sideeffect "", "~{v[240:247]}" ()
  call void asm sideeffect "", "~{v[248:255]}" ()

  store i32 %in, ptr addrspace(1) %out
  ret void
}

; An empty kernel with no stack use and no calls. Both configurations are
; expected to emit nothing but s_endpgm.
define amdgpu_kernel void @kernel_no_calls_no_stack() {
; GCN-LABEL: kernel_no_calls_no_stack:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_endpgm
  ret void
}

attributes #0 = { nounwind }

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}