; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MUBUF %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MUBUF %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MUBUF %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FLATSCR %s

; Checks which registers are saved, restored or copied around calls and around
; inline asm clobbers. The first three invocations above expect buffer_*
; stack accesses; the fourth enables flat scratch and expects scratch_* ones.

; External callee shared by the tests below; its body is not visible here.
declare hidden void @external_void_func_void() #3

; Kernel making two calls separated by an empty inline asm. The callee address
; is expected to be formed once in s[34:35] and reused by the second call.
; GCN-LABEL: {{^}}test_kernel_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
; GCN: s_getpc_b64 s[34:35]
; GCN-NEXT: s_add_u32 s34, s34,
; GCN-NEXT: s_addc_u32 s35, s35,
; GCN: s_swappc_b64 s[30:31], s[34:35]

; GCN-NEXT: #ASMSTART
; GCN-NEXT: #ASMEND
; GCN-NEXT: s_swappc_b64 s[30:31], s[34:35]
define amdgpu_kernel void @test_kernel_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void() #0 {
  call void @external_void_func_void()
  call void asm sideeffect "", ""() #0
  call void @external_void_func_void()
  ret void
}

; Same call sequence inside a non-kernel function. A copy of s33 plus s30, s31,
; s34 and s35 are expected to be written to lanes of v40 before the calls and
; read back before returning.
; GCN-LABEL: {{^}}test_func_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
; GCN: s_mov_b32 [[FP_SCRATCH_COPY:s[0-9]+]], s33
; MUBUF: buffer_store_dword
; FLATSCR: scratch_store_dword
; GCN: v_writelane_b32 v40, [[FP_SCRATCH_COPY]], 4
; GCN: v_writelane_b32 v40, s30, 0
; GCN: v_writelane_b32 v40, s31, 1
; GCN: v_writelane_b32 v40, s34, 2
; GCN: v_writelane_b32 v40, s35, 3

; GCN: s_swappc_b64
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: s_swappc_b64
; GCN: v_readlane_b32 s35, v40, 3
; GCN: v_readlane_b32 s34, v40, 2
; MUBUF-DAG: v_readlane_b32 s31, v40, 1
; MUBUF-DAG: v_readlane_b32 s30, v40, 0
; FLATSCR-DAG: v_readlane_b32 s31, v40, 1
; FLATSCR-DAG: v_readlane_b32 s30, v40, 0

; GCN: v_readlane_b32 [[FP_SCRATCH_COPY:s[0-9]+]], v40, 4
; MUBUF: buffer_load_dword
; FLATSCR: scratch_load_dword
; GCN: s_mov_b32 s33, [[FP_SCRATCH_COPY]]
; GCN: s_setpc_b64 s[30:31]
define void @test_func_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void() #0 {
  call void @external_void_func_void()
  call void asm sideeffect "", ""() #0
  call void @external_void_func_void()
  ret void
}

; Two back-to-back calls from a non-kernel function. The checks cover the s33
; copy kept in lane 4 of v40, the s32 adjustment (0x400 with buffer accesses,
; 16 with flat scratch) and the restore of s33 at the end.
; GCN-LABEL: {{^}}test_func_call_external_void_funcx2:
; GCN: s_mov_b32 [[FP_SCRATCH_COPY:s[0-9]+]], s33
; GCN: s_mov_b32 s33, s32
; MUBUF: buffer_store_dword v40
; FLATSCR: scratch_store_dword off, v40
; GCN: v_writelane_b32 v40, [[FP_SCRATCH_COPY]], 4
; MUBUF: s_addk_i32 s32, 0x400
; FLATSCR: s_add_i32 s32, s32, 16

; GCN: s_swappc_b64
; GCN-NEXT: s_swappc_b64

; GCN: v_readlane_b32 [[FP_SCRATCH_COPY:s[0-9]+]], v40, 4
; MUBUF: buffer_load_dword v40
; FLATSCR: scratch_load_dword v40
; GCN: s_mov_b32 s33, [[FP_SCRATCH_COPY]]
define void @test_func_call_external_void_funcx2() #0 {
  call void @external_void_func_void()
  call void @external_void_func_void()
  ret void
}

; Inline asm that clobbers s[30:31] in a non-kernel function. s30 and s31 are
; expected to be saved to lanes of v0 and restored before s_setpc_b64.
; GCN-LABEL: {{^}}void_func_void_clobber_s30_s31:
; GCN: s_waitcnt
; GCN: v_writelane_b32 v0, s30, 0
; GCN: v_writelane_b32 v0, s31, 1
; GCN-NEXT: #ASMSTART
; GCN: ; clobber
; GCN-NEXT: #ASMEND
; GCN: v_readlane_b32 s31, v0, 1
; GCN: v_readlane_b32 s30, v0, 0
; GCN: s_setpc_b64 s[30:31]
define void @void_func_void_clobber_s30_s31() #2 {
  call void asm sideeffect "; clobber", "~{s[30:31]}"() #0
  ret void
}

; Inline asm that clobbers vcc in a non-kernel function. The checks expect
; nothing but the wait, the asm markers and the return.
; GCN-LABEL: {{^}}void_func_void_clobber_vcc:
; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: s_setpc_b64 s[30:31]
define hidden void @void_func_void_clobber_vcc() #2 {
  call void asm sideeffect "", "~{vcc}"() #0
  ret void
}

; vcc is defined before the call and used after it. The checks expect it to be
; copied into s[34:35] before the call and copied back afterwards.
; GCN-LABEL: {{^}}test_call_void_func_void_clobber_vcc:
; GCN: s_mov_b64 s[34:35], vcc
; GCN-NEXT: s_getpc_b64
; GCN-NEXT: s_add_u32
; GCN-NEXT: s_addc_u32
; GCN-NEXT: s_swappc_b64
; GCN: s_mov_b64 vcc, s[34:35]
define amdgpu_kernel void @test_call_void_func_void_clobber_vcc(ptr addrspace(1) %out) #0 {
  %vcc = call i64 asm sideeffect "; def $0", "={vcc}"()
  call void @void_func_void_clobber_vcc()
  %val0 = load volatile i32, ptr addrspace(1) undef
  %val1 = load volatile i32, ptr addrspace(1) undef
  call void asm sideeffect "; use $0", "{vcc}"(i64 %vcc)
  ret void
}

; s31 is defined before the call and used after it. The checks expect it to be
; parked in s33 across the call.
; GCN-LABEL: {{^}}test_call_void_func_void_mayclobber_s31:
; GCN: s_mov_b32 s33, s31
; GCN: s_swappc_b64
; GCN-NEXT: s_mov_b32 s31, s33
define amdgpu_kernel void @test_call_void_func_void_mayclobber_s31(ptr addrspace(1) %out) #0 {
  %s31 = call i32 asm sideeffect "; def $0", "={s31}"()
  call void @external_void_func_void()
  call void asm sideeffect "; use $0", "{s31}"(i32 %s31)
  ret void
}

; v31 is defined before the call and used after it. The checks expect it to be
; parked in v40 across the call.
; GCN-LABEL: {{^}}test_call_void_func_void_mayclobber_v31:
; GCN: v_mov_b32_e32 v40, v31
; GCN: s_swappc_b64
; GCN-NEXT: v_mov_b32_e32 v31, v40
define amdgpu_kernel void @test_call_void_func_void_mayclobber_v31(ptr addrspace(1) %out) #0 {
  %v31 = call i32 asm sideeffect "; def $0", "={v31}"()
  call void @external_void_func_void()
  call void asm sideeffect "; use $0", "{v31}"(i32 %v31)
  ret void
}

; FIXME: What is the expected behavior for reserved registers here?
; s33 is defined by inline asm before the call and used by inline asm after it.
; The checks expect no further mention of s33 between the use and s_endpgm.
; GCN-LABEL: {{^}}test_call_void_func_void_preserves_s33:
; GCN: #ASMSTART
; GCN-NEXT: ; def s33
; GCN-NEXT: #ASMEND
; FLATSCR: s_getpc_b64 s[0:1]
; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1]
; MUBUF: s_getpc_b64 s[4:5]
; MUBUF-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; MUBUF-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
; MUBUF-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GCN: ;;#ASMSTART
; GCN-NEXT: ; use s33
; GCN-NEXT: ;;#ASMEND
; GCN-NOT: s33
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_void_func_void_preserves_s33(ptr addrspace(1) %out) #0 {
  %s33 = call i32 asm sideeffect "; def $0", "={s33}"()
  call void @external_void_func_void()
  call void asm sideeffect "; use $0", "{s33}"(i32 %s33)
  ret void
}

; s34 is live across the call. Apart from the two inline asm blocks, the checks
; forbid any mention of s34, so no copy around the call is expected. The
; trailing {{.*}} on the label swallows the rest of the label line so the
; negative checks cannot match text on that line.
; GCN-LABEL: {{^}}test_call_void_func_void_preserves_s34: {{.*}}
; GCN-NOT: s34

; GCN: s_mov_b32 s32, 0

; GCN-NOT: s34
; GCN: ;;#ASMSTART
; GCN-NEXT: ; def s34
; GCN-NEXT: ;;#ASMEND
; FLATSCR: s_getpc_b64 s[0:1]
; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
; MUBUF: s_getpc_b64 s[4:5]
; MUBUF-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; MUBUF-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12

; GCN-NOT: s34
; MUBUF: s_swappc_b64 s[30:31], s[4:5]
; FLATSCR: s_swappc_b64 s[30:31], s[0:1]

; GCN-NOT: s34

; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s34
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_void_func_void_preserves_s34(ptr addrspace(1) %out) #0 {
  %s34 = call i32 asm sideeffect "; def $0", "={s34}"()
  call void @external_void_func_void()
  call void asm sideeffect "; use $0", "{s34}"(i32 %s34)
  ret void
}

; v40 is live across the call. Apart from the two inline asm blocks, the checks
; forbid any mention of v40, mirroring the s34 test above.
; GCN-LABEL: {{^}}test_call_void_func_void_preserves_v40: {{.*}}

; GCN-NOT: v40
; GCN: s_mov_b32 s32, 0
; GCN-NOT: v40

; GCN: ;;#ASMSTART
; GCN-NEXT: ; def v40
; GCN-NEXT: ;;#ASMEND
; MUBUF: s_getpc_b64 s[4:5]
; MUBUF-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; MUBUF-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
; FLATSCR: s_getpc_b64 s[0:1]
; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12

; MUBUF: s_swappc_b64 s[30:31], s[4:5]
; FLATSCR: s_swappc_b64 s[30:31], s[0:1]

; GCN-NOT: v40

; GCN: ;;#ASMSTART
; GCN-NEXT: ; use v40
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_void_func_void_preserves_v40(ptr addrspace(1) %out) #0 {
  %v40 = call i32 asm sideeffect "; def $0", "={v40}"()
  call void @external_void_func_void()
  call void asm sideeffect "; use $0", "{v40}"(i32 %v40)
  ret void
}

; Inline asm that clobbers s33 in a non-kernel function. s33 is expected to be
; saved to lane 0 of v0 right before the asm and restored right after it.
; GCN-LABEL: {{^}}void_func_void_clobber_s33:
; GCN: v_writelane_b32 v0, s33, 0
; GCN-NEXT: #ASMSTART
; GCN-NEXT: ; clobber
; GCN-NEXT: #ASMEND
; GCN-NEXT: v_readlane_b32 s33, v0, 0
; GCN: s_setpc_b64
define hidden void @void_func_void_clobber_s33() #2 {
  call void asm sideeffect "; clobber", "~{s33}"() #0
  ret void
}

; Same as the s33 case, but the inline asm clobbers s34.
; GCN-LABEL: {{^}}void_func_void_clobber_s34:
; GCN: v_writelane_b32 v0, s34, 0
; GCN-NEXT: #ASMSTART
; GCN-NEXT: ; clobber
; GCN-NEXT: #ASMEND
; GCN-NEXT: v_readlane_b32 s34, v0, 0
; GCN: s_setpc_b64
define hidden void @void_func_void_clobber_s34() #2 {
  call void asm sideeffect "; clobber", "~{s34}"() #0
  ret void
}

; Kernel calling the s33-clobbering function. The checks expect the call to be
; followed immediately by s_endpgm.
; GCN-LABEL: {{^}}test_call_void_func_void_clobber_s33:
; GCN: s_mov_b32 s32, 0
; GCN: s_getpc_b64
; GCN-NEXT: s_add_u32
; GCN-NEXT: s_addc_u32
; GCN: s_swappc_b64
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_void_func_void_clobber_s33() #0 {
  call void @void_func_void_clobber_s33()
  ret void
}

; Kernel calling the s34-clobbering function. The checks expect the call to be
; followed immediately by s_endpgm.
; GCN-LABEL: {{^}}test_call_void_func_void_clobber_s34:
; GCN: s_mov_b32 s32, 0
; GCN: s_getpc_b64
; GCN-NEXT: s_add_u32
; GCN-NEXT: s_addc_u32
; GCN: s_swappc_b64
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_void_func_void_clobber_s34() #0 {
  call void @void_func_void_clobber_s34()
  ret void
}

; Non-kernel function with s40 live across a call. The checks expect s40 to be
; written to a lane of v40 before the call and read back after the use, with
; no other mention of s40.
; GCN-LABEL: {{^}}callee_saved_sgpr_func:
; GCN-NOT: s40
; GCN: v_writelane_b32 v40, s40
; GCN: s_swappc_b64
; GCN-NOT: s40
; GCN: ; use s40
; GCN-NOT: s40
; GCN: v_readlane_b32 s40, v40
; GCN-NOT: s40
define void @callee_saved_sgpr_func() #2 {
  %s40 = call i32 asm sideeffect "; def s40", "={s40}"() #0
  call void @external_void_func_void()
  call void asm sideeffect "; use $0", "s"(i32 %s40) #0
  ret void
}

; Kernel counterpart: s40 may only be mentioned by the def and use asm blocks.
; GCN-LABEL: {{^}}callee_saved_sgpr_kernel:
; GCN-NOT: s40
; GCN: ; def s40
; GCN-NOT: s40
; GCN: s_swappc_b64
; GCN-NOT: s40
; GCN: ; use s40
; GCN-NOT: s40
define amdgpu_kernel void @callee_saved_sgpr_kernel() #2 {
  %s40 = call i32 asm sideeffect "; def s40", "={s40}"() #0
  call void @external_void_func_void()
  call void asm sideeffect "; use $0", "s"(i32 %s40) #0
  ret void
}

; The first call-preserved VGPR is already in use, so it can't be used for SGPR spills.
; Non-kernel function in which both s40 and v40 are defined before the call and
; used after it. The checks expect s40 to be written to and read back from a
; lane of v41, with no other mention of s40 besides the use.
; GCN-LABEL: {{^}}callee_saved_sgpr_vgpr_func:
; GCN-NOT: s40
; GCN: v_writelane_b32 v41, s40
; GCN: s_swappc_b64
; GCN-NOT: s40
; GCN: ; use s40
; GCN-NOT: s40
; GCN: v_readlane_b32 s40, v41
; GCN-NOT: s40
define void @callee_saved_sgpr_vgpr_func() #2 {
  %s40 = call i32 asm sideeffect "; def s40", "={s40}"() #0
  %v40 = call i32 asm sideeffect "; def v40", "={v40}"() #0
  call void @external_void_func_void()
  call void asm sideeffect "; use $0", "s"(i32 %s40) #0
  call void asm sideeffect "; use $0", "v"(i32 %v40) #0
  ret void
}

; Kernel counterpart: s40 may only be mentioned by the def and use asm blocks.
; NOTE(review): the function variant pins v40 while this kernel pins v32 --
; confirm whether the difference is intentional.
; GCN-LABEL: {{^}}callee_saved_sgpr_vgpr_kernel:
; GCN-NOT: s40
; GCN: ; def s40
; GCN-NOT: s40
; GCN: s_swappc_b64
; GCN-NOT: s40
; GCN: ; use s40
; GCN-NOT: s40
define amdgpu_kernel void @callee_saved_sgpr_vgpr_kernel() #2 {
  %s40 = call i32 asm sideeffect "; def s40", "={s40}"() #0
  %v32 = call i32 asm sideeffect "; def v32", "={v32}"() #0
  call void @external_void_func_void()
  call void asm sideeffect "; use $0", "s"(i32 %s40) #0
  call void asm sideeffect "; use $0", "v"(i32 %v32) #0
  ret void
}

; Attribute groups: #0 is plain nounwind, #2 adds noinline, and #3 carries the
; "amdgpu-no-*" markers. #1 is not referenced by any definition in this part
; of the file.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind noinline }
attributes #3 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }