; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX90A %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX908 %s

; This test compiles the same IR for gfx90a and gfx908 and checks how
; accumulator registers (written a<N> in the assembly) are handled around
; calls:
;   * the func_* callees clobber selected AGPRs through inline asm, and the
;     checks look for (or forbid) v_accvgpr copies and buffer_ instructions
;     around the clobber;
;   * the test_call_* kernels keep a 32-element value in AGPRs live across a
;     call to one of those callees and check what is emitted around the call.
; Keep the functions in this order: the check directives below are matched
; against the llc output sequentially.

; --- Callees -----------------------------------------------------------------

; Callee with no register clobbers: no buffer_ or v_accvgpr instruction may
; appear before the return.
; GCN-LABEL: {{^}}func_empty:
; GCN-NOT: buffer_
; GCN-NOT: v_accvgpr
; GCN: s_setpc_b64
define void @func_empty() #0 {
  ret void
}

; Clobbers a3. Neither target is expected to emit buffer_ or v_accvgpr
; instructions before or after the inline asm.
; GCN-LABEL: {{^}}func_areg_4:
; GCN-NOT: buffer_
; GCN-NOT: v_accvgpr
; GCN: use agpr3
; GCN-NOT: buffer_
; GCN-NOT: v_accvgpr
; GCN: s_setpc_b64
define void @func_areg_4() #0 {
  call void asm sideeffect "; use agpr3", "~{a3}" ()
  ret void
}

; Clobbers a31. Same expectations as for a3 above.
; GCN-LABEL: {{^}}func_areg_32:
; GCN-NOT: buffer_
; GCN-NOT: v_accvgpr
; GCN: use agpr31
; GCN-NOT: buffer_
; GCN-NOT: v_accvgpr
; GCN: s_setpc_b64
define void @func_areg_32() #0 {
  call void asm sideeffect "; use agpr31", "~{a31}" ()
  ret void
}

; Clobbers a32. The gfx90a run expects a32 to be copied into v0 before the
; inline asm and copied back afterwards; no other reference to a32 is allowed
; on either target.
; GCN-LABEL: {{^}}func_areg_33:
; GCN-NOT: a32
; GFX90A: v_accvgpr_read_b32 v0, a32 ; Reload Reuse
; GCN-NOT: a32
; GCN: use agpr32
; GCN-NOT: a32
; GFX90A: v_accvgpr_write_b32 a32, v0 ; Reload Reuse
; GCN-NOT: a32
; GCN: s_setpc_b64
define void @func_areg_33() #0 {
  call void asm sideeffect "; use agpr32", "~{a32}" ()
  ret void
}

; Clobbers a63. The gfx90a run expects the same copy-out / copy-back through
; v0 as for a32; the gfx908 run forbids buffer_ and v_accvgpr instructions.
; GCN-LABEL: {{^}}func_areg_64:
; GFX908-NOT: buffer_
; GCN-NOT: v_accvgpr
; GFX90A: v_accvgpr_read_b32 v0, a63 ; Reload Reuse
; GCN: use agpr63
; GFX90A: v_accvgpr_write_b32 a63, v0 ; Reload Reuse
; GCN-NOT: v_accvgpr
; GCN: s_setpc_b64
define void @func_areg_64() #0 {
  call void asm sideeffect "; use agpr63", "~{a63}" ()
  ret void
}

; Clobbers both a31 and a63. Only a63 is expected to be copied out and back
; on gfx90a; the gfx908 run forbids buffer and v_accvgpr instructions.
; GCN-LABEL: {{^}}func_areg_31_63:
; GFX908-NOT: buffer_
; GFX908-NOT: v_accvgpr
; GFX908-NOT: buffer
; GFX90A: v_accvgpr_read_b32 v0, a63 ; Reload Reuse
; GCN: use agpr31, agpr63
; GFX90A: v_accvgpr_write_b32 a63, v0 ; Reload Reuse
; GFX908-NOT: v_accvgpr
; GFX908-NOT: buffer
; GCN: s_setpc_b64
define void @func_areg_31_63() #0 {
  call void asm sideeffect "; use agpr31, agpr63", "~{a31},~{a63}" ()
  ret void
}

; External callee whose body is not visible in this module.
declare void @func_unknown() #0

; --- Kernels -----------------------------------------------------------------
;
; Every kernel below has the same shape: define a <32 x float> through the "a"
; inline-asm constraint, make a call, then store the value with a volatile
; store so it must stay live across the call.
;
; Shared expectations:
;   * gfx908 run: v_accvgpr_read_b32 copies before the call and the stores use
;     VGPR data operands.
;   * gfx90a run: no v_accvgpr instructions at all and the stores use AGPR
;     data operands directly.
; The AGPR range expected for the definition on gfx90a differs per callee.
; NOTE(review): presumably the range is chosen to avoid registers the callee
; may clobber -- confirm against the register allocator behaviour.

; Callee clobbers nothing; the value is expected in a[0:31] on both targets.
; GCN-LABEL: {{^}}test_call_empty:
; GCN-NOT: buffer_
; GCN-NOT: v_accvgpr
; GCN: def a[0:31]
; GFX908-COUNT-8: v_accvgpr_read_b32
; GFX90A-NOT: v_accvgpr
; GCN-NOT: buffer_
; GCN: s_swappc_b64
; GCN-NOT: buffer_
; GFX90A-NOT: v_accvgpr
; GFX908-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], v[{{[0-9:]+}}]
; GFX90A-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], a[{{[0-9:]+}}]
; GCN: s_endpgm
define amdgpu_kernel void @test_call_empty() #0 {
entry:
  %acc = call <32 x float> asm sideeffect "; def $0", "=a"()
  call void @func_empty()
  store volatile <32 x float> %acc, ptr addrspace(1) undef
  ret void
}

; Callee clobbers a3; the gfx90a run expects the value in a[4:35].
; GCN-LABEL: {{^}}test_call_areg4:
; GCN-NOT: buffer_
; GCN-NOT: v_accvgpr
; GFX908: def a[0:31]
; GFX90A: def a[4:35]
; GFX908-COUNT-8: v_accvgpr_read_b32
; GFX90A-NOT: v_accvgpr
; GCN-NOT: buffer_
; GCN: s_swappc_b64
; GCN-NOT: buffer_
; GFX90A-NOT: v_accvgpr
; GFX908-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], v[{{[0-9:]+}}]
; GFX90A-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], a[{{[0-9:]+}}]
; GCN: s_endpgm
define amdgpu_kernel void @test_call_areg4() #0 {
entry:
  %acc = call <32 x float> asm sideeffect "; def $0", "=a"()
  call void @func_areg_4()
  store volatile <32 x float> %acc, ptr addrspace(1) undef
  ret void
}

; Callee clobbers a31; the gfx90a run expects the value in a[32:63].
; GCN-LABEL: {{^}}test_call_areg32:
; GCN-NOT: buffer_
; GCN-NOT: v_accvgpr
; GFX908: def a[0:31]
; GFX90A: def a[32:63]
; GFX908-COUNT-8: v_accvgpr_read_b32
; GFX90A-NOT: v_accvgpr
; GCN-NOT: buffer_
; GCN: s_swappc_b64
; GCN-NOT: buffer_
; GFX90A-NOT: v_accvgpr
; GFX908-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], v[{{[0-9:]+}}]
; GFX90A-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], a[{{[0-9:]+}}]
; GCN: s_endpgm
define amdgpu_kernel void @test_call_areg32() #0 {
entry:
  %acc = call <32 x float> asm sideeffect "; def $0", "=a"()
  call void @func_areg_32()
  store volatile <32 x float> %acc, ptr addrspace(1) undef
  ret void
}

; Callee clobbers a63; the value is expected in a[0:31] on both targets.
; GCN-LABEL: {{^}}test_call_areg64:
; GCN-NOT: buffer_
; GCN-NOT: v_accvgpr
; GCN: def a[0:31]
; GFX908-COUNT-8: v_accvgpr_read_b32
; GFX90A-NOT: v_accvgpr
; GCN-NOT: buffer_
; GCN: s_swappc_b64
; GCN-NOT: buffer_
; GFX90A-NOT: v_accvgpr
; GFX908-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], v[{{[0-9:]+}}]
; GFX90A-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], a[{{[0-9:]+}}]
; GCN: s_endpgm
define amdgpu_kernel void @test_call_areg64() #0 {
entry:
  %acc = call <32 x float> asm sideeffect "; def $0", "=a"()
  call void @func_areg_64()
  store volatile <32 x float> %acc, ptr addrspace(1) undef
  ret void
}

; Callee clobbers a31 and a63; the gfx90a run expects the value in a[32:63].
; GCN-LABEL: {{^}}test_call_areg31_63:
; GCN-NOT: buffer_
; GCN-NOT: v_accvgpr
; GFX908: def a[0:31]
; GFX90A: def a[32:63]
; GFX908-COUNT-8: v_accvgpr_read_b32
; GFX90A-NOT: v_accvgpr
; GCN-NOT: buffer_
; GCN: s_swappc_b64
; GCN-NOT: buffer_
; GFX90A-NOT: v_accvgpr
; GFX908-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], v[{{[0-9:]+}}]
; GFX90A-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], a[{{[0-9:]+}}]
; GCN: s_endpgm
define amdgpu_kernel void @test_call_areg31_63() #0 {
entry:
  %acc = call <32 x float> asm sideeffect "; def $0", "=a"()
  call void @func_areg_31_63()
  store volatile <32 x float> %acc, ptr addrspace(1) undef
  ret void
}

; Callee is only declared; the gfx90a run expects the value in a[32:63].
; GCN-LABEL: {{^}}test_call_unknown:
; GCN-NOT: buffer_
; GCN-NOT: v_accvgpr
; GFX908: def a[0:31]
; GFX90A: def a[32:63]
; GFX908-COUNT-8: v_accvgpr_read_b32
; GFX90A-NOT: v_accvgpr
; GCN-NOT: buffer_
; GCN: s_swappc_b64
; GCN-NOT: buffer_
; GFX90A-NOT: v_accvgpr
; GFX908-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], v[{{[0-9:]+}}]
; GFX90A-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], a[{{[0-9:]+}}]
; GCN: s_endpgm
define amdgpu_kernel void @test_call_unknown() #0 {
entry:
  %acc = call <32 x float> asm sideeffect "; def $0", "=a"()
  call void @func_unknown()
  store volatile <32 x float> %acc, ptr addrspace(1) undef
  ret void
}

; Attribute group shared by every function above. noinline keeps the calls in
; the kernels as real calls.
; NOTE(review): the flat work-group size bound presumably affects the
; per-wave register budget on these targets -- confirm before changing it.
attributes #0 = { noinline nounwind "amdgpu-flat-work-group-size"="1,512" }