1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s 3; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -global-isel < %s | FileCheck -check-prefix=GISEL %s 4 5@gv.fptr0 = external hidden unnamed_addr addrspace(4) constant ptr, align 4 6@gv.fptr1 = external hidden unnamed_addr addrspace(4) constant ptr, align 4 7 8define amdgpu_kernel void @test_indirect_call_sgpr_ptr(i8) { 9; GCN-LABEL: test_indirect_call_sgpr_ptr: 10; GCN: ; %bb.0: 11; GCN-NEXT: s_mov_b32 s32, 0 12; GCN-NEXT: s_mov_b32 flat_scratch_lo, s13 13; GCN-NEXT: s_add_i32 s12, s12, s17 14; GCN-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 15; GCN-NEXT: s_add_u32 s0, s0, s17 16; GCN-NEXT: s_addc_u32 s1, s1, 0 17; GCN-NEXT: s_mov_b32 s13, s15 18; GCN-NEXT: s_mov_b32 s12, s14 19; GCN-NEXT: s_getpc_b64 s[14:15] 20; GCN-NEXT: s_add_u32 s14, s14, gv.fptr0@rel32@lo+4 21; GCN-NEXT: s_addc_u32 s15, s15, gv.fptr0@rel32@hi+12 22; GCN-NEXT: v_lshlrev_b32_e32 v2, 20, v2 23; GCN-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0 24; GCN-NEXT: s_add_u32 s8, s8, 8 25; GCN-NEXT: s_addc_u32 s9, s9, 0 26; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1 27; GCN-NEXT: v_or_b32_e32 v0, v0, v1 28; GCN-NEXT: v_or_b32_e32 v31, v0, v2 29; GCN-NEXT: s_mov_b32 s14, s16 30; GCN-NEXT: s_waitcnt lgkmcnt(0) 31; GCN-NEXT: s_swappc_b64 s[30:31], s[18:19] 32; GCN-NEXT: s_endpgm 33; 34; GISEL-LABEL: test_indirect_call_sgpr_ptr: 35; GISEL: ; %bb.0: 36; GISEL-NEXT: s_mov_b32 s32, 0 37; GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13 38; GISEL-NEXT: s_add_i32 s12, s12, s17 39; GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 40; GISEL-NEXT: s_add_u32 s0, s0, s17 41; GISEL-NEXT: s_addc_u32 s1, s1, 0 42; GISEL-NEXT: s_mov_b32 s13, s15 43; GISEL-NEXT: s_mov_b32 s12, s14 44; GISEL-NEXT: s_getpc_b64 s[14:15] 45; GISEL-NEXT: s_add_u32 s14, s14, gv.fptr0@rel32@lo+4 46; GISEL-NEXT: s_addc_u32 s15, s15, gv.fptr0@rel32@hi+12 47; GISEL-NEXT: 
v_lshlrev_b32_e32 v1, 10, v1 48; GISEL-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0 49; GISEL-NEXT: v_or_b32_e32 v0, v0, v1 50; GISEL-NEXT: s_add_u32 s8, s8, 8 51; GISEL-NEXT: s_addc_u32 s9, s9, 0 52; GISEL-NEXT: v_lshlrev_b32_e32 v1, 20, v2 53; GISEL-NEXT: v_or_b32_e32 v31, v0, v1 54; GISEL-NEXT: s_mov_b32 s14, s16 55; GISEL-NEXT: s_waitcnt lgkmcnt(0) 56; GISEL-NEXT: s_swappc_b64 s[30:31], s[18:19] 57; GISEL-NEXT: s_endpgm 58 %fptr = load ptr, ptr addrspace(4) @gv.fptr0 59 call void %fptr() 60 ret void 61} 62 63define amdgpu_kernel void @test_indirect_call_sgpr_ptr_arg(i8) { 64; GCN-LABEL: test_indirect_call_sgpr_ptr_arg: 65; GCN: ; %bb.0: 66; GCN-NEXT: s_mov_b32 s32, 0 67; GCN-NEXT: s_mov_b32 flat_scratch_lo, s13 68; GCN-NEXT: s_add_i32 s12, s12, s17 69; GCN-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 70; GCN-NEXT: s_add_u32 s0, s0, s17 71; GCN-NEXT: s_addc_u32 s1, s1, 0 72; GCN-NEXT: s_mov_b32 s13, s15 73; GCN-NEXT: s_mov_b32 s12, s14 74; GCN-NEXT: s_getpc_b64 s[14:15] 75; GCN-NEXT: s_add_u32 s14, s14, gv.fptr1@rel32@lo+4 76; GCN-NEXT: s_addc_u32 s15, s15, gv.fptr1@rel32@hi+12 77; GCN-NEXT: v_lshlrev_b32_e32 v2, 20, v2 78; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1 79; GCN-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0 80; GCN-NEXT: s_add_u32 s8, s8, 8 81; GCN-NEXT: s_addc_u32 s9, s9, 0 82; GCN-NEXT: v_or_b32_e32 v0, v0, v1 83; GCN-NEXT: v_or_b32_e32 v31, v0, v2 84; GCN-NEXT: v_mov_b32_e32 v0, 0x7b 85; GCN-NEXT: s_mov_b32 s14, s16 86; GCN-NEXT: s_waitcnt lgkmcnt(0) 87; GCN-NEXT: s_swappc_b64 s[30:31], s[18:19] 88; GCN-NEXT: s_endpgm 89; 90; GISEL-LABEL: test_indirect_call_sgpr_ptr_arg: 91; GISEL: ; %bb.0: 92; GISEL-NEXT: s_mov_b32 s32, 0 93; GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13 94; GISEL-NEXT: s_add_i32 s12, s12, s17 95; GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 96; GISEL-NEXT: s_add_u32 s0, s0, s17 97; GISEL-NEXT: s_addc_u32 s1, s1, 0 98; GISEL-NEXT: s_mov_b32 s13, s15 99; GISEL-NEXT: s_mov_b32 s12, s14 100; GISEL-NEXT: s_getpc_b64 s[14:15] 101; GISEL-NEXT: 
s_add_u32 s14, s14, gv.fptr1@rel32@lo+4 102; GISEL-NEXT: s_addc_u32 s15, s15, gv.fptr1@rel32@hi+12 103; GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1 104; GISEL-NEXT: v_lshlrev_b32_e32 v2, 20, v2 105; GISEL-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0 106; GISEL-NEXT: v_or_b32_e32 v0, v0, v1 107; GISEL-NEXT: s_add_u32 s8, s8, 8 108; GISEL-NEXT: s_addc_u32 s9, s9, 0 109; GISEL-NEXT: v_or_b32_e32 v31, v0, v2 110; GISEL-NEXT: v_mov_b32_e32 v0, 0x7b 111; GISEL-NEXT: s_mov_b32 s14, s16 112; GISEL-NEXT: s_waitcnt lgkmcnt(0) 113; GISEL-NEXT: s_swappc_b64 s[30:31], s[18:19] 114; GISEL-NEXT: s_endpgm 115 %fptr = load ptr, ptr addrspace(4) @gv.fptr1 116 call void %fptr(i32 123) 117 ret void 118} 119 120define void @test_indirect_call_vgpr_ptr(ptr %fptr) { 121; GCN-LABEL: test_indirect_call_vgpr_ptr: 122; GCN: ; %bb.0: 123; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 124; GCN-NEXT: s_mov_b32 s16, s33 125; GCN-NEXT: s_mov_b32 s33, s32 126; GCN-NEXT: s_or_saveexec_b64 s[18:19], -1 127; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill 128; GCN-NEXT: s_mov_b64 exec, s[18:19] 129; GCN-NEXT: v_writelane_b32 v40, s16, 18 130; GCN-NEXT: s_addk_i32 s32, 0x400 131; GCN-NEXT: v_writelane_b32 v40, s30, 0 132; GCN-NEXT: v_writelane_b32 v40, s31, 1 133; GCN-NEXT: v_writelane_b32 v40, s34, 2 134; GCN-NEXT: v_writelane_b32 v40, s35, 3 135; GCN-NEXT: v_writelane_b32 v40, s36, 4 136; GCN-NEXT: v_writelane_b32 v40, s37, 5 137; GCN-NEXT: v_writelane_b32 v40, s38, 6 138; GCN-NEXT: v_writelane_b32 v40, s39, 7 139; GCN-NEXT: v_writelane_b32 v40, s40, 8 140; GCN-NEXT: v_writelane_b32 v40, s41, 9 141; GCN-NEXT: v_writelane_b32 v40, s42, 10 142; GCN-NEXT: v_writelane_b32 v40, s43, 11 143; GCN-NEXT: v_writelane_b32 v40, s44, 12 144; GCN-NEXT: v_writelane_b32 v40, s45, 13 145; GCN-NEXT: v_writelane_b32 v40, s46, 14 146; GCN-NEXT: v_writelane_b32 v40, s47, 15 147; GCN-NEXT: v_writelane_b32 v40, s48, 16 148; GCN-NEXT: v_writelane_b32 v40, s49, 17 149; GCN-NEXT: s_mov_b32 s42, s15 
150; GCN-NEXT: s_mov_b32 s43, s14 151; GCN-NEXT: s_mov_b32 s44, s13 152; GCN-NEXT: s_mov_b32 s45, s12 153; GCN-NEXT: s_mov_b64 s[34:35], s[10:11] 154; GCN-NEXT: s_mov_b64 s[36:37], s[8:9] 155; GCN-NEXT: s_mov_b64 s[38:39], s[6:7] 156; GCN-NEXT: s_mov_b64 s[40:41], s[4:5] 157; GCN-NEXT: s_mov_b64 s[46:47], exec 158; GCN-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1 159; GCN-NEXT: v_readfirstlane_b32 s16, v0 160; GCN-NEXT: v_readfirstlane_b32 s17, v1 161; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] 162; GCN-NEXT: s_and_saveexec_b64 s[48:49], vcc 163; GCN-NEXT: s_mov_b64 s[4:5], s[40:41] 164; GCN-NEXT: s_mov_b64 s[6:7], s[38:39] 165; GCN-NEXT: s_mov_b64 s[8:9], s[36:37] 166; GCN-NEXT: s_mov_b64 s[10:11], s[34:35] 167; GCN-NEXT: s_mov_b32 s12, s45 168; GCN-NEXT: s_mov_b32 s13, s44 169; GCN-NEXT: s_mov_b32 s14, s43 170; GCN-NEXT: s_mov_b32 s15, s42 171; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] 172; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1 173; GCN-NEXT: ; implicit-def: $vgpr31 174; GCN-NEXT: s_xor_b64 exec, exec, s[48:49] 175; GCN-NEXT: s_cbranch_execnz .LBB2_1 176; GCN-NEXT: ; %bb.2: 177; GCN-NEXT: s_mov_b64 exec, s[46:47] 178; GCN-NEXT: v_readlane_b32 s49, v40, 17 179; GCN-NEXT: v_readlane_b32 s48, v40, 16 180; GCN-NEXT: v_readlane_b32 s47, v40, 15 181; GCN-NEXT: v_readlane_b32 s46, v40, 14 182; GCN-NEXT: v_readlane_b32 s45, v40, 13 183; GCN-NEXT: v_readlane_b32 s44, v40, 12 184; GCN-NEXT: v_readlane_b32 s43, v40, 11 185; GCN-NEXT: v_readlane_b32 s42, v40, 10 186; GCN-NEXT: v_readlane_b32 s41, v40, 9 187; GCN-NEXT: v_readlane_b32 s40, v40, 8 188; GCN-NEXT: v_readlane_b32 s39, v40, 7 189; GCN-NEXT: v_readlane_b32 s38, v40, 6 190; GCN-NEXT: v_readlane_b32 s37, v40, 5 191; GCN-NEXT: v_readlane_b32 s36, v40, 4 192; GCN-NEXT: v_readlane_b32 s35, v40, 3 193; GCN-NEXT: v_readlane_b32 s34, v40, 2 194; GCN-NEXT: v_readlane_b32 s31, v40, 1 195; GCN-NEXT: v_readlane_b32 s30, v40, 0 196; GCN-NEXT: s_mov_b32 s32, s33 197; GCN-NEXT: v_readlane_b32 s4, v40, 18 198; 
GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 199; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload 200; GCN-NEXT: s_mov_b64 exec, s[6:7] 201; GCN-NEXT: s_mov_b32 s33, s4 202; GCN-NEXT: s_waitcnt vmcnt(0) 203; GCN-NEXT: s_setpc_b64 s[30:31] 204; 205; GISEL-LABEL: test_indirect_call_vgpr_ptr: 206; GISEL: ; %bb.0: 207; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 208; GISEL-NEXT: s_mov_b32 s16, s33 209; GISEL-NEXT: s_mov_b32 s33, s32 210; GISEL-NEXT: s_or_saveexec_b64 s[18:19], -1 211; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill 212; GISEL-NEXT: s_mov_b64 exec, s[18:19] 213; GISEL-NEXT: v_writelane_b32 v40, s16, 18 214; GISEL-NEXT: s_addk_i32 s32, 0x400 215; GISEL-NEXT: v_writelane_b32 v40, s30, 0 216; GISEL-NEXT: v_writelane_b32 v40, s31, 1 217; GISEL-NEXT: v_writelane_b32 v40, s34, 2 218; GISEL-NEXT: v_writelane_b32 v40, s35, 3 219; GISEL-NEXT: v_writelane_b32 v40, s36, 4 220; GISEL-NEXT: v_writelane_b32 v40, s37, 5 221; GISEL-NEXT: v_writelane_b32 v40, s38, 6 222; GISEL-NEXT: v_writelane_b32 v40, s39, 7 223; GISEL-NEXT: v_writelane_b32 v40, s40, 8 224; GISEL-NEXT: v_writelane_b32 v40, s41, 9 225; GISEL-NEXT: v_writelane_b32 v40, s42, 10 226; GISEL-NEXT: v_writelane_b32 v40, s43, 11 227; GISEL-NEXT: v_writelane_b32 v40, s44, 12 228; GISEL-NEXT: v_writelane_b32 v40, s45, 13 229; GISEL-NEXT: v_writelane_b32 v40, s46, 14 230; GISEL-NEXT: v_writelane_b32 v40, s47, 15 231; GISEL-NEXT: v_writelane_b32 v40, s48, 16 232; GISEL-NEXT: v_writelane_b32 v40, s49, 17 233; GISEL-NEXT: s_mov_b32 s42, s15 234; GISEL-NEXT: s_mov_b32 s43, s14 235; GISEL-NEXT: s_mov_b32 s44, s13 236; GISEL-NEXT: s_mov_b32 s45, s12 237; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11] 238; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9] 239; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7] 240; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5] 241; GISEL-NEXT: s_mov_b64 s[46:47], exec 242; GISEL-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1 243; GISEL-NEXT: v_readfirstlane_b32 
s16, v0 244; GISEL-NEXT: v_readfirstlane_b32 s17, v1 245; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] 246; GISEL-NEXT: s_and_saveexec_b64 s[48:49], vcc 247; GISEL-NEXT: s_mov_b64 s[4:5], s[40:41] 248; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39] 249; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37] 250; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35] 251; GISEL-NEXT: s_mov_b32 s12, s45 252; GISEL-NEXT: s_mov_b32 s13, s44 253; GISEL-NEXT: s_mov_b32 s14, s43 254; GISEL-NEXT: s_mov_b32 s15, s42 255; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17] 256; GISEL-NEXT: ; implicit-def: $vgpr0 257; GISEL-NEXT: ; implicit-def: $vgpr31 258; GISEL-NEXT: s_xor_b64 exec, exec, s[48:49] 259; GISEL-NEXT: s_cbranch_execnz .LBB2_1 260; GISEL-NEXT: ; %bb.2: 261; GISEL-NEXT: s_mov_b64 exec, s[46:47] 262; GISEL-NEXT: v_readlane_b32 s49, v40, 17 263; GISEL-NEXT: v_readlane_b32 s48, v40, 16 264; GISEL-NEXT: v_readlane_b32 s47, v40, 15 265; GISEL-NEXT: v_readlane_b32 s46, v40, 14 266; GISEL-NEXT: v_readlane_b32 s45, v40, 13 267; GISEL-NEXT: v_readlane_b32 s44, v40, 12 268; GISEL-NEXT: v_readlane_b32 s43, v40, 11 269; GISEL-NEXT: v_readlane_b32 s42, v40, 10 270; GISEL-NEXT: v_readlane_b32 s41, v40, 9 271; GISEL-NEXT: v_readlane_b32 s40, v40, 8 272; GISEL-NEXT: v_readlane_b32 s39, v40, 7 273; GISEL-NEXT: v_readlane_b32 s38, v40, 6 274; GISEL-NEXT: v_readlane_b32 s37, v40, 5 275; GISEL-NEXT: v_readlane_b32 s36, v40, 4 276; GISEL-NEXT: v_readlane_b32 s35, v40, 3 277; GISEL-NEXT: v_readlane_b32 s34, v40, 2 278; GISEL-NEXT: v_readlane_b32 s31, v40, 1 279; GISEL-NEXT: v_readlane_b32 s30, v40, 0 280; GISEL-NEXT: s_mov_b32 s32, s33 281; GISEL-NEXT: v_readlane_b32 s4, v40, 18 282; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 283; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload 284; GISEL-NEXT: s_mov_b64 exec, s[6:7] 285; GISEL-NEXT: s_mov_b32 s33, s4 286; GISEL-NEXT: s_waitcnt vmcnt(0) 287; GISEL-NEXT: s_setpc_b64 s[30:31] 288 call void %fptr() 289 ret void 290} 291 292define void 
@test_indirect_call_vgpr_ptr_arg(ptr %fptr) { 293; GCN-LABEL: test_indirect_call_vgpr_ptr_arg: 294; GCN: ; %bb.0: 295; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 296; GCN-NEXT: s_mov_b32 s16, s33 297; GCN-NEXT: s_mov_b32 s33, s32 298; GCN-NEXT: s_or_saveexec_b64 s[18:19], -1 299; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill 300; GCN-NEXT: s_mov_b64 exec, s[18:19] 301; GCN-NEXT: v_writelane_b32 v40, s16, 18 302; GCN-NEXT: s_addk_i32 s32, 0x400 303; GCN-NEXT: v_writelane_b32 v40, s30, 0 304; GCN-NEXT: v_writelane_b32 v40, s31, 1 305; GCN-NEXT: v_writelane_b32 v40, s34, 2 306; GCN-NEXT: v_writelane_b32 v40, s35, 3 307; GCN-NEXT: v_writelane_b32 v40, s36, 4 308; GCN-NEXT: v_writelane_b32 v40, s37, 5 309; GCN-NEXT: v_writelane_b32 v40, s38, 6 310; GCN-NEXT: v_writelane_b32 v40, s39, 7 311; GCN-NEXT: v_writelane_b32 v40, s40, 8 312; GCN-NEXT: v_writelane_b32 v40, s41, 9 313; GCN-NEXT: v_writelane_b32 v40, s42, 10 314; GCN-NEXT: v_writelane_b32 v40, s43, 11 315; GCN-NEXT: v_writelane_b32 v40, s44, 12 316; GCN-NEXT: v_writelane_b32 v40, s45, 13 317; GCN-NEXT: v_writelane_b32 v40, s46, 14 318; GCN-NEXT: v_writelane_b32 v40, s47, 15 319; GCN-NEXT: v_writelane_b32 v40, s48, 16 320; GCN-NEXT: v_writelane_b32 v40, s49, 17 321; GCN-NEXT: s_mov_b32 s42, s15 322; GCN-NEXT: s_mov_b32 s43, s14 323; GCN-NEXT: s_mov_b32 s44, s13 324; GCN-NEXT: s_mov_b32 s45, s12 325; GCN-NEXT: s_mov_b64 s[34:35], s[10:11] 326; GCN-NEXT: s_mov_b64 s[36:37], s[8:9] 327; GCN-NEXT: s_mov_b64 s[38:39], s[6:7] 328; GCN-NEXT: s_mov_b64 s[40:41], s[4:5] 329; GCN-NEXT: s_mov_b64 s[46:47], exec 330; GCN-NEXT: v_mov_b32_e32 v2, 0x7b 331; GCN-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 332; GCN-NEXT: v_readfirstlane_b32 s16, v0 333; GCN-NEXT: v_readfirstlane_b32 s17, v1 334; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] 335; GCN-NEXT: s_and_saveexec_b64 s[48:49], vcc 336; GCN-NEXT: s_mov_b64 s[4:5], s[40:41] 337; GCN-NEXT: s_mov_b64 s[6:7], s[38:39] 338; GCN-NEXT: 
s_mov_b64 s[8:9], s[36:37] 339; GCN-NEXT: s_mov_b64 s[10:11], s[34:35] 340; GCN-NEXT: s_mov_b32 s12, s45 341; GCN-NEXT: s_mov_b32 s13, s44 342; GCN-NEXT: s_mov_b32 s14, s43 343; GCN-NEXT: s_mov_b32 s15, s42 344; GCN-NEXT: v_mov_b32_e32 v0, v2 345; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] 346; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1 347; GCN-NEXT: ; implicit-def: $vgpr31 348; GCN-NEXT: ; implicit-def: $vgpr2 349; GCN-NEXT: s_xor_b64 exec, exec, s[48:49] 350; GCN-NEXT: s_cbranch_execnz .LBB3_1 351; GCN-NEXT: ; %bb.2: 352; GCN-NEXT: s_mov_b64 exec, s[46:47] 353; GCN-NEXT: v_readlane_b32 s49, v40, 17 354; GCN-NEXT: v_readlane_b32 s48, v40, 16 355; GCN-NEXT: v_readlane_b32 s47, v40, 15 356; GCN-NEXT: v_readlane_b32 s46, v40, 14 357; GCN-NEXT: v_readlane_b32 s45, v40, 13 358; GCN-NEXT: v_readlane_b32 s44, v40, 12 359; GCN-NEXT: v_readlane_b32 s43, v40, 11 360; GCN-NEXT: v_readlane_b32 s42, v40, 10 361; GCN-NEXT: v_readlane_b32 s41, v40, 9 362; GCN-NEXT: v_readlane_b32 s40, v40, 8 363; GCN-NEXT: v_readlane_b32 s39, v40, 7 364; GCN-NEXT: v_readlane_b32 s38, v40, 6 365; GCN-NEXT: v_readlane_b32 s37, v40, 5 366; GCN-NEXT: v_readlane_b32 s36, v40, 4 367; GCN-NEXT: v_readlane_b32 s35, v40, 3 368; GCN-NEXT: v_readlane_b32 s34, v40, 2 369; GCN-NEXT: v_readlane_b32 s31, v40, 1 370; GCN-NEXT: v_readlane_b32 s30, v40, 0 371; GCN-NEXT: s_mov_b32 s32, s33 372; GCN-NEXT: v_readlane_b32 s4, v40, 18 373; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 374; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload 375; GCN-NEXT: s_mov_b64 exec, s[6:7] 376; GCN-NEXT: s_mov_b32 s33, s4 377; GCN-NEXT: s_waitcnt vmcnt(0) 378; GCN-NEXT: s_setpc_b64 s[30:31] 379; 380; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg: 381; GISEL: ; %bb.0: 382; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 383; GISEL-NEXT: s_mov_b32 s16, s33 384; GISEL-NEXT: s_mov_b32 s33, s32 385; GISEL-NEXT: s_or_saveexec_b64 s[18:19], -1 386; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill 
387; GISEL-NEXT: s_mov_b64 exec, s[18:19] 388; GISEL-NEXT: v_writelane_b32 v40, s16, 18 389; GISEL-NEXT: s_addk_i32 s32, 0x400 390; GISEL-NEXT: v_writelane_b32 v40, s30, 0 391; GISEL-NEXT: v_writelane_b32 v40, s31, 1 392; GISEL-NEXT: v_writelane_b32 v40, s34, 2 393; GISEL-NEXT: v_writelane_b32 v40, s35, 3 394; GISEL-NEXT: v_writelane_b32 v40, s36, 4 395; GISEL-NEXT: v_writelane_b32 v40, s37, 5 396; GISEL-NEXT: v_writelane_b32 v40, s38, 6 397; GISEL-NEXT: v_writelane_b32 v40, s39, 7 398; GISEL-NEXT: v_writelane_b32 v40, s40, 8 399; GISEL-NEXT: v_writelane_b32 v40, s41, 9 400; GISEL-NEXT: v_writelane_b32 v40, s42, 10 401; GISEL-NEXT: v_writelane_b32 v40, s43, 11 402; GISEL-NEXT: v_writelane_b32 v40, s44, 12 403; GISEL-NEXT: v_writelane_b32 v40, s45, 13 404; GISEL-NEXT: v_writelane_b32 v40, s46, 14 405; GISEL-NEXT: v_writelane_b32 v40, s47, 15 406; GISEL-NEXT: v_writelane_b32 v40, s48, 16 407; GISEL-NEXT: v_writelane_b32 v40, s49, 17 408; GISEL-NEXT: s_mov_b32 s42, s15 409; GISEL-NEXT: s_mov_b32 s43, s14 410; GISEL-NEXT: s_mov_b32 s44, s13 411; GISEL-NEXT: s_mov_b32 s45, s12 412; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11] 413; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9] 414; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7] 415; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5] 416; GISEL-NEXT: s_mov_b64 s[46:47], exec 417; GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 418; GISEL-NEXT: v_readfirstlane_b32 s16, v0 419; GISEL-NEXT: v_readfirstlane_b32 s17, v1 420; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] 421; GISEL-NEXT: s_and_saveexec_b64 s[48:49], vcc 422; GISEL-NEXT: v_mov_b32_e32 v0, 0x7b 423; GISEL-NEXT: s_mov_b64 s[4:5], s[40:41] 424; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39] 425; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37] 426; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35] 427; GISEL-NEXT: s_mov_b32 s12, s45 428; GISEL-NEXT: s_mov_b32 s13, s44 429; GISEL-NEXT: s_mov_b32 s14, s43 430; GISEL-NEXT: s_mov_b32 s15, s42 431; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17] 432; GISEL-NEXT: ; 
implicit-def: $vgpr0 433; GISEL-NEXT: ; implicit-def: $vgpr31 434; GISEL-NEXT: s_xor_b64 exec, exec, s[48:49] 435; GISEL-NEXT: s_cbranch_execnz .LBB3_1 436; GISEL-NEXT: ; %bb.2: 437; GISEL-NEXT: s_mov_b64 exec, s[46:47] 438; GISEL-NEXT: v_readlane_b32 s49, v40, 17 439; GISEL-NEXT: v_readlane_b32 s48, v40, 16 440; GISEL-NEXT: v_readlane_b32 s47, v40, 15 441; GISEL-NEXT: v_readlane_b32 s46, v40, 14 442; GISEL-NEXT: v_readlane_b32 s45, v40, 13 443; GISEL-NEXT: v_readlane_b32 s44, v40, 12 444; GISEL-NEXT: v_readlane_b32 s43, v40, 11 445; GISEL-NEXT: v_readlane_b32 s42, v40, 10 446; GISEL-NEXT: v_readlane_b32 s41, v40, 9 447; GISEL-NEXT: v_readlane_b32 s40, v40, 8 448; GISEL-NEXT: v_readlane_b32 s39, v40, 7 449; GISEL-NEXT: v_readlane_b32 s38, v40, 6 450; GISEL-NEXT: v_readlane_b32 s37, v40, 5 451; GISEL-NEXT: v_readlane_b32 s36, v40, 4 452; GISEL-NEXT: v_readlane_b32 s35, v40, 3 453; GISEL-NEXT: v_readlane_b32 s34, v40, 2 454; GISEL-NEXT: v_readlane_b32 s31, v40, 1 455; GISEL-NEXT: v_readlane_b32 s30, v40, 0 456; GISEL-NEXT: s_mov_b32 s32, s33 457; GISEL-NEXT: v_readlane_b32 s4, v40, 18 458; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 459; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload 460; GISEL-NEXT: s_mov_b64 exec, s[6:7] 461; GISEL-NEXT: s_mov_b32 s33, s4 462; GISEL-NEXT: s_waitcnt vmcnt(0) 463; GISEL-NEXT: s_setpc_b64 s[30:31] 464 call void %fptr(i32 123) 465 ret void 466} 467 468define i32 @test_indirect_call_vgpr_ptr_ret(ptr %fptr) { 469; GCN-LABEL: test_indirect_call_vgpr_ptr_ret: 470; GCN: ; %bb.0: 471; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 472; GCN-NEXT: s_mov_b32 s16, s33 473; GCN-NEXT: s_mov_b32 s33, s32 474; GCN-NEXT: s_or_saveexec_b64 s[18:19], -1 475; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill 476; GCN-NEXT: s_mov_b64 exec, s[18:19] 477; GCN-NEXT: v_writelane_b32 v40, s16, 18 478; GCN-NEXT: s_addk_i32 s32, 0x400 479; GCN-NEXT: v_writelane_b32 v40, s30, 0 480; GCN-NEXT: v_writelane_b32 
v40, s31, 1 481; GCN-NEXT: v_writelane_b32 v40, s34, 2 482; GCN-NEXT: v_writelane_b32 v40, s35, 3 483; GCN-NEXT: v_writelane_b32 v40, s36, 4 484; GCN-NEXT: v_writelane_b32 v40, s37, 5 485; GCN-NEXT: v_writelane_b32 v40, s38, 6 486; GCN-NEXT: v_writelane_b32 v40, s39, 7 487; GCN-NEXT: v_writelane_b32 v40, s40, 8 488; GCN-NEXT: v_writelane_b32 v40, s41, 9 489; GCN-NEXT: v_writelane_b32 v40, s42, 10 490; GCN-NEXT: v_writelane_b32 v40, s43, 11 491; GCN-NEXT: v_writelane_b32 v40, s44, 12 492; GCN-NEXT: v_writelane_b32 v40, s45, 13 493; GCN-NEXT: v_writelane_b32 v40, s46, 14 494; GCN-NEXT: v_writelane_b32 v40, s47, 15 495; GCN-NEXT: v_writelane_b32 v40, s48, 16 496; GCN-NEXT: v_writelane_b32 v40, s49, 17 497; GCN-NEXT: s_mov_b32 s42, s15 498; GCN-NEXT: s_mov_b32 s43, s14 499; GCN-NEXT: s_mov_b32 s44, s13 500; GCN-NEXT: s_mov_b32 s45, s12 501; GCN-NEXT: s_mov_b64 s[34:35], s[10:11] 502; GCN-NEXT: s_mov_b64 s[36:37], s[8:9] 503; GCN-NEXT: s_mov_b64 s[38:39], s[6:7] 504; GCN-NEXT: s_mov_b64 s[40:41], s[4:5] 505; GCN-NEXT: s_mov_b64 s[46:47], exec 506; GCN-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 507; GCN-NEXT: v_readfirstlane_b32 s16, v0 508; GCN-NEXT: v_readfirstlane_b32 s17, v1 509; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] 510; GCN-NEXT: s_and_saveexec_b64 s[48:49], vcc 511; GCN-NEXT: s_mov_b64 s[4:5], s[40:41] 512; GCN-NEXT: s_mov_b64 s[6:7], s[38:39] 513; GCN-NEXT: s_mov_b64 s[8:9], s[36:37] 514; GCN-NEXT: s_mov_b64 s[10:11], s[34:35] 515; GCN-NEXT: s_mov_b32 s12, s45 516; GCN-NEXT: s_mov_b32 s13, s44 517; GCN-NEXT: s_mov_b32 s14, s43 518; GCN-NEXT: s_mov_b32 s15, s42 519; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] 520; GCN-NEXT: v_mov_b32_e32 v2, v0 521; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1 522; GCN-NEXT: ; implicit-def: $vgpr31 523; GCN-NEXT: s_xor_b64 exec, exec, s[48:49] 524; GCN-NEXT: s_cbranch_execnz .LBB4_1 525; GCN-NEXT: ; %bb.2: 526; GCN-NEXT: s_mov_b64 exec, s[46:47] 527; GCN-NEXT: v_add_i32_e32 v0, vcc, 1, v2 528; GCN-NEXT: v_readlane_b32 
s49, v40, 17 529; GCN-NEXT: v_readlane_b32 s48, v40, 16 530; GCN-NEXT: v_readlane_b32 s47, v40, 15 531; GCN-NEXT: v_readlane_b32 s46, v40, 14 532; GCN-NEXT: v_readlane_b32 s45, v40, 13 533; GCN-NEXT: v_readlane_b32 s44, v40, 12 534; GCN-NEXT: v_readlane_b32 s43, v40, 11 535; GCN-NEXT: v_readlane_b32 s42, v40, 10 536; GCN-NEXT: v_readlane_b32 s41, v40, 9 537; GCN-NEXT: v_readlane_b32 s40, v40, 8 538; GCN-NEXT: v_readlane_b32 s39, v40, 7 539; GCN-NEXT: v_readlane_b32 s38, v40, 6 540; GCN-NEXT: v_readlane_b32 s37, v40, 5 541; GCN-NEXT: v_readlane_b32 s36, v40, 4 542; GCN-NEXT: v_readlane_b32 s35, v40, 3 543; GCN-NEXT: v_readlane_b32 s34, v40, 2 544; GCN-NEXT: v_readlane_b32 s31, v40, 1 545; GCN-NEXT: v_readlane_b32 s30, v40, 0 546; GCN-NEXT: s_mov_b32 s32, s33 547; GCN-NEXT: v_readlane_b32 s4, v40, 18 548; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 549; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload 550; GCN-NEXT: s_mov_b64 exec, s[6:7] 551; GCN-NEXT: s_mov_b32 s33, s4 552; GCN-NEXT: s_waitcnt vmcnt(0) 553; GCN-NEXT: s_setpc_b64 s[30:31] 554; 555; GISEL-LABEL: test_indirect_call_vgpr_ptr_ret: 556; GISEL: ; %bb.0: 557; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 558; GISEL-NEXT: s_mov_b32 s16, s33 559; GISEL-NEXT: s_mov_b32 s33, s32 560; GISEL-NEXT: s_or_saveexec_b64 s[18:19], -1 561; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill 562; GISEL-NEXT: s_mov_b64 exec, s[18:19] 563; GISEL-NEXT: v_writelane_b32 v40, s16, 18 564; GISEL-NEXT: s_addk_i32 s32, 0x400 565; GISEL-NEXT: v_writelane_b32 v40, s30, 0 566; GISEL-NEXT: v_writelane_b32 v40, s31, 1 567; GISEL-NEXT: v_writelane_b32 v40, s34, 2 568; GISEL-NEXT: v_writelane_b32 v40, s35, 3 569; GISEL-NEXT: v_writelane_b32 v40, s36, 4 570; GISEL-NEXT: v_writelane_b32 v40, s37, 5 571; GISEL-NEXT: v_writelane_b32 v40, s38, 6 572; GISEL-NEXT: v_writelane_b32 v40, s39, 7 573; GISEL-NEXT: v_writelane_b32 v40, s40, 8 574; GISEL-NEXT: v_writelane_b32 v40, s41, 9 575; 
GISEL-NEXT: v_writelane_b32 v40, s42, 10 576; GISEL-NEXT: v_writelane_b32 v40, s43, 11 577; GISEL-NEXT: v_writelane_b32 v40, s44, 12 578; GISEL-NEXT: v_writelane_b32 v40, s45, 13 579; GISEL-NEXT: v_writelane_b32 v40, s46, 14 580; GISEL-NEXT: v_writelane_b32 v40, s47, 15 581; GISEL-NEXT: v_writelane_b32 v40, s48, 16 582; GISEL-NEXT: v_writelane_b32 v40, s49, 17 583; GISEL-NEXT: s_mov_b32 s42, s15 584; GISEL-NEXT: s_mov_b32 s43, s14 585; GISEL-NEXT: s_mov_b32 s44, s13 586; GISEL-NEXT: s_mov_b32 s45, s12 587; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11] 588; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9] 589; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7] 590; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5] 591; GISEL-NEXT: s_mov_b64 s[46:47], exec 592; GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 593; GISEL-NEXT: v_readfirstlane_b32 s16, v0 594; GISEL-NEXT: v_readfirstlane_b32 s17, v1 595; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] 596; GISEL-NEXT: s_and_saveexec_b64 s[48:49], vcc 597; GISEL-NEXT: s_mov_b64 s[4:5], s[40:41] 598; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39] 599; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37] 600; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35] 601; GISEL-NEXT: s_mov_b32 s12, s45 602; GISEL-NEXT: s_mov_b32 s13, s44 603; GISEL-NEXT: s_mov_b32 s14, s43 604; GISEL-NEXT: s_mov_b32 s15, s42 605; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17] 606; GISEL-NEXT: v_mov_b32_e32 v1, v0 607; GISEL-NEXT: ; implicit-def: $vgpr0 608; GISEL-NEXT: ; implicit-def: $vgpr31 609; GISEL-NEXT: s_xor_b64 exec, exec, s[48:49] 610; GISEL-NEXT: s_cbranch_execnz .LBB4_1 611; GISEL-NEXT: ; %bb.2: 612; GISEL-NEXT: s_mov_b64 exec, s[46:47] 613; GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v1 614; GISEL-NEXT: v_readlane_b32 s49, v40, 17 615; GISEL-NEXT: v_readlane_b32 s48, v40, 16 616; GISEL-NEXT: v_readlane_b32 s47, v40, 15 617; GISEL-NEXT: v_readlane_b32 s46, v40, 14 618; GISEL-NEXT: v_readlane_b32 s45, v40, 13 619; GISEL-NEXT: v_readlane_b32 s44, v40, 12 620; GISEL-NEXT: v_readlane_b32 s43, v40, 11 621; 
GISEL-NEXT: v_readlane_b32 s42, v40, 10 622; GISEL-NEXT: v_readlane_b32 s41, v40, 9 623; GISEL-NEXT: v_readlane_b32 s40, v40, 8 624; GISEL-NEXT: v_readlane_b32 s39, v40, 7 625; GISEL-NEXT: v_readlane_b32 s38, v40, 6 626; GISEL-NEXT: v_readlane_b32 s37, v40, 5 627; GISEL-NEXT: v_readlane_b32 s36, v40, 4 628; GISEL-NEXT: v_readlane_b32 s35, v40, 3 629; GISEL-NEXT: v_readlane_b32 s34, v40, 2 630; GISEL-NEXT: v_readlane_b32 s31, v40, 1 631; GISEL-NEXT: v_readlane_b32 s30, v40, 0 632; GISEL-NEXT: s_mov_b32 s32, s33 633; GISEL-NEXT: v_readlane_b32 s4, v40, 18 634; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 635; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload 636; GISEL-NEXT: s_mov_b64 exec, s[6:7] 637; GISEL-NEXT: s_mov_b32 s33, s4 638; GISEL-NEXT: s_waitcnt vmcnt(0) 639; GISEL-NEXT: s_setpc_b64 s[30:31] 640 %a = call i32 %fptr() 641 %b = add i32 %a, 1 642 ret i32 %b 643} 644 645define void @test_indirect_call_vgpr_ptr_in_branch(ptr %fptr, i1 %cond) { 646; GCN-LABEL: test_indirect_call_vgpr_ptr_in_branch: 647; GCN: ; %bb.0: ; %bb0 648; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 649; GCN-NEXT: s_mov_b32 s16, s33 650; GCN-NEXT: s_mov_b32 s33, s32 651; GCN-NEXT: s_or_saveexec_b64 s[18:19], -1 652; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill 653; GCN-NEXT: s_mov_b64 exec, s[18:19] 654; GCN-NEXT: v_writelane_b32 v40, s16, 20 655; GCN-NEXT: s_addk_i32 s32, 0x400 656; GCN-NEXT: v_writelane_b32 v40, s30, 0 657; GCN-NEXT: v_writelane_b32 v40, s31, 1 658; GCN-NEXT: v_writelane_b32 v40, s34, 2 659; GCN-NEXT: v_writelane_b32 v40, s35, 3 660; GCN-NEXT: v_writelane_b32 v40, s36, 4 661; GCN-NEXT: v_writelane_b32 v40, s37, 5 662; GCN-NEXT: v_writelane_b32 v40, s38, 6 663; GCN-NEXT: v_writelane_b32 v40, s39, 7 664; GCN-NEXT: v_writelane_b32 v40, s40, 8 665; GCN-NEXT: v_writelane_b32 v40, s41, 9 666; GCN-NEXT: v_writelane_b32 v40, s42, 10 667; GCN-NEXT: v_writelane_b32 v40, s43, 11 668; GCN-NEXT: v_writelane_b32 v40, s44, 12 
669; GCN-NEXT: v_writelane_b32 v40, s45, 13 670; GCN-NEXT: v_writelane_b32 v40, s46, 14 671; GCN-NEXT: v_writelane_b32 v40, s47, 15 672; GCN-NEXT: v_writelane_b32 v40, s48, 16 673; GCN-NEXT: v_writelane_b32 v40, s49, 17 674; GCN-NEXT: v_writelane_b32 v40, s50, 18 675; GCN-NEXT: v_writelane_b32 v40, s51, 19 676; GCN-NEXT: s_mov_b32 s42, s15 677; GCN-NEXT: s_mov_b32 s43, s14 678; GCN-NEXT: s_mov_b32 s44, s13 679; GCN-NEXT: s_mov_b32 s45, s12 680; GCN-NEXT: s_mov_b64 s[34:35], s[10:11] 681; GCN-NEXT: s_mov_b64 s[36:37], s[8:9] 682; GCN-NEXT: s_mov_b64 s[38:39], s[6:7] 683; GCN-NEXT: s_mov_b64 s[40:41], s[4:5] 684; GCN-NEXT: v_and_b32_e32 v2, 1, v2 685; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 686; GCN-NEXT: s_and_saveexec_b64 s[46:47], vcc 687; GCN-NEXT: s_cbranch_execz .LBB5_4 688; GCN-NEXT: ; %bb.1: ; %bb1 689; GCN-NEXT: s_mov_b64 s[48:49], exec 690; GCN-NEXT: .LBB5_2: ; =>This Inner Loop Header: Depth=1 691; GCN-NEXT: v_readfirstlane_b32 s16, v0 692; GCN-NEXT: v_readfirstlane_b32 s17, v1 693; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] 694; GCN-NEXT: s_and_saveexec_b64 s[50:51], vcc 695; GCN-NEXT: s_mov_b64 s[4:5], s[40:41] 696; GCN-NEXT: s_mov_b64 s[6:7], s[38:39] 697; GCN-NEXT: s_mov_b64 s[8:9], s[36:37] 698; GCN-NEXT: s_mov_b64 s[10:11], s[34:35] 699; GCN-NEXT: s_mov_b32 s12, s45 700; GCN-NEXT: s_mov_b32 s13, s44 701; GCN-NEXT: s_mov_b32 s14, s43 702; GCN-NEXT: s_mov_b32 s15, s42 703; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] 704; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1 705; GCN-NEXT: ; implicit-def: $vgpr31 706; GCN-NEXT: s_xor_b64 exec, exec, s[50:51] 707; GCN-NEXT: s_cbranch_execnz .LBB5_2 708; GCN-NEXT: ; %bb.3: 709; GCN-NEXT: s_mov_b64 exec, s[48:49] 710; GCN-NEXT: .LBB5_4: ; %bb2 711; GCN-NEXT: s_or_b64 exec, exec, s[46:47] 712; GCN-NEXT: v_readlane_b32 s51, v40, 19 713; GCN-NEXT: v_readlane_b32 s50, v40, 18 714; GCN-NEXT: v_readlane_b32 s49, v40, 17 715; GCN-NEXT: v_readlane_b32 s48, v40, 16 716; GCN-NEXT: v_readlane_b32 s47, v40, 15 717; GCN-NEXT: 
v_readlane_b32 s46, v40, 14 718; GCN-NEXT: v_readlane_b32 s45, v40, 13 719; GCN-NEXT: v_readlane_b32 s44, v40, 12 720; GCN-NEXT: v_readlane_b32 s43, v40, 11 721; GCN-NEXT: v_readlane_b32 s42, v40, 10 722; GCN-NEXT: v_readlane_b32 s41, v40, 9 723; GCN-NEXT: v_readlane_b32 s40, v40, 8 724; GCN-NEXT: v_readlane_b32 s39, v40, 7 725; GCN-NEXT: v_readlane_b32 s38, v40, 6 726; GCN-NEXT: v_readlane_b32 s37, v40, 5 727; GCN-NEXT: v_readlane_b32 s36, v40, 4 728; GCN-NEXT: v_readlane_b32 s35, v40, 3 729; GCN-NEXT: v_readlane_b32 s34, v40, 2 730; GCN-NEXT: v_readlane_b32 s31, v40, 1 731; GCN-NEXT: v_readlane_b32 s30, v40, 0 732; GCN-NEXT: s_mov_b32 s32, s33 733; GCN-NEXT: v_readlane_b32 s4, v40, 20 734; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 735; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload 736; GCN-NEXT: s_mov_b64 exec, s[6:7] 737; GCN-NEXT: s_mov_b32 s33, s4 738; GCN-NEXT: s_waitcnt vmcnt(0) 739; GCN-NEXT: s_setpc_b64 s[30:31] 740; 741; GISEL-LABEL: test_indirect_call_vgpr_ptr_in_branch: 742; GISEL: ; %bb.0: ; %bb0 743; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 744; GISEL-NEXT: s_mov_b32 s16, s33 745; GISEL-NEXT: s_mov_b32 s33, s32 746; GISEL-NEXT: s_or_saveexec_b64 s[18:19], -1 747; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill 748; GISEL-NEXT: s_mov_b64 exec, s[18:19] 749; GISEL-NEXT: v_writelane_b32 v40, s16, 20 750; GISEL-NEXT: s_addk_i32 s32, 0x400 751; GISEL-NEXT: v_writelane_b32 v40, s30, 0 752; GISEL-NEXT: v_writelane_b32 v40, s31, 1 753; GISEL-NEXT: v_writelane_b32 v40, s34, 2 754; GISEL-NEXT: v_writelane_b32 v40, s35, 3 755; GISEL-NEXT: v_writelane_b32 v40, s36, 4 756; GISEL-NEXT: v_writelane_b32 v40, s37, 5 757; GISEL-NEXT: v_writelane_b32 v40, s38, 6 758; GISEL-NEXT: v_writelane_b32 v40, s39, 7 759; GISEL-NEXT: v_writelane_b32 v40, s40, 8 760; GISEL-NEXT: v_writelane_b32 v40, s41, 9 761; GISEL-NEXT: v_writelane_b32 v40, s42, 10 762; GISEL-NEXT: v_writelane_b32 v40, s43, 11 763; GISEL-NEXT: 
v_writelane_b32 v40, s44, 12 764; GISEL-NEXT: v_writelane_b32 v40, s45, 13 765; GISEL-NEXT: v_writelane_b32 v40, s46, 14 766; GISEL-NEXT: v_writelane_b32 v40, s47, 15 767; GISEL-NEXT: v_writelane_b32 v40, s48, 16 768; GISEL-NEXT: v_writelane_b32 v40, s49, 17 769; GISEL-NEXT: v_writelane_b32 v40, s50, 18 770; GISEL-NEXT: v_writelane_b32 v40, s51, 19 771; GISEL-NEXT: s_mov_b32 s42, s15 772; GISEL-NEXT: s_mov_b32 s43, s14 773; GISEL-NEXT: s_mov_b32 s44, s13 774; GISEL-NEXT: s_mov_b32 s45, s12 775; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11] 776; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9] 777; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7] 778; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5] 779; GISEL-NEXT: v_and_b32_e32 v2, 1, v2 780; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 781; GISEL-NEXT: s_and_saveexec_b64 s[46:47], vcc 782; GISEL-NEXT: s_cbranch_execz .LBB5_4 783; GISEL-NEXT: ; %bb.1: ; %bb1 784; GISEL-NEXT: s_mov_b64 s[48:49], exec 785; GISEL-NEXT: .LBB5_2: ; =>This Inner Loop Header: Depth=1 786; GISEL-NEXT: v_readfirstlane_b32 s16, v0 787; GISEL-NEXT: v_readfirstlane_b32 s17, v1 788; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] 789; GISEL-NEXT: s_and_saveexec_b64 s[50:51], vcc 790; GISEL-NEXT: s_mov_b64 s[4:5], s[40:41] 791; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39] 792; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37] 793; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35] 794; GISEL-NEXT: s_mov_b32 s12, s45 795; GISEL-NEXT: s_mov_b32 s13, s44 796; GISEL-NEXT: s_mov_b32 s14, s43 797; GISEL-NEXT: s_mov_b32 s15, s42 798; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17] 799; GISEL-NEXT: ; implicit-def: $vgpr0 800; GISEL-NEXT: ; implicit-def: $vgpr31 801; GISEL-NEXT: s_xor_b64 exec, exec, s[50:51] 802; GISEL-NEXT: s_cbranch_execnz .LBB5_2 803; GISEL-NEXT: ; %bb.3: 804; GISEL-NEXT: s_mov_b64 exec, s[48:49] 805; GISEL-NEXT: .LBB5_4: ; %bb2 806; GISEL-NEXT: s_or_b64 exec, exec, s[46:47] 807; GISEL-NEXT: v_readlane_b32 s51, v40, 19 808; GISEL-NEXT: v_readlane_b32 s50, v40, 18 809; GISEL-NEXT: v_readlane_b32 s49, 
v40, 17
; GISEL-NEXT: v_readlane_b32 s48, v40, 16
; GISEL-NEXT: v_readlane_b32 s47, v40, 15
; GISEL-NEXT: v_readlane_b32 s46, v40, 14
; GISEL-NEXT: v_readlane_b32 s45, v40, 13
; GISEL-NEXT: v_readlane_b32 s44, v40, 12
; GISEL-NEXT: v_readlane_b32 s43, v40, 11
; GISEL-NEXT: v_readlane_b32 s42, v40, 10
; GISEL-NEXT: v_readlane_b32 s41, v40, 9
; GISEL-NEXT: v_readlane_b32 s40, v40, 8
; GISEL-NEXT: v_readlane_b32 s39, v40, 7
; GISEL-NEXT: v_readlane_b32 s38, v40, 6
; GISEL-NEXT: v_readlane_b32 s37, v40, 5
; GISEL-NEXT: v_readlane_b32 s36, v40, 4
; GISEL-NEXT: v_readlane_b32 s35, v40, 3
; GISEL-NEXT: v_readlane_b32 s34, v40, 2
; GISEL-NEXT: v_readlane_b32 s31, v40, 1
; GISEL-NEXT: v_readlane_b32 s30, v40, 0
; GISEL-NEXT: s_mov_b32 s32, s33
; GISEL-NEXT: v_readlane_b32 s4, v40, 20
; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1
; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GISEL-NEXT: s_mov_b64 exec, s[6:7]
; GISEL-NEXT: s_mov_b32 s33, s4
; GISEL-NEXT: s_waitcnt vmcnt(0)
; GISEL-NEXT: s_setpc_b64 s[30:31]
bb0:
  br i1 %cond, label %bb1, label %bb2

bb1:
  call void %fptr()
  br label %bb2

bb2:
  ret void
}

; Indirect amdgpu_gfx call through the pointer argument %fptr, passing the constant 123 as an
; inreg i32 argument. The checks expect 0x7b (123) to be moved into s4 inside the
; v_readfirstlane loop, immediately before the s_swappc_b64.
define void @test_indirect_call_vgpr_ptr_inreg_arg(ptr %fptr) {
; GCN-LABEL: test_indirect_call_vgpr_ptr_inreg_arg:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_mov_b32 s5, s33
; GCN-NEXT: s_mov_b32 s33, s32
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[6:7]
; GCN-NEXT: s_addk_i32 s32, 0x400
; GCN-NEXT: v_writelane_b32 v40, s30, 0
; GCN-NEXT: v_writelane_b32 v40, s31, 1
; GCN-NEXT: v_writelane_b32 v40, s34, 2
; GCN-NEXT: v_writelane_b32 v40, s35, 3
; GCN-NEXT: v_writelane_b32 v40, s36, 4
; GCN-NEXT: v_writelane_b32 v40, s37, 5
; GCN-NEXT: v_writelane_b32 v40, s38, 6
; GCN-NEXT: v_writelane_b32 v40, s39, 7
; GCN-NEXT: v_writelane_b32 v40, s40, 8
; GCN-NEXT: v_writelane_b32 v40, s41, 9
; GCN-NEXT: v_writelane_b32 v40, s42, 10
; GCN-NEXT: v_writelane_b32 v40, s43, 11
; GCN-NEXT: v_writelane_b32 v40, s44, 12
; GCN-NEXT: v_writelane_b32 v40, s45, 13
; GCN-NEXT: v_writelane_b32 v40, s46, 14
; GCN-NEXT: v_writelane_b32 v40, s47, 15
; GCN-NEXT: v_writelane_b32 v40, s48, 16
; GCN-NEXT: v_writelane_b32 v40, s49, 17
; GCN-NEXT: v_writelane_b32 v40, s50, 18
; GCN-NEXT: v_writelane_b32 v40, s51, 19
; GCN-NEXT: v_writelane_b32 v40, s52, 20
; GCN-NEXT: v_writelane_b32 v40, s53, 21
; GCN-NEXT: v_writelane_b32 v40, s54, 22
; GCN-NEXT: v_writelane_b32 v40, s55, 23
; GCN-NEXT: v_writelane_b32 v40, s56, 24
; GCN-NEXT: v_writelane_b32 v40, s57, 25
; GCN-NEXT: v_writelane_b32 v40, s58, 26
; GCN-NEXT: v_writelane_b32 v40, s59, 27
; GCN-NEXT: v_writelane_b32 v40, s60, 28
; GCN-NEXT: v_writelane_b32 v40, s61, 29
; GCN-NEXT: v_writelane_b32 v40, s62, 30
; GCN-NEXT: v_writelane_b32 v40, s63, 31
; GCN-NEXT: s_mov_b64 s[6:7], exec
; GCN-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: v_readfirstlane_b32 s8, v0
; GCN-NEXT: v_readfirstlane_b32 s9, v1
; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1]
; GCN-NEXT: s_and_saveexec_b64 s[10:11], vcc
; GCN-NEXT: s_movk_i32 s4, 0x7b
; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9]
; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1
; GCN-NEXT: s_xor_b64 exec, exec, s[10:11]
; GCN-NEXT: s_cbranch_execnz .LBB6_1
; GCN-NEXT: ; %bb.2:
; GCN-NEXT: s_mov_b64 exec, s[6:7]
; GCN-NEXT: v_readlane_b32 s63, v40, 31
; GCN-NEXT: v_readlane_b32 s62, v40, 30
; GCN-NEXT: v_readlane_b32 s61, v40, 29
; GCN-NEXT: v_readlane_b32 s60, v40, 28
; GCN-NEXT: v_readlane_b32 s59, v40, 27
; GCN-NEXT: v_readlane_b32 s58, v40, 26
; GCN-NEXT: v_readlane_b32 s57, v40, 25
; GCN-NEXT: v_readlane_b32 s56, v40, 24
; GCN-NEXT: v_readlane_b32 s55, v40, 23
; GCN-NEXT: v_readlane_b32 s54, v40, 22
; GCN-NEXT: v_readlane_b32 s53, v40, 21
; GCN-NEXT: v_readlane_b32 s52, v40, 20
; GCN-NEXT: v_readlane_b32 s51, v40, 19
; GCN-NEXT: v_readlane_b32 s50, v40, 18
; GCN-NEXT: v_readlane_b32 s49, v40, 17
; GCN-NEXT: v_readlane_b32 s48, v40, 16
; GCN-NEXT: v_readlane_b32 s47, v40, 15
; GCN-NEXT: v_readlane_b32 s46, v40, 14
; GCN-NEXT: v_readlane_b32 s45, v40, 13
; GCN-NEXT: v_readlane_b32 s44, v40, 12
; GCN-NEXT: v_readlane_b32 s43, v40, 11
; GCN-NEXT: v_readlane_b32 s42, v40, 10
; GCN-NEXT: v_readlane_b32 s41, v40, 9
; GCN-NEXT: v_readlane_b32 s40, v40, 8
; GCN-NEXT: v_readlane_b32 s39, v40, 7
; GCN-NEXT: v_readlane_b32 s38, v40, 6
; GCN-NEXT: v_readlane_b32 s37, v40, 5
; GCN-NEXT: v_readlane_b32 s36, v40, 4
; GCN-NEXT: v_readlane_b32 s35, v40, 3
; GCN-NEXT: v_readlane_b32 s34, v40, 2
; GCN-NEXT: v_readlane_b32 s31, v40, 1
; GCN-NEXT: v_readlane_b32 s30, v40, 0
; GCN-NEXT: s_mov_b32 s32, s33
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[6:7]
; GCN-NEXT: s_mov_b32 s33, s5
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: test_indirect_call_vgpr_ptr_inreg_arg:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: s_mov_b32 s5, s33
; GISEL-NEXT: s_mov_b32 s33, s32
; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1
; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GISEL-NEXT: s_mov_b64 exec, s[6:7]
; GISEL-NEXT: s_addk_i32 s32, 0x400
; GISEL-NEXT: v_writelane_b32 v40, s30, 0
; GISEL-NEXT: v_writelane_b32 v40, s31, 1
; GISEL-NEXT: v_writelane_b32 v40, s34, 2
; GISEL-NEXT: v_writelane_b32 v40, s35, 3
; GISEL-NEXT: v_writelane_b32 v40, s36, 4
; GISEL-NEXT: v_writelane_b32 v40, s37, 5
; GISEL-NEXT: v_writelane_b32 v40, s38, 6
; GISEL-NEXT: v_writelane_b32 v40, s39, 7
; GISEL-NEXT: v_writelane_b32 v40, s40, 8
; GISEL-NEXT: v_writelane_b32 v40, s41, 9
; GISEL-NEXT: v_writelane_b32 v40, s42, 10
; GISEL-NEXT: v_writelane_b32 v40, s43, 11
; GISEL-NEXT: v_writelane_b32 v40, s44, 12
; GISEL-NEXT: v_writelane_b32 v40, s45, 13
; GISEL-NEXT: v_writelane_b32 v40, s46, 14
; GISEL-NEXT: v_writelane_b32 v40, s47, 15
; GISEL-NEXT: v_writelane_b32 v40, s48, 16
; GISEL-NEXT: v_writelane_b32 v40, s49, 17
; GISEL-NEXT: v_writelane_b32 v40, s50, 18
; GISEL-NEXT: v_writelane_b32 v40, s51, 19
; GISEL-NEXT: v_writelane_b32 v40, s52, 20
; GISEL-NEXT: v_writelane_b32 v40, s53, 21
; GISEL-NEXT: v_writelane_b32 v40, s54, 22
; GISEL-NEXT: v_writelane_b32 v40, s55, 23
; GISEL-NEXT: v_writelane_b32 v40, s56, 24
; GISEL-NEXT: v_writelane_b32 v40, s57, 25
; GISEL-NEXT: v_writelane_b32 v40, s58, 26
; GISEL-NEXT: v_writelane_b32 v40, s59, 27
; GISEL-NEXT: v_writelane_b32 v40, s60, 28
; GISEL-NEXT: v_writelane_b32 v40, s61, 29
; GISEL-NEXT: v_writelane_b32 v40, s62, 30
; GISEL-NEXT: v_writelane_b32 v40, s63, 31
; GISEL-NEXT: s_mov_b64 s[6:7], exec
; GISEL-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1
; GISEL-NEXT: v_readfirstlane_b32 s8, v0
; GISEL-NEXT: v_readfirstlane_b32 s9, v1
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1]
; GISEL-NEXT: s_and_saveexec_b64 s[10:11], vcc
; GISEL-NEXT: s_movk_i32 s4, 0x7b
; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9]
; GISEL-NEXT: ; implicit-def: $vgpr0
; GISEL-NEXT: s_xor_b64 exec, exec, s[10:11]
; GISEL-NEXT: s_cbranch_execnz .LBB6_1
; GISEL-NEXT: ; %bb.2:
; GISEL-NEXT: s_mov_b64 exec, s[6:7]
; GISEL-NEXT: v_readlane_b32 s63, v40, 31
; GISEL-NEXT: v_readlane_b32 s62, v40, 30
; GISEL-NEXT: v_readlane_b32 s61, v40, 29
; GISEL-NEXT: v_readlane_b32 s60, v40, 28
; GISEL-NEXT: v_readlane_b32 s59, v40, 27
; GISEL-NEXT: v_readlane_b32 s58, v40, 26
; GISEL-NEXT: v_readlane_b32 s57, v40, 25
; GISEL-NEXT: v_readlane_b32 s56, v40, 24
; GISEL-NEXT: v_readlane_b32 s55, v40, 23
; GISEL-NEXT: v_readlane_b32 s54, v40, 22
; GISEL-NEXT: v_readlane_b32 s53, v40, 21
; GISEL-NEXT: v_readlane_b32 s52, v40, 20
; GISEL-NEXT: v_readlane_b32 s51, v40, 19
; GISEL-NEXT: v_readlane_b32 s50, v40, 18
; GISEL-NEXT: v_readlane_b32 s49, v40, 17
; GISEL-NEXT: v_readlane_b32 s48, v40, 16
; GISEL-NEXT: v_readlane_b32 s47, v40, 15
; GISEL-NEXT: v_readlane_b32 s46, v40, 14
; GISEL-NEXT: v_readlane_b32 s45, v40, 13
; GISEL-NEXT: v_readlane_b32 s44, v40, 12
; GISEL-NEXT: v_readlane_b32 s43, v40, 11
; GISEL-NEXT: v_readlane_b32 s42, v40, 10
; GISEL-NEXT: v_readlane_b32 s41, v40, 9
; GISEL-NEXT: v_readlane_b32 s40, v40, 8
; GISEL-NEXT: v_readlane_b32 s39, v40, 7
; GISEL-NEXT: v_readlane_b32 s38, v40, 6
; GISEL-NEXT: v_readlane_b32 s37, v40, 5
; GISEL-NEXT: v_readlane_b32 s36, v40, 4
; GISEL-NEXT: v_readlane_b32 s35, v40, 3
; GISEL-NEXT: v_readlane_b32 s34, v40, 2
; GISEL-NEXT: v_readlane_b32 s31, v40, 1
; GISEL-NEXT: v_readlane_b32 s30, v40, 0
; GISEL-NEXT: s_mov_b32 s32, s33
; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1
; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GISEL-NEXT: s_mov_b64 exec, s[6:7]
; GISEL-NEXT: s_mov_b32 s33, s5
; GISEL-NEXT: s_waitcnt vmcnt(0)
; GISEL-NEXT: s_setpc_b64 s[30:31]
  call amdgpu_gfx void %fptr(i32 inreg 123)
  ret void
}

; Pass %i to an indirect amdgpu_gfx call and return the same %i afterwards. The checks expect %i
; to be held in v40 around the loop: it is copied to v0 before each s_swappc_b64 and again
; before the return.
define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, ptr %fptr) {
; GCN-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_mov_b32 s10, s33
; GCN-NEXT: s_mov_b32 s33, s32
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: s_addk_i32 s32, 0x400
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GCN-NEXT: v_writelane_b32 v41, s30, 0
; GCN-NEXT: v_writelane_b32 v41, s31, 1
; GCN-NEXT: v_writelane_b32 v41, s34, 2
; GCN-NEXT: v_writelane_b32 v41, s35, 3
; GCN-NEXT: v_writelane_b32 v41, s36, 4
; GCN-NEXT: v_writelane_b32 v41, s37, 5
; GCN-NEXT: v_writelane_b32 v41, s38, 6
; GCN-NEXT: v_writelane_b32 v41, s39, 7
; GCN-NEXT: v_writelane_b32 v41, s40, 8
; GCN-NEXT: v_writelane_b32 v41, s41, 9
; GCN-NEXT: v_writelane_b32 v41, s42, 10
; GCN-NEXT: v_writelane_b32 v41, s43, 11
; GCN-NEXT: v_writelane_b32 v41, s44, 12
; GCN-NEXT: v_writelane_b32 v41, s45, 13
; GCN-NEXT: v_writelane_b32 v41, s46, 14
; GCN-NEXT: v_writelane_b32 v41, s47, 15
; GCN-NEXT: v_writelane_b32 v41, s48, 16
; GCN-NEXT: v_writelane_b32 v41, s49, 17
; GCN-NEXT: v_writelane_b32 v41, s50, 18
; GCN-NEXT: v_writelane_b32 v41, s51, 19
; GCN-NEXT: v_writelane_b32 v41, s52, 20
; GCN-NEXT: v_writelane_b32 v41, s53, 21
; GCN-NEXT: v_writelane_b32 v41, s54, 22
; GCN-NEXT: v_writelane_b32 v41, s55, 23
; GCN-NEXT: v_writelane_b32 v41, s56, 24
; GCN-NEXT: v_writelane_b32 v41, s57, 25
; GCN-NEXT: v_writelane_b32 v41, s58, 26
; GCN-NEXT: v_writelane_b32 v41, s59, 27
; GCN-NEXT: v_writelane_b32 v41, s60, 28
; GCN-NEXT: v_writelane_b32 v41, s61, 29
; GCN-NEXT: v_writelane_b32 v41, s62, 30
; GCN-NEXT: v_writelane_b32 v41, s63, 31
; GCN-NEXT: v_mov_b32_e32 v40, v0
; GCN-NEXT: s_mov_b64 s[4:5], exec
; GCN-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: v_readfirstlane_b32 s6, v1
; GCN-NEXT: v_readfirstlane_b32 s7, v2
; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2]
; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GCN-NEXT: v_mov_b32_e32 v0, v40
; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7]
; GCN-NEXT: ; implicit-def: $vgpr1_vgpr2
; GCN-NEXT: s_xor_b64 exec, exec, s[8:9]
; GCN-NEXT: s_cbranch_execnz .LBB7_1
; GCN-NEXT: ; %bb.2:
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: v_mov_b32_e32 v0, v40
; GCN-NEXT: v_readlane_b32 s63, v41, 31
; GCN-NEXT: v_readlane_b32 s62, v41, 30
; GCN-NEXT: v_readlane_b32 s61, v41, 29
; GCN-NEXT: v_readlane_b32 s60, v41, 28
; GCN-NEXT: v_readlane_b32 s59, v41, 27
; GCN-NEXT: v_readlane_b32 s58, v41, 26
; GCN-NEXT: v_readlane_b32 s57, v41, 25
; GCN-NEXT: v_readlane_b32 s56, v41, 24
; GCN-NEXT: v_readlane_b32 s55, v41, 23
; GCN-NEXT: v_readlane_b32 s54, v41, 22
; GCN-NEXT: v_readlane_b32 s53, v41, 21
; GCN-NEXT: v_readlane_b32 s52, v41, 20
; GCN-NEXT: v_readlane_b32 s51, v41, 19
; GCN-NEXT: v_readlane_b32 s50, v41, 18
; GCN-NEXT: v_readlane_b32 s49, v41, 17
; GCN-NEXT: v_readlane_b32 s48, v41, 16
; GCN-NEXT: v_readlane_b32 s47, v41, 15
; GCN-NEXT: v_readlane_b32 s46, v41, 14
; GCN-NEXT: v_readlane_b32 s45, v41, 13
; GCN-NEXT: v_readlane_b32 s44, v41, 12
; GCN-NEXT: v_readlane_b32 s43, v41, 11
; GCN-NEXT: v_readlane_b32 s42, v41, 10
; GCN-NEXT: v_readlane_b32 s41, v41, 9
; GCN-NEXT: v_readlane_b32 s40, v41, 8
; GCN-NEXT: v_readlane_b32 s39, v41, 7
; GCN-NEXT: v_readlane_b32 s38, v41, 6
; GCN-NEXT: v_readlane_b32 s37, v41, 5
; GCN-NEXT: v_readlane_b32 s36, v41, 4
; GCN-NEXT: v_readlane_b32 s35, v41, 3
; GCN-NEXT: v_readlane_b32 s34, v41, 2
; GCN-NEXT: v_readlane_b32 s31, v41, 1
; GCN-NEXT: v_readlane_b32 s30, v41, 0
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b32 s32, s33
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: s_mov_b32 s33, s10
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: s_mov_b32 s10, s33
; GISEL-NEXT: s_mov_b32 s33, s32
; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
; GISEL-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GISEL-NEXT: s_mov_b64 exec, s[4:5]
; GISEL-NEXT: s_addk_i32 s32, 0x400
; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GISEL-NEXT: v_writelane_b32 v41, s30, 0
; GISEL-NEXT: v_writelane_b32 v41, s31, 1
; GISEL-NEXT: v_writelane_b32 v41, s34, 2
; GISEL-NEXT: v_writelane_b32 v41, s35, 3
; GISEL-NEXT: v_writelane_b32 v41, s36, 4
; GISEL-NEXT: v_writelane_b32 v41, s37, 5
; GISEL-NEXT: v_writelane_b32 v41, s38, 6
; GISEL-NEXT: v_writelane_b32 v41, s39, 7
; GISEL-NEXT: v_writelane_b32 v41, s40, 8
; GISEL-NEXT: v_writelane_b32 v41, s41, 9
; GISEL-NEXT: v_writelane_b32 v41, s42, 10
; GISEL-NEXT: v_writelane_b32 v41, s43, 11
; GISEL-NEXT: v_writelane_b32 v41, s44, 12
; GISEL-NEXT: v_writelane_b32 v41, s45, 13
; GISEL-NEXT: v_writelane_b32 v41, s46, 14
; GISEL-NEXT: v_writelane_b32 v41, s47, 15
; GISEL-NEXT: v_writelane_b32 v41, s48, 16
; GISEL-NEXT: v_writelane_b32 v41, s49, 17
; GISEL-NEXT: v_writelane_b32 v41, s50, 18
; GISEL-NEXT: v_writelane_b32 v41, s51, 19
; GISEL-NEXT: v_writelane_b32 v41, s52, 20
; GISEL-NEXT: v_writelane_b32 v41, s53, 21
; GISEL-NEXT: v_writelane_b32 v41, s54, 22
; GISEL-NEXT: v_writelane_b32 v41, s55, 23
; GISEL-NEXT: v_writelane_b32 v41, s56, 24
; GISEL-NEXT: v_writelane_b32 v41, s57, 25
; GISEL-NEXT: v_writelane_b32 v41, s58, 26
; GISEL-NEXT: v_writelane_b32 v41, s59, 27
; GISEL-NEXT: v_writelane_b32 v41, s60, 28
; GISEL-NEXT: v_writelane_b32 v41, s61, 29
; GISEL-NEXT: v_writelane_b32 v41, s62, 30
; GISEL-NEXT: v_writelane_b32 v41, s63, 31
; GISEL-NEXT: v_mov_b32_e32 v40, v0
; GISEL-NEXT: s_mov_b64 s[4:5], exec
; GISEL-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1
; GISEL-NEXT: v_readfirstlane_b32 s6, v1
; GISEL-NEXT: v_readfirstlane_b32 s7, v2
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2]
; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GISEL-NEXT: v_mov_b32_e32 v0, v40
; GISEL-NEXT: s_swappc_b64 s[30:31], s[6:7]
; GISEL-NEXT: ; implicit-def: $vgpr1
; GISEL-NEXT: s_xor_b64 exec, exec, s[8:9]
; GISEL-NEXT: s_cbranch_execnz .LBB7_1
; GISEL-NEXT: ; %bb.2:
; GISEL-NEXT: s_mov_b64 exec, s[4:5]
; GISEL-NEXT: v_mov_b32_e32 v0, v40
; GISEL-NEXT: v_readlane_b32 s63, v41, 31
; GISEL-NEXT: v_readlane_b32 s62, v41, 30
; GISEL-NEXT: v_readlane_b32 s61, v41, 29
; GISEL-NEXT: v_readlane_b32 s60, v41, 28
; GISEL-NEXT: v_readlane_b32 s59, v41, 27
; GISEL-NEXT: v_readlane_b32 s58, v41, 26
; GISEL-NEXT: v_readlane_b32 s57, v41, 25
; GISEL-NEXT: v_readlane_b32 s56, v41, 24
; GISEL-NEXT: v_readlane_b32 s55, v41, 23
; GISEL-NEXT: v_readlane_b32 s54, v41, 22
; GISEL-NEXT: v_readlane_b32 s53, v41, 21
; GISEL-NEXT: v_readlane_b32 s52, v41, 20
; GISEL-NEXT: v_readlane_b32 s51, v41, 19
; GISEL-NEXT: v_readlane_b32 s50, v41, 18
; GISEL-NEXT: v_readlane_b32 s49, v41, 17
; GISEL-NEXT: v_readlane_b32 s48, v41, 16
; GISEL-NEXT: v_readlane_b32 s47, v41, 15
; GISEL-NEXT: v_readlane_b32 s46, v41, 14
; GISEL-NEXT: v_readlane_b32 s45, v41, 13
; GISEL-NEXT: v_readlane_b32 s44, v41, 12
; GISEL-NEXT: v_readlane_b32 s43, v41, 11
; GISEL-NEXT: v_readlane_b32 s42, v41, 10
; GISEL-NEXT: v_readlane_b32 s41, v41, 9
; GISEL-NEXT: v_readlane_b32 s40, v41, 8
; GISEL-NEXT: v_readlane_b32 s39, v41, 7
; GISEL-NEXT: v_readlane_b32 s38, v41, 6
; GISEL-NEXT: v_readlane_b32 s37, v41, 5
; GISEL-NEXT: v_readlane_b32 s36, v41, 4
; GISEL-NEXT: v_readlane_b32 s35, v41, 3
; GISEL-NEXT: v_readlane_b32 s34, v41, 2
; GISEL-NEXT: v_readlane_b32 s31, v41, 1
; GISEL-NEXT: v_readlane_b32 s30, v41, 0
; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GISEL-NEXT: s_mov_b32 s32, s33
; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
; GISEL-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GISEL-NEXT: s_mov_b64 exec, s[4:5]
; GISEL-NEXT: s_mov_b32 s33, s10
; GISEL-NEXT: s_waitcnt vmcnt(0)
; GISEL-NEXT: s_setpc_b64 s[30:31]
  call amdgpu_gfx void %fptr(i32 %i)
  ret i32 %i
}

; Use a variable inside a waterfall loop and use the return variable after the loop.
; TODO: The argument and return variable could be in the same physical register, but the register
; allocator is not able to do that because the return value clashes with the live range of an
; IMPLICIT_DEF of the argument.
; The checks below expect the call result to be copied out of v0 inside the loop (to v3 under
; the GCN prefix, v2 under GISEL) and copied back into v0 once the loop has finished.
define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, ptr %fptr) {
; GCN-LABEL: test_indirect_call_vgpr_ptr_arg_and_return:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_mov_b32 s10, s33
; GCN-NEXT: s_mov_b32 s33, s32
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: s_addk_i32 s32, 0x400
; GCN-NEXT: v_writelane_b32 v40, s30, 0
; GCN-NEXT: v_writelane_b32 v40, s31, 1
; GCN-NEXT: v_writelane_b32 v40, s34, 2
; GCN-NEXT: v_writelane_b32 v40, s35, 3
; GCN-NEXT: v_writelane_b32 v40, s36, 4
; GCN-NEXT: v_writelane_b32 v40, s37, 5
; GCN-NEXT: v_writelane_b32 v40, s38, 6
; GCN-NEXT: v_writelane_b32 v40, s39, 7
; GCN-NEXT: v_writelane_b32 v40, s40, 8
; GCN-NEXT: v_writelane_b32 v40, s41, 9
; GCN-NEXT: v_writelane_b32 v40, s42, 10
; GCN-NEXT: v_writelane_b32 v40, s43, 11
; GCN-NEXT: v_writelane_b32 v40, s44, 12
; GCN-NEXT: v_writelane_b32 v40, s45, 13
; GCN-NEXT: v_writelane_b32 v40, s46, 14
; GCN-NEXT: v_writelane_b32 v40, s47, 15
; GCN-NEXT: v_writelane_b32 v40, s48, 16
; GCN-NEXT: v_writelane_b32 v40, s49, 17
; GCN-NEXT: v_writelane_b32 v40, s50, 18
; GCN-NEXT: v_writelane_b32 v40, s51, 19
; GCN-NEXT: v_writelane_b32 v40, s52, 20
; GCN-NEXT: v_writelane_b32 v40, s53, 21
; GCN-NEXT: v_writelane_b32 v40, s54, 22
; GCN-NEXT: v_writelane_b32 v40, s55, 23
; GCN-NEXT: v_writelane_b32 v40, s56, 24
; GCN-NEXT: v_writelane_b32 v40, s57, 25
; GCN-NEXT: v_writelane_b32 v40, s58, 26
; GCN-NEXT: v_writelane_b32 v40, s59, 27
; GCN-NEXT: v_writelane_b32 v40, s60, 28
; GCN-NEXT: v_writelane_b32 v40, s61, 29
; GCN-NEXT: v_writelane_b32 v40, s62, 30
; GCN-NEXT: v_writelane_b32 v40, s63, 31
; GCN-NEXT: s_mov_b64 s[4:5], exec
; GCN-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: v_readfirstlane_b32 s8, v1
; GCN-NEXT: v_readfirstlane_b32 s9, v2
; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2]
; GCN-NEXT: s_and_saveexec_b64 s[6:7], vcc
; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9]
; GCN-NEXT: v_mov_b32_e32 v3, v0
; GCN-NEXT: ; implicit-def: $vgpr1_vgpr2
; GCN-NEXT: ; implicit-def: $vgpr0
; GCN-NEXT: s_xor_b64 exec, exec, s[6:7]
; GCN-NEXT: s_cbranch_execnz .LBB8_1
; GCN-NEXT: ; %bb.2:
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: v_mov_b32_e32 v0, v3
; GCN-NEXT: v_readlane_b32 s63, v40, 31
; GCN-NEXT: v_readlane_b32 s62, v40, 30
; GCN-NEXT: v_readlane_b32 s61, v40, 29
; GCN-NEXT: v_readlane_b32 s60, v40, 28
; GCN-NEXT: v_readlane_b32 s59, v40, 27
; GCN-NEXT: v_readlane_b32 s58, v40, 26
; GCN-NEXT: v_readlane_b32 s57, v40, 25
; GCN-NEXT: v_readlane_b32 s56, v40, 24
; GCN-NEXT: v_readlane_b32 s55, v40, 23
; GCN-NEXT: v_readlane_b32 s54, v40, 22
; GCN-NEXT: v_readlane_b32 s53, v40, 21
; GCN-NEXT: v_readlane_b32 s52, v40, 20
; GCN-NEXT: v_readlane_b32 s51, v40, 19
; GCN-NEXT: v_readlane_b32 s50, v40, 18
; GCN-NEXT: v_readlane_b32 s49, v40, 17
; GCN-NEXT: v_readlane_b32 s48, v40, 16
; GCN-NEXT: v_readlane_b32 s47, v40, 15
; GCN-NEXT: v_readlane_b32 s46, v40, 14
; GCN-NEXT: v_readlane_b32 s45, v40, 13
; GCN-NEXT: v_readlane_b32 s44, v40, 12
; GCN-NEXT: v_readlane_b32 s43, v40, 11
; GCN-NEXT: v_readlane_b32 s42, v40, 10
; GCN-NEXT: v_readlane_b32 s41, v40, 9
; GCN-NEXT: v_readlane_b32 s40, v40, 8
; GCN-NEXT: v_readlane_b32 s39, v40, 7
; GCN-NEXT: v_readlane_b32 s38, v40, 6
; GCN-NEXT: v_readlane_b32 s37, v40, 5
; GCN-NEXT: v_readlane_b32 s36, v40, 4
; GCN-NEXT: v_readlane_b32 s35, v40, 3
; GCN-NEXT: v_readlane_b32 s34, v40, 2
; GCN-NEXT: v_readlane_b32 s31, v40, 1
; GCN-NEXT: v_readlane_b32 s30, v40, 0
; GCN-NEXT: s_mov_b32 s32, s33
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: s_mov_b32 s33, s10
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg_and_return:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: s_mov_b32 s10, s33
; GISEL-NEXT: s_mov_b32 s33, s32
; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GISEL-NEXT: s_mov_b64 exec, s[4:5]
; GISEL-NEXT: s_addk_i32 s32, 0x400
; GISEL-NEXT: v_writelane_b32 v40, s30, 0
; GISEL-NEXT: v_writelane_b32 v40, s31, 1
; GISEL-NEXT: v_writelane_b32 v40, s34, 2
; GISEL-NEXT: v_writelane_b32 v40, s35, 3
; GISEL-NEXT: v_writelane_b32 v40, s36, 4
; GISEL-NEXT: v_writelane_b32 v40, s37, 5
; GISEL-NEXT: v_writelane_b32 v40, s38, 6
; GISEL-NEXT: v_writelane_b32 v40, s39, 7
; GISEL-NEXT: v_writelane_b32 v40, s40, 8
; GISEL-NEXT: v_writelane_b32 v40, s41, 9
; GISEL-NEXT: v_writelane_b32 v40, s42, 10
; GISEL-NEXT: v_writelane_b32 v40, s43, 11
; GISEL-NEXT: v_writelane_b32 v40, s44, 12
; GISEL-NEXT: v_writelane_b32 v40, s45, 13
; GISEL-NEXT: v_writelane_b32 v40, s46, 14
; GISEL-NEXT: v_writelane_b32 v40, s47, 15
; GISEL-NEXT: v_writelane_b32 v40, s48, 16
; GISEL-NEXT: v_writelane_b32 v40, s49, 17
; GISEL-NEXT: v_writelane_b32 v40, s50, 18
; GISEL-NEXT: v_writelane_b32 v40, s51, 19
; GISEL-NEXT: v_writelane_b32 v40, s52, 20
; GISEL-NEXT: v_writelane_b32 v40, s53, 21
; GISEL-NEXT: v_writelane_b32 v40, s54, 22
; GISEL-NEXT: v_writelane_b32 v40, s55, 23
; GISEL-NEXT: v_writelane_b32 v40, s56, 24
; GISEL-NEXT: v_writelane_b32 v40, s57, 25
; GISEL-NEXT: v_writelane_b32 v40, s58, 26
; GISEL-NEXT: v_writelane_b32 v40, s59, 27
; GISEL-NEXT: v_writelane_b32 v40, s60, 28
; GISEL-NEXT: v_writelane_b32 v40, s61, 29
; GISEL-NEXT: v_writelane_b32 v40, s62, 30
; GISEL-NEXT: v_writelane_b32 v40, s63, 31
; GISEL-NEXT: s_mov_b64 s[4:5], exec
; GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
; GISEL-NEXT: v_readfirstlane_b32 s8, v1
; GISEL-NEXT: v_readfirstlane_b32 s9, v2
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2]
; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc
; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9]
; GISEL-NEXT: v_mov_b32_e32 v2, v0
; GISEL-NEXT: ; implicit-def: $vgpr1
; GISEL-NEXT: ; implicit-def: $vgpr0
; GISEL-NEXT: s_xor_b64 exec, exec, s[6:7]
; GISEL-NEXT: s_cbranch_execnz .LBB8_1
; GISEL-NEXT: ; %bb.2:
; GISEL-NEXT: s_mov_b64 exec, s[4:5]
; GISEL-NEXT: v_mov_b32_e32 v0, v2
; GISEL-NEXT: v_readlane_b32 s63, v40, 31
; GISEL-NEXT: v_readlane_b32 s62, v40, 30
; GISEL-NEXT: v_readlane_b32 s61, v40, 29
; GISEL-NEXT: v_readlane_b32 s60, v40, 28
; GISEL-NEXT: v_readlane_b32 s59, v40, 27
; GISEL-NEXT: v_readlane_b32 s58, v40, 26
; GISEL-NEXT: v_readlane_b32 s57, v40, 25
; GISEL-NEXT: v_readlane_b32 s56, v40, 24
; GISEL-NEXT: v_readlane_b32 s55, v40, 23
; GISEL-NEXT: v_readlane_b32 s54, v40, 22
; GISEL-NEXT: v_readlane_b32 s53, v40, 21
; GISEL-NEXT: v_readlane_b32 s52, v40, 20
; GISEL-NEXT: v_readlane_b32 s51, v40, 19
; GISEL-NEXT: v_readlane_b32 s50, v40, 18
; GISEL-NEXT: v_readlane_b32 s49, v40, 17
; GISEL-NEXT: v_readlane_b32 s48, v40, 16
; GISEL-NEXT: v_readlane_b32 s47, v40, 15
; GISEL-NEXT: v_readlane_b32 s46, v40, 14
; GISEL-NEXT: v_readlane_b32 s45, v40, 13
; GISEL-NEXT: v_readlane_b32 s44, v40, 12
; GISEL-NEXT: v_readlane_b32 s43, v40, 11
; GISEL-NEXT: v_readlane_b32 s42, v40, 10
; GISEL-NEXT: v_readlane_b32 s41, v40, 9
; GISEL-NEXT: v_readlane_b32 s40, v40, 8
; GISEL-NEXT: v_readlane_b32 s39, v40, 7
; GISEL-NEXT: v_readlane_b32 s38, v40, 6
; GISEL-NEXT: v_readlane_b32 s37, v40, 5
; GISEL-NEXT: v_readlane_b32 s36, v40, 4
; GISEL-NEXT: v_readlane_b32 s35, v40, 3
; GISEL-NEXT: v_readlane_b32 s34, v40, 2
; GISEL-NEXT: v_readlane_b32 s31, v40, 1
; GISEL-NEXT: v_readlane_b32 s30, v40, 0
; GISEL-NEXT: s_mov_b32 s32, s33
; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GISEL-NEXT: s_mov_b64 exec, s[4:5]
; GISEL-NEXT: s_mov_b32 s33, s10
; GISEL-NEXT: s_waitcnt vmcnt(0)
; GISEL-NEXT: s_setpc_b64 s[30:31]
  %ret = call amdgpu_gfx i32 %fptr(i32 %i)
  ret i32 %ret
}

; Calling a vgpr can never be a tail call: although the IR call is marked `tail`, the checks
; expect an s_swappc_b64 inside the waterfall loop followed by a normal s_setpc_b64 return.
define void @test_indirect_tail_call_vgpr_ptr(ptr %fptr) {
; GCN-LABEL: test_indirect_tail_call_vgpr_ptr:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_mov_b32 s10, s33
; GCN-NEXT: s_mov_b32 s33, s32
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: s_addk_i32 s32, 0x400
; GCN-NEXT: v_writelane_b32 v40, s30, 0
; GCN-NEXT: v_writelane_b32 v40, s31, 1
; GCN-NEXT: v_writelane_b32 v40, s34, 2
; GCN-NEXT: v_writelane_b32 v40, s35, 3
; GCN-NEXT: v_writelane_b32 v40, s36, 4
; GCN-NEXT: v_writelane_b32 v40, s37, 5
; GCN-NEXT: v_writelane_b32 v40, s38, 6
; GCN-NEXT: v_writelane_b32 v40, s39, 7
; GCN-NEXT: v_writelane_b32 v40, s40, 8
; GCN-NEXT: v_writelane_b32 v40, s41, 9
; GCN-NEXT: v_writelane_b32 v40, s42, 10
; GCN-NEXT: v_writelane_b32 v40, s43, 11
; GCN-NEXT: v_writelane_b32 v40, s44, 12
; GCN-NEXT: v_writelane_b32 v40, s45, 13
; GCN-NEXT: v_writelane_b32 v40, s46, 14
; GCN-NEXT: v_writelane_b32 v40, s47, 15
; GCN-NEXT: v_writelane_b32 v40, s48, 16
; GCN-NEXT: v_writelane_b32 v40, s49, 17
; GCN-NEXT: v_writelane_b32 v40, s50, 18
; GCN-NEXT: v_writelane_b32 v40, s51, 19
; GCN-NEXT: v_writelane_b32 v40, s52, 20
; GCN-NEXT: v_writelane_b32 v40, s53, 21
; GCN-NEXT: v_writelane_b32 v40, s54, 22
; GCN-NEXT: v_writelane_b32 v40, s55, 23
; GCN-NEXT: v_writelane_b32 v40, s56, 24
; GCN-NEXT: v_writelane_b32 v40, s57, 25
; GCN-NEXT: v_writelane_b32 v40, s58, 26
; GCN-NEXT: v_writelane_b32 v40, s59, 27
; GCN-NEXT: v_writelane_b32 v40, s60, 28
; GCN-NEXT: v_writelane_b32 v40, s61, 29
; GCN-NEXT: v_writelane_b32 v40, s62, 30
; GCN-NEXT: v_writelane_b32 v40, s63, 31
; GCN-NEXT: s_mov_b64 s[4:5], exec
; GCN-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: v_readfirstlane_b32 s6, v0
; GCN-NEXT: v_readfirstlane_b32 s7, v1
; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[0:1]
; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7]
; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1
; GCN-NEXT: s_xor_b64 exec, exec, s[8:9]
; GCN-NEXT: s_cbranch_execnz .LBB9_1
; GCN-NEXT: ; %bb.2:
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: v_readlane_b32 s63, v40, 31
; GCN-NEXT: v_readlane_b32 s62, v40, 30
; GCN-NEXT: v_readlane_b32 s61, v40, 29
; GCN-NEXT: v_readlane_b32 s60, v40, 28
; GCN-NEXT: v_readlane_b32 s59, v40, 27
; GCN-NEXT: v_readlane_b32 s58, v40, 26
; GCN-NEXT: v_readlane_b32 s57, v40, 25
; GCN-NEXT: v_readlane_b32 s56, v40, 24
; GCN-NEXT: v_readlane_b32 s55, v40, 23
; GCN-NEXT: v_readlane_b32 s54, v40, 22
; GCN-NEXT: v_readlane_b32 s53, v40, 21
; GCN-NEXT: v_readlane_b32 s52, v40, 20
; GCN-NEXT: v_readlane_b32 s51, v40, 19
; GCN-NEXT: v_readlane_b32 s50, v40, 18
; GCN-NEXT: v_readlane_b32 s49, v40, 17
; GCN-NEXT: v_readlane_b32 s48, v40, 16
; GCN-NEXT: v_readlane_b32 s47, v40, 15
; GCN-NEXT: v_readlane_b32 s46, v40, 14
; GCN-NEXT: v_readlane_b32 s45, v40, 13
; GCN-NEXT: v_readlane_b32 s44, v40, 12
; GCN-NEXT: v_readlane_b32 s43, v40, 11
; GCN-NEXT: v_readlane_b32 s42, v40, 10
; GCN-NEXT: v_readlane_b32 s41, v40, 9
; GCN-NEXT: v_readlane_b32 s40, v40, 8
; GCN-NEXT: v_readlane_b32 s39, v40, 7
; GCN-NEXT: v_readlane_b32 s38, v40, 6
; GCN-NEXT: v_readlane_b32 s37, v40, 5
; GCN-NEXT: v_readlane_b32 s36, v40, 4
; GCN-NEXT: v_readlane_b32 s35, v40, 3
; GCN-NEXT: v_readlane_b32 s34, v40, 2
; GCN-NEXT: v_readlane_b32 s31, v40, 1
; GCN-NEXT: v_readlane_b32 s30, v40, 0
; GCN-NEXT: s_mov_b32 s32, s33
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: s_mov_b32 s33, s10
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: test_indirect_tail_call_vgpr_ptr:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: s_mov_b32 s10, s33
; GISEL-NEXT: s_mov_b32 s33, s32
; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GISEL-NEXT: s_mov_b64 exec, s[4:5]
; GISEL-NEXT: s_addk_i32 s32, 0x400
; GISEL-NEXT: v_writelane_b32 v40, s30, 0
; GISEL-NEXT: v_writelane_b32 v40, s31, 1
; GISEL-NEXT: v_writelane_b32 v40, s34, 2
; GISEL-NEXT: v_writelane_b32 v40, s35, 3
; GISEL-NEXT: v_writelane_b32 v40, s36, 4
; GISEL-NEXT: v_writelane_b32 v40, s37, 5
; GISEL-NEXT: v_writelane_b32 v40, s38, 6
; GISEL-NEXT: v_writelane_b32 v40, s39, 7
; GISEL-NEXT: v_writelane_b32 v40, s40, 8
; GISEL-NEXT: v_writelane_b32 v40, s41, 9
; GISEL-NEXT: v_writelane_b32 v40, s42, 10
; GISEL-NEXT: v_writelane_b32 v40, s43, 11
; GISEL-NEXT: v_writelane_b32 v40, s44, 12
; GISEL-NEXT: v_writelane_b32 v40, s45, 13
; GISEL-NEXT: v_writelane_b32 v40, s46, 14
; GISEL-NEXT: v_writelane_b32 v40, s47, 15
; GISEL-NEXT: v_writelane_b32 v40, s48, 16
; GISEL-NEXT: v_writelane_b32 v40, s49, 17
; GISEL-NEXT: v_writelane_b32 v40, s50, 18
; GISEL-NEXT: v_writelane_b32 v40, s51, 19
; GISEL-NEXT: v_writelane_b32 v40, s52, 20
; GISEL-NEXT: v_writelane_b32 v40, s53, 21
; GISEL-NEXT: v_writelane_b32 v40, s54, 22
; GISEL-NEXT: v_writelane_b32 v40, s55, 23
; GISEL-NEXT: v_writelane_b32 v40, s56, 24
; GISEL-NEXT: v_writelane_b32 v40, s57, 25
; GISEL-NEXT: v_writelane_b32 v40, s58, 26
; GISEL-NEXT: v_writelane_b32 v40, s59, 27
; GISEL-NEXT: v_writelane_b32 v40, s60, 28
; GISEL-NEXT: v_writelane_b32 v40, s61, 29
; GISEL-NEXT: v_writelane_b32 v40, s62, 30
; GISEL-NEXT: v_writelane_b32 v40, s63, 31
; GISEL-NEXT: s_mov_b64 s[4:5], exec
; GISEL-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1
; GISEL-NEXT: v_readfirstlane_b32 s6, v0
; GISEL-NEXT: v_readfirstlane_b32 s7, v1
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[0:1]
; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GISEL-NEXT: s_swappc_b64 s[30:31], s[6:7]
; GISEL-NEXT: ; implicit-def: $vgpr0
; GISEL-NEXT: s_xor_b64 exec, exec, s[8:9]
; GISEL-NEXT: s_cbranch_execnz .LBB9_1
; GISEL-NEXT: ; %bb.2:
; GISEL-NEXT: s_mov_b64 exec, s[4:5]
; GISEL-NEXT: v_readlane_b32 s63, v40, 31
; GISEL-NEXT: v_readlane_b32 s62, v40, 30
; GISEL-NEXT: v_readlane_b32 s61, v40, 29
; GISEL-NEXT: v_readlane_b32 s60, v40, 28
; GISEL-NEXT: v_readlane_b32 s59, v40, 27
; GISEL-NEXT: v_readlane_b32 s58, v40, 26
; GISEL-NEXT: v_readlane_b32 s57, v40, 25
; GISEL-NEXT: v_readlane_b32 s56, v40, 24
; GISEL-NEXT: v_readlane_b32 s55, v40, 23
; GISEL-NEXT: v_readlane_b32 s54, v40, 22
; GISEL-NEXT: v_readlane_b32 s53, v40, 21
; GISEL-NEXT: v_readlane_b32 s52, v40, 20
; GISEL-NEXT: v_readlane_b32 s51, v40, 19
; GISEL-NEXT: v_readlane_b32 s50, v40, 18
; GISEL-NEXT: v_readlane_b32 s49, v40, 17
; GISEL-NEXT: v_readlane_b32 s48, v40, 16
; GISEL-NEXT: v_readlane_b32 s47, v40, 15
; GISEL-NEXT: v_readlane_b32 s46, v40, 14
; GISEL-NEXT: v_readlane_b32 s45, v40, 13
; GISEL-NEXT: v_readlane_b32 s44, v40, 12
; GISEL-NEXT: v_readlane_b32 s43, v40, 11
; GISEL-NEXT: v_readlane_b32 s42, v40, 10
; GISEL-NEXT: v_readlane_b32 s41, v40, 9
; GISEL-NEXT: v_readlane_b32 s40, v40, 8
; GISEL-NEXT: v_readlane_b32 s39, v40, 7
; GISEL-NEXT: v_readlane_b32 s38, v40, 6
; GISEL-NEXT: v_readlane_b32 s37, v40, 5
; GISEL-NEXT: v_readlane_b32 s36, v40, 4
; GISEL-NEXT: v_readlane_b32 s35, v40, 3
; GISEL-NEXT: v_readlane_b32 s34, v40, 2
; GISEL-NEXT: v_readlane_b32 s31, v40, 1
; GISEL-NEXT: v_readlane_b32 s30, v40, 0
; GISEL-NEXT: s_mov_b32 s32, s33
; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GISEL-NEXT: s_mov_b64 exec, s[4:5]
; GISEL-NEXT: s_mov_b32 s33, s10
; GISEL-NEXT: s_waitcnt vmcnt(0)
; GISEL-NEXT: s_setpc_b64 s[30:31]
  tail call amdgpu_gfx void %fptr()
  ret void
}

; Module flag: amdhsa_code_object_version = 400.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}
