; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10 %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX11 %s

; The functions below exercise which registers are saved, copied or restored
; around calls and inline asm; the same IR is compiled for gfx900, gfx1010 and
; gfx1100 and each target has its own block of expected instructions.

; External callee shared by the call tests in this file.
declare hidden amdgpu_gfx void @external_void_func_void() #0

; Calls @external_void_func_void twice with an empty side-effecting inline asm
; in between. The expected output writes s4, s5, s30 and s31 into lanes 0-3 of
; v40 before the first s_swappc_b64 and reads them back before s_setpc_b64.
define amdgpu_gfx void @test_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void() #0 {
; GFX9-LABEL: test_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b32 s34, s33
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
; GFX9-NEXT:    v_writelane_b32 v40, s34, 4
; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
; GFX9-NEXT:    v_writelane_b32 v40, s5, 1
; GFX9-NEXT:    v_writelane_b32 v40, s30, 2
; GFX9-NEXT:    s_mov_b32 s5, external_void_func_void@abs32@hi
; GFX9-NEXT:    s_mov_b32 s4, external_void_func_void@abs32@lo
; GFX9-NEXT:    s_addk_i32 s32, 0x400
; GFX9-NEXT:    v_writelane_b32 v40, s31, 3
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT:    v_readlane_b32 s31, v40, 3
; GFX9-NEXT:    v_readlane_b32 s30, v40, 2
; GFX9-NEXT:    v_readlane_b32 s5, v40, 1
; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
; GFX9-NEXT:    s_mov_b32 s32, s33
; GFX9-NEXT:    v_readlane_b32 s34, v40, 4
; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
; GFX9-NEXT:    s_mov_b32 s33, s34
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_mov_b32 s34, s33
; GFX10-NEXT:    s_mov_b32 s33, s32
; GFX10-NEXT:    s_or_saveexec_b32 s35, -1
; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s35
; GFX10-NEXT:    v_writelane_b32 v40, s34, 4
; GFX10-NEXT:    s_addk_i32 s32, 0x200
; GFX10-NEXT:    v_writelane_b32 v40, s4, 0
; GFX10-NEXT:    s_mov_b32 s4, external_void_func_void@abs32@lo
; GFX10-NEXT:    v_writelane_b32 v40, s5, 1
; GFX10-NEXT:    s_mov_b32 s5, external_void_func_void@abs32@hi
; GFX10-NEXT:    v_writelane_b32 v40, s30, 2
; GFX10-NEXT:    v_writelane_b32 v40, s31, 3
; GFX10-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX10-NEXT:    v_readlane_b32 s31, v40, 3
; GFX10-NEXT:    v_readlane_b32 s30, v40, 2
; GFX10-NEXT:    v_readlane_b32 s5, v40, 1
; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
; GFX10-NEXT:    s_mov_b32 s32, s33
; GFX10-NEXT:    v_readlane_b32 s34, v40, 4
; GFX10-NEXT:    s_or_saveexec_b32 s35, -1
; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s35
; GFX10-NEXT:    s_mov_b32 s33, s34
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s0, s33
; GFX11-NEXT:    s_mov_b32 s33, s32
; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
; GFX11-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s1
; GFX11-NEXT:    v_writelane_b32 v40, s0, 4
; GFX11-NEXT:    s_add_i32 s32, s32, 16
; GFX11-NEXT:    v_writelane_b32 v40, s4, 0
; GFX11-NEXT:    s_mov_b32 s4, external_void_func_void@abs32@lo
; GFX11-NEXT:    v_writelane_b32 v40, s5, 1
; GFX11-NEXT:    s_mov_b32 s5, external_void_func_void@abs32@hi
; GFX11-NEXT:    v_writelane_b32 v40, s30, 2
; GFX11-NEXT:    v_writelane_b32 v40, s31, 3
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_readlane_b32 s31, v40, 3
; GFX11-NEXT:    v_readlane_b32 s30, v40, 2
; GFX11-NEXT:    v_readlane_b32 s5, v40, 1
; GFX11-NEXT:    v_readlane_b32 s4, v40, 0
; GFX11-NEXT:    s_mov_b32 s32, s33
; GFX11-NEXT:    v_readlane_b32 s0, v40, 4
; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
; GFX11-NEXT:    scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s1
; GFX11-NEXT:    s_mov_b32 s33, s0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  call amdgpu_gfx void @external_void_func_void()
  call void asm sideeffect "", ""() #0
  call amdgpu_gfx void @external_void_func_void()
  ret void
}

; Leaf amdgpu_gfx function whose two inline asm statements list s[30:31] and
; s[28:29] as clobbers. The expected output writes s28-s31 into lanes 0-3 of v0
; before the asm and reads them back before s_setpc_b64.
define amdgpu_gfx void @void_func_void_clobber_s28_s29() #1 {
; GFX9-LABEL: void_func_void_clobber_s28_s29:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_xor_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    v_writelane_b32 v0, s28, 0
; GFX9-NEXT:    v_writelane_b32 v0, s29, 1
; GFX9-NEXT:    v_writelane_b32 v0, s30, 2
; GFX9-NEXT:    v_writelane_b32 v0, s31, 3
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; clobber
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; clobber
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    v_readlane_b32 s31, v0, 3
; GFX9-NEXT:    v_readlane_b32 s30, v0, 2
; GFX9-NEXT:    v_readlane_b32 s29, v0, 1
; GFX9-NEXT:    v_readlane_b32 s28, v0, 0
; GFX9-NEXT:    s_xor_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: void_func_void_clobber_s28_s29:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_xor_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    v_writelane_b32 v0, s28, 0
; GFX10-NEXT:    v_writelane_b32 v0, s29, 1
; GFX10-NEXT:    v_writelane_b32 v0, s30, 2
; GFX10-NEXT:    v_writelane_b32 v0, s31, 3
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; clobber
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; clobber
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    v_readlane_b32 s31, v0, 3
; GFX10-NEXT:    v_readlane_b32 s30, v0, 2
; GFX10-NEXT:    v_readlane_b32 s29, v0, 1
; GFX10-NEXT:    v_readlane_b32 s28, v0, 0
; GFX10-NEXT:    s_xor_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_void_clobber_s28_s29:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    v_writelane_b32 v0, s28, 0
; GFX11-NEXT:    v_writelane_b32 v0, s29, 1
; GFX11-NEXT:    v_writelane_b32 v0, s30, 2
; GFX11-NEXT:    v_writelane_b32 v0, s31, 3
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; clobber
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; clobber
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_readlane_b32 s31, v0, 3
; GFX11-NEXT:    v_readlane_b32 s30, v0, 2
; GFX11-NEXT:    v_readlane_b32 s29, v0, 1
; GFX11-NEXT:    v_readlane_b32 s28, v0, 0
; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  call void asm sideeffect "; clobber", "~{s[30:31]}"() #0
  call void asm sideeffect "; clobber", "~{s[28:29]}"() #0
  ret void
}

; An i32 is defined in s31 by inline asm, must survive a call to
; @external_void_func_void, and is then consumed from s31 by a second asm.
; The expected output moves the value to s4 before s_swappc_b64 and back to s31
; afterwards. The %out argument is not referenced by the body.
define amdgpu_gfx void @test_call_void_func_void_mayclobber_s31(ptr addrspace(1) %out) #0 {
; GFX9-LABEL: test_call_void_func_void_mayclobber_s31:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b32 s34, s33
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
; GFX9-NEXT:    v_writelane_b32 v40, s34, 3
; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
; GFX9-NEXT:    v_writelane_b32 v40, s30, 1
; GFX9-NEXT:    s_mov_b32 s35, external_void_func_void@abs32@hi
; GFX9-NEXT:    s_mov_b32 s34, external_void_func_void@abs32@lo
; GFX9-NEXT:    s_addk_i32 s32, 0x400
; GFX9-NEXT:    v_writelane_b32 v40, s31, 2
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; def s31
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    s_mov_b32 s4, s31
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT:    s_mov_b32 s31, s4
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; use s31
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    v_readlane_b32 s31, v40, 2
; GFX9-NEXT:    v_readlane_b32 s30, v40, 1
; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
; GFX9-NEXT:    s_mov_b32 s32, s33
; GFX9-NEXT:    v_readlane_b32 s34, v40, 3
; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
; GFX9-NEXT:    s_mov_b32 s33, s34
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_void_func_void_mayclobber_s31:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_mov_b32 s34, s33
; GFX10-NEXT:    s_mov_b32 s33, s32
; GFX10-NEXT:    s_or_saveexec_b32 s35, -1
; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s35
; GFX10-NEXT:    v_writelane_b32 v40, s34, 3
; GFX10-NEXT:    s_mov_b32 s35, external_void_func_void@abs32@hi
; GFX10-NEXT:    s_mov_b32 s34, external_void_func_void@abs32@lo
; GFX10-NEXT:    s_addk_i32 s32, 0x200
; GFX10-NEXT:    v_writelane_b32 v40, s4, 0
; GFX10-NEXT:    v_writelane_b32 v40, s30, 1
; GFX10-NEXT:    v_writelane_b32 v40, s31, 2
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; def s31
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    s_mov_b32 s4, s31
; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT:    s_mov_b32 s31, s4
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s31
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    v_readlane_b32 s31, v40, 2
; GFX10-NEXT:    v_readlane_b32 s30, v40, 1
; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
; GFX10-NEXT:    s_mov_b32 s32, s33
; GFX10-NEXT:    v_readlane_b32 s34, v40, 3
; GFX10-NEXT:    s_or_saveexec_b32 s35, -1
; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s35
; GFX10-NEXT:    s_mov_b32 s33, s34
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_call_void_func_void_mayclobber_s31:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s0, s33
; GFX11-NEXT:    s_mov_b32 s33, s32
; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
; GFX11-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s1
; GFX11-NEXT:    v_writelane_b32 v40, s0, 3
; GFX11-NEXT:    s_mov_b32 s1, external_void_func_void@abs32@hi
; GFX11-NEXT:    s_mov_b32 s0, external_void_func_void@abs32@lo
; GFX11-NEXT:    s_add_i32 s32, s32, 16
; GFX11-NEXT:    v_writelane_b32 v40, s4, 0
; GFX11-NEXT:    v_writelane_b32 v40, s30, 1
; GFX11-NEXT:    v_writelane_b32 v40, s31, 2
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; def s31
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    s_mov_b32 s4, s31
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT:    s_mov_b32 s31, s4
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s31
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    v_readlane_b32 s31, v40, 2
; GFX11-NEXT:    v_readlane_b32 s30, v40, 1
; GFX11-NEXT:    v_readlane_b32 s4, v40, 0
; GFX11-NEXT:    s_mov_b32 s32, s33
; GFX11-NEXT:    v_readlane_b32 s0, v40, 3
; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
; GFX11-NEXT:    scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s1
; GFX11-NEXT:    s_mov_b32 s33, s0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %s31 = call i32 asm sideeffect "; def $0", "={s31}"()
  call amdgpu_gfx void @external_void_func_void()
  call void asm sideeffect "; use $0", "{s31}"(i32 %s31)
  ret void
}

; Vector-register counterpart of the s31 test: an i32 is defined in v31 by
; inline asm, kept live across the call, and consumed from v31 afterwards.
; The expected output copies v31 into v40 around s_swappc_b64, stores and
; reloads v40 on the stack, and keeps the SGPR saves in lanes of v41.
; The %out argument is not referenced by the body.
define amdgpu_gfx void @test_call_void_func_void_mayclobber_v31(ptr addrspace(1) %out) #0 {
; GFX9-LABEL: test_call_void_func_void_mayclobber_v31:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b32 s34, s33
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
; GFX9-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
; GFX9-NEXT:    v_writelane_b32 v41, s34, 2
; GFX9-NEXT:    v_writelane_b32 v41, s30, 0
; GFX9-NEXT:    s_mov_b32 s35, external_void_func_void@abs32@hi
; GFX9-NEXT:    s_mov_b32 s34, external_void_func_void@abs32@lo
; GFX9-NEXT:    s_addk_i32 s32, 0x400
; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT:    v_writelane_b32 v41, s31, 1
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; def v31
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    v_mov_b32_e32 v40, v31
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT:    v_mov_b32_e32 v31, v40
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; use v31
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT:    v_readlane_b32 s31, v41, 1
; GFX9-NEXT:    v_readlane_b32 s30, v41, 0
; GFX9-NEXT:    s_mov_b32 s32, s33
; GFX9-NEXT:    v_readlane_b32 s34, v41, 2
; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
; GFX9-NEXT:    buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
; GFX9-NEXT:    s_mov_b32 s33, s34
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_void_func_void_mayclobber_v31:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_mov_b32 s34, s33
; GFX10-NEXT:    s_mov_b32 s33, s32
; GFX10-NEXT:    s_or_saveexec_b32 s35, -1
; GFX10-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s35
; GFX10-NEXT:    v_writelane_b32 v41, s34, 2
; GFX10-NEXT:    s_mov_b32 s35, external_void_func_void@abs32@hi
; GFX10-NEXT:    s_mov_b32 s34, external_void_func_void@abs32@lo
; GFX10-NEXT:    s_addk_i32 s32, 0x200
; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX10-NEXT:    v_writelane_b32 v41, s30, 0
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; def v31
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    v_mov_b32_e32 v40, v31
; GFX10-NEXT:    v_writelane_b32 v41, s31, 1
; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT:    v_mov_b32_e32 v31, v40
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use v31
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX10-NEXT:    v_readlane_b32 s31, v41, 1
; GFX10-NEXT:    v_readlane_b32 s30, v41, 0
; GFX10-NEXT:    s_mov_b32 s32, s33
; GFX10-NEXT:    v_readlane_b32 s34, v41, 2
; GFX10-NEXT:    s_or_saveexec_b32 s35, -1
; GFX10-NEXT:    buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s35
; GFX10-NEXT:    s_mov_b32 s33, s34
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_call_void_func_void_mayclobber_v31:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s0, s33
; GFX11-NEXT:    s_mov_b32 s33, s32
; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
; GFX11-NEXT:    scratch_store_b32 off, v41, s33 offset:4 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s1
; GFX11-NEXT:    v_writelane_b32 v41, s0, 2
; GFX11-NEXT:    s_mov_b32 s1, external_void_func_void@abs32@hi
; GFX11-NEXT:    s_mov_b32 s0, external_void_func_void@abs32@lo
; GFX11-NEXT:    s_add_i32 s32, s32, 16
; GFX11-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
; GFX11-NEXT:    v_writelane_b32 v41, s30, 0
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; def v31
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    v_mov_b32_e32 v40, v31
; GFX11-NEXT:    v_writelane_b32 v41, s31, 1
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT:    v_mov_b32_e32 v31, v40
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use v31
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT:    v_readlane_b32 s31, v41, 1
; GFX11-NEXT:    v_readlane_b32 s30, v41, 0
; GFX11-NEXT:    s_mov_b32 s32, s33
; GFX11-NEXT:    v_readlane_b32 s0, v41, 2
; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
; GFX11-NEXT:    scratch_load_b32 v41, off, s33 offset:4 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s1
; GFX11-NEXT:    s_mov_b32 s33, s0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %v31 = call i32 asm sideeffect "; def $0", "={v31}"()
  call amdgpu_gfx void @external_void_func_void()
  call void asm sideeffect "; use $0", "{v31}"(i32 %v31)
  ret void
}


; An i32 is defined in s33 by inline asm, kept live across the call, and
; consumed from s33 afterwards. In the expected output s33 is also the register
; set from s32 in the prologue; the asm-defined value is moved to s4 before
; s_swappc_b64 and moved back to s33 after it.
; The %out argument is not referenced by the body.
define amdgpu_gfx void @test_call_void_func_void_preserves_s33(ptr addrspace(1) %out) #0 {
; GFX9-LABEL: test_call_void_func_void_preserves_s33:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b32 s34, s33
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
; GFX9-NEXT:    v_writelane_b32 v40, s34, 3
; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
; GFX9-NEXT:    v_writelane_b32 v40, s30, 1
; GFX9-NEXT:    s_mov_b32 s35, external_void_func_void@abs32@hi
; GFX9-NEXT:    s_mov_b32 s34, external_void_func_void@abs32@lo
; GFX9-NEXT:    s_addk_i32 s32, 0x400
; GFX9-NEXT:    v_writelane_b32 v40, s31, 2
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; def s33
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    s_mov_b32 s4, s33
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT:    s_mov_b32 s33, s4
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; use s33
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    v_readlane_b32 s31, v40, 2
; GFX9-NEXT:    v_readlane_b32 s30, v40, 1
; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
; GFX9-NEXT:    s_mov_b32 s32, s33
; GFX9-NEXT:    v_readlane_b32 s34, v40, 3
; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
; GFX9-NEXT:    s_mov_b32 s33, s34
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_void_func_void_preserves_s33:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_mov_b32 s34, s33
; GFX10-NEXT:    s_mov_b32 s33, s32
; GFX10-NEXT:    s_or_saveexec_b32 s35, -1
; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s35
; GFX10-NEXT:    v_writelane_b32 v40, s34, 3
; GFX10-NEXT:    s_mov_b32 s35, external_void_func_void@abs32@hi
; GFX10-NEXT:    s_mov_b32 s34, external_void_func_void@abs32@lo
; GFX10-NEXT:    s_addk_i32 s32, 0x200
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; def s33
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    v_writelane_b32 v40, s4, 0
; GFX10-NEXT:    s_mov_b32 s4, s33
; GFX10-NEXT:    v_writelane_b32 v40, s30, 1
; GFX10-NEXT:    v_writelane_b32 v40, s31, 2
; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT:    s_mov_b32 s33, s4
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s33
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    v_readlane_b32 s31, v40, 2
; GFX10-NEXT:    v_readlane_b32 s30, v40, 1
; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
; GFX10-NEXT:    s_mov_b32 s32, s33
; GFX10-NEXT:    v_readlane_b32 s34, v40, 3
; GFX10-NEXT:    s_or_saveexec_b32 s35, -1
; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s35
; GFX10-NEXT:    s_mov_b32 s33, s34
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_call_void_func_void_preserves_s33:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s0, s33
; GFX11-NEXT:    s_mov_b32 s33, s32
; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
; GFX11-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s1
; GFX11-NEXT:    v_writelane_b32 v40, s0, 3
; GFX11-NEXT:    s_mov_b32 s1, external_void_func_void@abs32@hi
; GFX11-NEXT:    s_mov_b32 s0, external_void_func_void@abs32@lo
; GFX11-NEXT:    s_add_i32 s32, s32, 16
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; def s33
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    v_writelane_b32 v40, s4, 0
; GFX11-NEXT:    s_mov_b32 s4, s33
; GFX11-NEXT:    v_writelane_b32 v40, s30, 1
; GFX11-NEXT:    v_writelane_b32 v40, s31, 2
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT:    s_mov_b32 s33, s4
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s33
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_readlane_b32 s31, v40, 2
; GFX11-NEXT:    v_readlane_b32 s30, v40, 1
; GFX11-NEXT:    v_readlane_b32 s4, v40, 0
; GFX11-NEXT:    s_mov_b32 s32, s33
; GFX11-NEXT:    v_readlane_b32 s0, v40, 3
; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
; GFX11-NEXT:    scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s1
; GFX11-NEXT:    s_mov_b32 s33, s0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %s33 = call i32 asm sideeffect "; def $0", "={s33}"()
  call amdgpu_gfx void @external_void_func_void()
  call void asm sideeffect "; use $0", "{s33}"(i32 %s33)
  ret void
}

; Same shape as the s33 test but with s34: the asm-defined value is moved to s4
; before s_swappc_b64 and moved back to s34 after it. On gfx900 and gfx1010 the
; expected output also uses s[34:35] to hold the callee address, which is why
; the value is moved out of s34 before the address is materialized.
; The %out argument is not referenced by the body.
define amdgpu_gfx void @test_call_void_func_void_preserves_s34(ptr addrspace(1) %out) #0 {
; GFX9-LABEL: test_call_void_func_void_preserves_s34:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b32 s34, s33
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
; GFX9-NEXT:    v_writelane_b32 v40, s34, 3
; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; def s34
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    v_writelane_b32 v40, s30, 1
; GFX9-NEXT:    s_mov_b32 s4, s34
; GFX9-NEXT:    s_mov_b32 s35, external_void_func_void@abs32@hi
; GFX9-NEXT:    s_mov_b32 s34, external_void_func_void@abs32@lo
; GFX9-NEXT:    s_addk_i32 s32, 0x400
; GFX9-NEXT:    v_writelane_b32 v40, s31, 2
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT:    s_mov_b32 s34, s4
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; use s34
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    v_readlane_b32 s31, v40, 2
; GFX9-NEXT:    v_readlane_b32 s30, v40, 1
; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
; GFX9-NEXT:    s_mov_b32 s32, s33
; GFX9-NEXT:    v_readlane_b32 s34, v40, 3
; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
; GFX9-NEXT:    s_mov_b32 s33, s34
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_void_func_void_preserves_s34:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_mov_b32 s34, s33
; GFX10-NEXT:    s_mov_b32 s33, s32
; GFX10-NEXT:    s_or_saveexec_b32 s35, -1
; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s35
; GFX10-NEXT:    v_writelane_b32 v40, s34, 3
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; def s34
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    s_mov_b32 s35, external_void_func_void@abs32@hi
; GFX10-NEXT:    s_addk_i32 s32, 0x200
; GFX10-NEXT:    v_writelane_b32 v40, s4, 0
; GFX10-NEXT:    s_mov_b32 s4, s34
; GFX10-NEXT:    s_mov_b32 s34, external_void_func_void@abs32@lo
; GFX10-NEXT:    v_writelane_b32 v40, s30, 1
; GFX10-NEXT:    v_writelane_b32 v40, s31, 2
; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT:    s_mov_b32 s34, s4
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s34
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    v_readlane_b32 s31, v40, 2
; GFX10-NEXT:    v_readlane_b32 s30, v40, 1
; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
; GFX10-NEXT:    s_mov_b32 s32, s33
; GFX10-NEXT:    v_readlane_b32 s34, v40, 3
; GFX10-NEXT:    s_or_saveexec_b32 s35, -1
; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s35
; GFX10-NEXT:    s_mov_b32 s33, s34
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_call_void_func_void_preserves_s34:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s0, s33
; GFX11-NEXT:    s_mov_b32 s33, s32
; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
; GFX11-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s1
; GFX11-NEXT:    v_writelane_b32 v40, s0, 3
; GFX11-NEXT:    s_mov_b32 s1, external_void_func_void@abs32@hi
; GFX11-NEXT:    s_mov_b32 s0, external_void_func_void@abs32@lo
; GFX11-NEXT:    s_add_i32 s32, s32, 16
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; def s34
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    v_writelane_b32 v40, s4, 0
; GFX11-NEXT:    s_mov_b32 s4, s34
; GFX11-NEXT:    v_writelane_b32 v40, s30, 1
; GFX11-NEXT:    v_writelane_b32 v40, s31, 2
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT:    s_mov_b32 s34, s4
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s34
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_readlane_b32 s31, v40, 2
; GFX11-NEXT:    v_readlane_b32 s30, v40, 1
; GFX11-NEXT:    v_readlane_b32 s4, v40, 0
; GFX11-NEXT:    s_mov_b32 s32, s33
; GFX11-NEXT:    v_readlane_b32 s0, v40, 3
; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
; GFX11-NEXT:    scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s1
; GFX11-NEXT:    s_mov_b32 s33, s0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %s34 = call i32 asm sideeffect "; def $0", "={s34}"()
  call amdgpu_gfx void @external_void_func_void()
  call void asm sideeffect "; use $0", "{s34}"(i32 %s34)
  ret void
}

; An i32 is defined in v40 by inline asm, kept live across the call, and
; consumed from v40 afterwards. Unlike the v31 test, the expected output has no
; copy around s_swappc_b64: the value stays in v40, and the incoming v40 is
; stored to the stack before the asm and reloaded after the use.
; The %out argument is not referenced by the body.
define amdgpu_gfx void @test_call_void_func_void_preserves_v40(ptr addrspace(1) %out) #0 {
; GFX9-LABEL: test_call_void_func_void_preserves_v40:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b32 s34, s33
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
; GFX9-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
; GFX9-NEXT:    v_writelane_b32 v41, s34, 2
; GFX9-NEXT:    v_writelane_b32 v41, s30, 0
; GFX9-NEXT:    s_mov_b32 s35, external_void_func_void@abs32@hi
; GFX9-NEXT:    s_mov_b32 s34, external_void_func_void@abs32@lo
; GFX9-NEXT:    s_addk_i32 s32, 0x400
; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT:    v_writelane_b32 v41, s31, 1
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; def v40
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; use v40
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT:    v_readlane_b32 s31, v41, 1
; GFX9-NEXT:    v_readlane_b32 s30, v41, 0
; GFX9-NEXT:    s_mov_b32 s32, s33
; GFX9-NEXT:    v_readlane_b32 s34, v41, 2
; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
; GFX9-NEXT:    buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
; GFX9-NEXT:    s_mov_b32 s33, s34
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_void_func_void_preserves_v40:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_mov_b32 s34, s33
; GFX10-NEXT:    s_mov_b32 s33, s32
; GFX10-NEXT:    s_or_saveexec_b32 s35, -1
; GFX10-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s35
; GFX10-NEXT:    v_writelane_b32 v41, s34, 2
; GFX10-NEXT:    s_mov_b32 s35, external_void_func_void@abs32@hi
; GFX10-NEXT:    s_mov_b32 s34, external_void_func_void@abs32@lo
; GFX10-NEXT:    s_addk_i32 s32, 0x200
; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX10-NEXT:    v_writelane_b32 v41, s30, 0
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; def v40
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    v_writelane_b32 v41, s31, 1
; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use v40
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX10-NEXT:    v_readlane_b32 s31, v41, 1
; GFX10-NEXT:    v_readlane_b32 s30, v41, 0
; GFX10-NEXT:    s_mov_b32 s32, s33
; GFX10-NEXT:    v_readlane_b32 s34, v41, 2
; GFX10-NEXT:    s_or_saveexec_b32 s35, -1
; GFX10-NEXT:    buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s35
; GFX10-NEXT:    s_mov_b32 s33, s34
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_call_void_func_void_preserves_v40:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s0, s33
; GFX11-NEXT:    s_mov_b32 s33, s32
; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
; GFX11-NEXT:    scratch_store_b32 off, v41, s33 offset:4 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s1
; GFX11-NEXT:    v_writelane_b32 v41, s0, 2
; GFX11-NEXT:    s_mov_b32 s1, external_void_func_void@abs32@hi
; GFX11-NEXT:    s_mov_b32 s0, external_void_func_void@abs32@lo
; GFX11-NEXT:    s_add_i32 s32, s32, 16
; GFX11-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
; GFX11-NEXT:    v_writelane_b32 v41, s30, 0
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; def v40
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    v_writelane_b32 v41, s31, 1
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use v40
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT:    v_readlane_b32 s31, v41, 1
; GFX11-NEXT:    v_readlane_b32 s30, v41, 0
; GFX11-NEXT:    s_mov_b32 s32, s33
; GFX11-NEXT:    v_readlane_b32 s0, v41, 2
; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
; GFX11-NEXT:    scratch_load_b32 v41, off, s33 offset:4 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s1
; GFX11-NEXT:    s_mov_b32 s33, s0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %v40 = call i32 asm sideeffect "; def $0", "={v40}"()
  call amdgpu_gfx void @external_void_func_void()
  call void asm sideeffect "; use $0", "{v40}"(i32 %v40)
  ret void
}

; Leaf function declared without the amdgpu_gfx calling convention whose inline
; asm lists s33 as a clobber. The expected output writes s33 into lane 0 of v0
; before the asm and reads it back before s_setpc_b64.
define hidden void @void_func_void_clobber_s33() #1 {
; GFX9-LABEL: void_func_void_clobber_s33:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_xor_saveexec_b64 s[4:5], -1
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[4:5]
; GFX9-NEXT:    v_writelane_b32 v0, s33, 0
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; clobber
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    v_readlane_b32 s33, v0, 0
; GFX9-NEXT:    s_xor_saveexec_b64 s[4:5], -1
; GFX9-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[4:5]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: void_func_void_clobber_s33:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_xor_saveexec_b32 s4, -1
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s4
; GFX10-NEXT:    v_writelane_b32 v0, s33, 0
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; clobber
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    v_readlane_b32 s33, v0, 0
; GFX10-NEXT:    s_xor_saveexec_b32 s4, -1
; GFX10-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s4
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_void_clobber_s33:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    v_writelane_b32 v0, s33, 0
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; clobber
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_readlane_b32 s33, v0, 0
; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  call void asm sideeffect "; clobber", "~{s33}"() #0
  ret void
}

; Same as @void_func_void_clobber_s33 but the inline asm clobbers s34; the
; expected output writes s34 into lane 0 of v0 and reads it back before
; returning.
define hidden void @void_func_void_clobber_s34() #1 {
; GFX9-LABEL: void_func_void_clobber_s34:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_xor_saveexec_b64 s[4:5], -1
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[4:5]
; GFX9-NEXT:    v_writelane_b32 v0, s34, 0
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; clobber
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    v_readlane_b32 s34, v0, 0
; GFX9-NEXT:    s_xor_saveexec_b64 s[4:5], -1
; GFX9-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[4:5]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: void_func_void_clobber_s34:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_xor_saveexec_b32 s4, -1
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s4
; GFX10-NEXT:    v_writelane_b32 v0, s34, 0
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; clobber
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    v_readlane_b32 s34, v0, 0
; GFX10-NEXT:    s_xor_saveexec_b32 s4, -1
; GFX10-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s4
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_void_clobber_s34:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    v_writelane_b32 v0, s34, 0
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; clobber
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_readlane_b32 s34, v0, 0
; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  call void asm sideeffect "; clobber", "~{s34}"() #0
  ret void
}

define amdgpu_gfx void @test_call_void_func_void_clobber_s33() #0 {
; GFX9-LABEL: 
test_call_void_func_void_clobber_s33: 898; GFX9: ; %bb.0: 899; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 900; GFX9-NEXT: s_mov_b32 s34, s33 901; GFX9-NEXT: s_mov_b32 s33, s32 902; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 903; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill 904; GFX9-NEXT: s_mov_b64 exec, s[36:37] 905; GFX9-NEXT: v_writelane_b32 v40, s34, 2 906; GFX9-NEXT: v_writelane_b32 v40, s30, 0 907; GFX9-NEXT: s_mov_b32 s35, void_func_void_clobber_s33@abs32@hi 908; GFX9-NEXT: s_mov_b32 s34, void_func_void_clobber_s33@abs32@lo 909; GFX9-NEXT: s_addk_i32 s32, 0x400 910; GFX9-NEXT: v_writelane_b32 v40, s31, 1 911; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 912; GFX9-NEXT: v_readlane_b32 s31, v40, 1 913; GFX9-NEXT: v_readlane_b32 s30, v40, 0 914; GFX9-NEXT: s_mov_b32 s32, s33 915; GFX9-NEXT: v_readlane_b32 s34, v40, 2 916; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 917; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload 918; GFX9-NEXT: s_mov_b64 exec, s[36:37] 919; GFX9-NEXT: s_mov_b32 s33, s34 920; GFX9-NEXT: s_waitcnt vmcnt(0) 921; GFX9-NEXT: s_setpc_b64 s[30:31] 922; 923; GFX10-LABEL: test_call_void_func_void_clobber_s33: 924; GFX10: ; %bb.0: 925; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 926; GFX10-NEXT: s_mov_b32 s34, s33 927; GFX10-NEXT: s_mov_b32 s33, s32 928; GFX10-NEXT: s_or_saveexec_b32 s35, -1 929; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill 930; GFX10-NEXT: s_waitcnt_depctr 0xffe3 931; GFX10-NEXT: s_mov_b32 exec_lo, s35 932; GFX10-NEXT: v_writelane_b32 v40, s34, 2 933; GFX10-NEXT: s_mov_b32 s35, void_func_void_clobber_s33@abs32@hi 934; GFX10-NEXT: s_mov_b32 s34, void_func_void_clobber_s33@abs32@lo 935; GFX10-NEXT: s_addk_i32 s32, 0x200 936; GFX10-NEXT: v_writelane_b32 v40, s30, 0 937; GFX10-NEXT: v_writelane_b32 v40, s31, 1 938; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 939; GFX10-NEXT: v_readlane_b32 s31, v40, 1 940; GFX10-NEXT: v_readlane_b32 s30, 
v40, 0 941; GFX10-NEXT: s_mov_b32 s32, s33 942; GFX10-NEXT: v_readlane_b32 s34, v40, 2 943; GFX10-NEXT: s_or_saveexec_b32 s35, -1 944; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload 945; GFX10-NEXT: s_waitcnt_depctr 0xffe3 946; GFX10-NEXT: s_mov_b32 exec_lo, s35 947; GFX10-NEXT: s_mov_b32 s33, s34 948; GFX10-NEXT: s_waitcnt vmcnt(0) 949; GFX10-NEXT: s_setpc_b64 s[30:31] 950; 951; GFX11-LABEL: test_call_void_func_void_clobber_s33: 952; GFX11: ; %bb.0: 953; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 954; GFX11-NEXT: s_mov_b32 s0, s33 955; GFX11-NEXT: s_mov_b32 s33, s32 956; GFX11-NEXT: s_or_saveexec_b32 s1, -1 957; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill 958; GFX11-NEXT: s_mov_b32 exec_lo, s1 959; GFX11-NEXT: v_writelane_b32 v40, s0, 2 960; GFX11-NEXT: s_mov_b32 s1, void_func_void_clobber_s33@abs32@hi 961; GFX11-NEXT: s_mov_b32 s0, void_func_void_clobber_s33@abs32@lo 962; GFX11-NEXT: s_add_i32 s32, s32, 16 963; GFX11-NEXT: v_writelane_b32 v40, s30, 0 964; GFX11-NEXT: v_writelane_b32 v40, s31, 1 965; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 966; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 967; GFX11-NEXT: v_readlane_b32 s31, v40, 1 968; GFX11-NEXT: v_readlane_b32 s30, v40, 0 969; GFX11-NEXT: s_mov_b32 s32, s33 970; GFX11-NEXT: v_readlane_b32 s0, v40, 2 971; GFX11-NEXT: s_or_saveexec_b32 s1, -1 972; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload 973; GFX11-NEXT: s_mov_b32 exec_lo, s1 974; GFX11-NEXT: s_mov_b32 s33, s0 975; GFX11-NEXT: s_waitcnt vmcnt(0) 976; GFX11-NEXT: s_setpc_b64 s[30:31] 977 call amdgpu_gfx void @void_func_void_clobber_s33() 978 ret void 979} 980 981define amdgpu_gfx void @test_call_void_func_void_clobber_s34() #0 { 982; GFX9-LABEL: test_call_void_func_void_clobber_s34: 983; GFX9: ; %bb.0: 984; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 985; GFX9-NEXT: s_mov_b32 s34, s33 986; GFX9-NEXT: s_mov_b32 s33, s32 987; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 988; 
GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill 989; GFX9-NEXT: s_mov_b64 exec, s[36:37] 990; GFX9-NEXT: v_writelane_b32 v40, s34, 2 991; GFX9-NEXT: v_writelane_b32 v40, s30, 0 992; GFX9-NEXT: s_mov_b32 s35, void_func_void_clobber_s34@abs32@hi 993; GFX9-NEXT: s_mov_b32 s34, void_func_void_clobber_s34@abs32@lo 994; GFX9-NEXT: s_addk_i32 s32, 0x400 995; GFX9-NEXT: v_writelane_b32 v40, s31, 1 996; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 997; GFX9-NEXT: v_readlane_b32 s31, v40, 1 998; GFX9-NEXT: v_readlane_b32 s30, v40, 0 999; GFX9-NEXT: s_mov_b32 s32, s33 1000; GFX9-NEXT: v_readlane_b32 s34, v40, 2 1001; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 1002; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload 1003; GFX9-NEXT: s_mov_b64 exec, s[36:37] 1004; GFX9-NEXT: s_mov_b32 s33, s34 1005; GFX9-NEXT: s_waitcnt vmcnt(0) 1006; GFX9-NEXT: s_setpc_b64 s[30:31] 1007; 1008; GFX10-LABEL: test_call_void_func_void_clobber_s34: 1009; GFX10: ; %bb.0: 1010; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1011; GFX10-NEXT: s_mov_b32 s34, s33 1012; GFX10-NEXT: s_mov_b32 s33, s32 1013; GFX10-NEXT: s_or_saveexec_b32 s35, -1 1014; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill 1015; GFX10-NEXT: s_waitcnt_depctr 0xffe3 1016; GFX10-NEXT: s_mov_b32 exec_lo, s35 1017; GFX10-NEXT: v_writelane_b32 v40, s34, 2 1018; GFX10-NEXT: s_mov_b32 s35, void_func_void_clobber_s34@abs32@hi 1019; GFX10-NEXT: s_mov_b32 s34, void_func_void_clobber_s34@abs32@lo 1020; GFX10-NEXT: s_addk_i32 s32, 0x200 1021; GFX10-NEXT: v_writelane_b32 v40, s30, 0 1022; GFX10-NEXT: v_writelane_b32 v40, s31, 1 1023; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 1024; GFX10-NEXT: v_readlane_b32 s31, v40, 1 1025; GFX10-NEXT: v_readlane_b32 s30, v40, 0 1026; GFX10-NEXT: s_mov_b32 s32, s33 1027; GFX10-NEXT: v_readlane_b32 s34, v40, 2 1028; GFX10-NEXT: s_or_saveexec_b32 s35, -1 1029; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload 
1030; GFX10-NEXT: s_waitcnt_depctr 0xffe3 1031; GFX10-NEXT: s_mov_b32 exec_lo, s35 1032; GFX10-NEXT: s_mov_b32 s33, s34 1033; GFX10-NEXT: s_waitcnt vmcnt(0) 1034; GFX10-NEXT: s_setpc_b64 s[30:31] 1035; 1036; GFX11-LABEL: test_call_void_func_void_clobber_s34: 1037; GFX11: ; %bb.0: 1038; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1039; GFX11-NEXT: s_mov_b32 s0, s33 1040; GFX11-NEXT: s_mov_b32 s33, s32 1041; GFX11-NEXT: s_or_saveexec_b32 s1, -1 1042; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill 1043; GFX11-NEXT: s_mov_b32 exec_lo, s1 1044; GFX11-NEXT: v_writelane_b32 v40, s0, 2 1045; GFX11-NEXT: s_mov_b32 s1, void_func_void_clobber_s34@abs32@hi 1046; GFX11-NEXT: s_mov_b32 s0, void_func_void_clobber_s34@abs32@lo 1047; GFX11-NEXT: s_add_i32 s32, s32, 16 1048; GFX11-NEXT: v_writelane_b32 v40, s30, 0 1049; GFX11-NEXT: v_writelane_b32 v40, s31, 1 1050; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 1051; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1052; GFX11-NEXT: v_readlane_b32 s31, v40, 1 1053; GFX11-NEXT: v_readlane_b32 s30, v40, 0 1054; GFX11-NEXT: s_mov_b32 s32, s33 1055; GFX11-NEXT: v_readlane_b32 s0, v40, 2 1056; GFX11-NEXT: s_or_saveexec_b32 s1, -1 1057; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload 1058; GFX11-NEXT: s_mov_b32 exec_lo, s1 1059; GFX11-NEXT: s_mov_b32 s33, s0 1060; GFX11-NEXT: s_waitcnt vmcnt(0) 1061; GFX11-NEXT: s_setpc_b64 s[30:31] 1062 call amdgpu_gfx void @void_func_void_clobber_s34() 1063 ret void 1064} 1065 1066define amdgpu_gfx void @callee_saved_sgpr_kernel() #1 { 1067; GFX9-LABEL: callee_saved_sgpr_kernel: 1068; GFX9: ; %bb.0: 1069; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1070; GFX9-NEXT: s_mov_b32 s34, s33 1071; GFX9-NEXT: s_mov_b32 s33, s32 1072; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 1073; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill 1074; GFX9-NEXT: s_mov_b64 exec, s[36:37] 1075; GFX9-NEXT: v_writelane_b32 v40, s34, 3 1076; GFX9-NEXT: v_writelane_b32 
v40, s4, 0 1077; GFX9-NEXT: v_writelane_b32 v40, s30, 1 1078; GFX9-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi 1079; GFX9-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo 1080; GFX9-NEXT: s_addk_i32 s32, 0x400 1081; GFX9-NEXT: v_writelane_b32 v40, s31, 2 1082; GFX9-NEXT: ;;#ASMSTART 1083; GFX9-NEXT: ; def s40 1084; GFX9-NEXT: ;;#ASMEND 1085; GFX9-NEXT: s_mov_b32 s4, s40 1086; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 1087; GFX9-NEXT: ;;#ASMSTART 1088; GFX9-NEXT: ; use s4 1089; GFX9-NEXT: ;;#ASMEND 1090; GFX9-NEXT: v_readlane_b32 s31, v40, 2 1091; GFX9-NEXT: v_readlane_b32 s30, v40, 1 1092; GFX9-NEXT: v_readlane_b32 s4, v40, 0 1093; GFX9-NEXT: s_mov_b32 s32, s33 1094; GFX9-NEXT: v_readlane_b32 s34, v40, 3 1095; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 1096; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload 1097; GFX9-NEXT: s_mov_b64 exec, s[36:37] 1098; GFX9-NEXT: s_mov_b32 s33, s34 1099; GFX9-NEXT: s_waitcnt vmcnt(0) 1100; GFX9-NEXT: s_setpc_b64 s[30:31] 1101; 1102; GFX10-LABEL: callee_saved_sgpr_kernel: 1103; GFX10: ; %bb.0: 1104; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1105; GFX10-NEXT: s_mov_b32 s34, s33 1106; GFX10-NEXT: s_mov_b32 s33, s32 1107; GFX10-NEXT: s_or_saveexec_b32 s35, -1 1108; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill 1109; GFX10-NEXT: s_waitcnt_depctr 0xffe3 1110; GFX10-NEXT: s_mov_b32 exec_lo, s35 1111; GFX10-NEXT: v_writelane_b32 v40, s34, 3 1112; GFX10-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi 1113; GFX10-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo 1114; GFX10-NEXT: s_addk_i32 s32, 0x200 1115; GFX10-NEXT: ;;#ASMSTART 1116; GFX10-NEXT: ; def s40 1117; GFX10-NEXT: ;;#ASMEND 1118; GFX10-NEXT: v_writelane_b32 v40, s4, 0 1119; GFX10-NEXT: s_mov_b32 s4, s40 1120; GFX10-NEXT: v_writelane_b32 v40, s30, 1 1121; GFX10-NEXT: v_writelane_b32 v40, s31, 2 1122; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 1123; GFX10-NEXT: ;;#ASMSTART 1124; GFX10-NEXT: ; 
use s4 1125; GFX10-NEXT: ;;#ASMEND 1126; GFX10-NEXT: v_readlane_b32 s31, v40, 2 1127; GFX10-NEXT: v_readlane_b32 s30, v40, 1 1128; GFX10-NEXT: v_readlane_b32 s4, v40, 0 1129; GFX10-NEXT: s_mov_b32 s32, s33 1130; GFX10-NEXT: v_readlane_b32 s34, v40, 3 1131; GFX10-NEXT: s_or_saveexec_b32 s35, -1 1132; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload 1133; GFX10-NEXT: s_waitcnt_depctr 0xffe3 1134; GFX10-NEXT: s_mov_b32 exec_lo, s35 1135; GFX10-NEXT: s_mov_b32 s33, s34 1136; GFX10-NEXT: s_waitcnt vmcnt(0) 1137; GFX10-NEXT: s_setpc_b64 s[30:31] 1138; 1139; GFX11-LABEL: callee_saved_sgpr_kernel: 1140; GFX11: ; %bb.0: 1141; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1142; GFX11-NEXT: s_mov_b32 s0, s33 1143; GFX11-NEXT: s_mov_b32 s33, s32 1144; GFX11-NEXT: s_or_saveexec_b32 s1, -1 1145; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill 1146; GFX11-NEXT: s_mov_b32 exec_lo, s1 1147; GFX11-NEXT: v_writelane_b32 v40, s0, 3 1148; GFX11-NEXT: s_mov_b32 s1, external_void_func_void@abs32@hi 1149; GFX11-NEXT: s_mov_b32 s0, external_void_func_void@abs32@lo 1150; GFX11-NEXT: s_add_i32 s32, s32, 16 1151; GFX11-NEXT: ;;#ASMSTART 1152; GFX11-NEXT: ; def s40 1153; GFX11-NEXT: ;;#ASMEND 1154; GFX11-NEXT: v_writelane_b32 v40, s4, 0 1155; GFX11-NEXT: s_mov_b32 s4, s40 1156; GFX11-NEXT: v_writelane_b32 v40, s30, 1 1157; GFX11-NEXT: v_writelane_b32 v40, s31, 2 1158; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 1159; GFX11-NEXT: ;;#ASMSTART 1160; GFX11-NEXT: ; use s4 1161; GFX11-NEXT: ;;#ASMEND 1162; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1163; GFX11-NEXT: v_readlane_b32 s31, v40, 2 1164; GFX11-NEXT: v_readlane_b32 s30, v40, 1 1165; GFX11-NEXT: v_readlane_b32 s4, v40, 0 1166; GFX11-NEXT: s_mov_b32 s32, s33 1167; GFX11-NEXT: v_readlane_b32 s0, v40, 3 1168; GFX11-NEXT: s_or_saveexec_b32 s1, -1 1169; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload 1170; GFX11-NEXT: s_mov_b32 exec_lo, s1 1171; GFX11-NEXT: s_mov_b32 s33, s0 
1172; GFX11-NEXT: s_waitcnt vmcnt(0) 1173; GFX11-NEXT: s_setpc_b64 s[30:31] 1174 %s40 = call i32 asm sideeffect "; def s40", "={s40}"() #0 1175 call amdgpu_gfx void @external_void_func_void() 1176 call void asm sideeffect "; use $0", "s"(i32 %s40) #0 1177 ret void 1178} 1179 1180define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 { 1181; GFX9-LABEL: callee_saved_sgpr_vgpr_kernel: 1182; GFX9: ; %bb.0: 1183; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1184; GFX9-NEXT: s_mov_b32 s34, s33 1185; GFX9-NEXT: s_mov_b32 s33, s32 1186; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 1187; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill 1188; GFX9-NEXT: s_mov_b64 exec, s[36:37] 1189; GFX9-NEXT: v_writelane_b32 v41, s34, 3 1190; GFX9-NEXT: v_writelane_b32 v41, s4, 0 1191; GFX9-NEXT: v_writelane_b32 v41, s30, 1 1192; GFX9-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi 1193; GFX9-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo 1194; GFX9-NEXT: s_addk_i32 s32, 0x400 1195; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill 1196; GFX9-NEXT: v_writelane_b32 v41, s31, 2 1197; GFX9-NEXT: ;;#ASMSTART 1198; GFX9-NEXT: ; def s40 1199; GFX9-NEXT: ;;#ASMEND 1200; GFX9-NEXT: s_mov_b32 s4, s40 1201; GFX9-NEXT: ;;#ASMSTART 1202; GFX9-NEXT: ; def v32 1203; GFX9-NEXT: ;;#ASMEND 1204; GFX9-NEXT: v_mov_b32_e32 v40, v32 1205; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 1206; GFX9-NEXT: ;;#ASMSTART 1207; GFX9-NEXT: ; use s4 1208; GFX9-NEXT: ;;#ASMEND 1209; GFX9-NEXT: ;;#ASMSTART 1210; GFX9-NEXT: ; use v40 1211; GFX9-NEXT: ;;#ASMEND 1212; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload 1213; GFX9-NEXT: v_readlane_b32 s31, v41, 2 1214; GFX9-NEXT: v_readlane_b32 s30, v41, 1 1215; GFX9-NEXT: v_readlane_b32 s4, v41, 0 1216; GFX9-NEXT: s_mov_b32 s32, s33 1217; GFX9-NEXT: v_readlane_b32 s34, v41, 3 1218; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 1219; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 
offset:4 ; 4-byte Folded Reload 1220; GFX9-NEXT: s_mov_b64 exec, s[36:37] 1221; GFX9-NEXT: s_mov_b32 s33, s34 1222; GFX9-NEXT: s_waitcnt vmcnt(0) 1223; GFX9-NEXT: s_setpc_b64 s[30:31] 1224; 1225; GFX10-LABEL: callee_saved_sgpr_vgpr_kernel: 1226; GFX10: ; %bb.0: 1227; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1228; GFX10-NEXT: s_mov_b32 s34, s33 1229; GFX10-NEXT: s_mov_b32 s33, s32 1230; GFX10-NEXT: s_or_saveexec_b32 s35, -1 1231; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill 1232; GFX10-NEXT: s_waitcnt_depctr 0xffe3 1233; GFX10-NEXT: s_mov_b32 exec_lo, s35 1234; GFX10-NEXT: v_writelane_b32 v41, s34, 3 1235; GFX10-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi 1236; GFX10-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo 1237; GFX10-NEXT: s_addk_i32 s32, 0x200 1238; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill 1239; GFX10-NEXT: v_writelane_b32 v41, s4, 0 1240; GFX10-NEXT: ;;#ASMSTART 1241; GFX10-NEXT: ; def s40 1242; GFX10-NEXT: ;;#ASMEND 1243; GFX10-NEXT: s_mov_b32 s4, s40 1244; GFX10-NEXT: ;;#ASMSTART 1245; GFX10-NEXT: ; def v32 1246; GFX10-NEXT: ;;#ASMEND 1247; GFX10-NEXT: v_mov_b32_e32 v40, v32 1248; GFX10-NEXT: v_writelane_b32 v41, s30, 1 1249; GFX10-NEXT: v_writelane_b32 v41, s31, 2 1250; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 1251; GFX10-NEXT: ;;#ASMSTART 1252; GFX10-NEXT: ; use s4 1253; GFX10-NEXT: ;;#ASMEND 1254; GFX10-NEXT: ;;#ASMSTART 1255; GFX10-NEXT: ; use v40 1256; GFX10-NEXT: ;;#ASMEND 1257; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload 1258; GFX10-NEXT: v_readlane_b32 s31, v41, 2 1259; GFX10-NEXT: v_readlane_b32 s30, v41, 1 1260; GFX10-NEXT: v_readlane_b32 s4, v41, 0 1261; GFX10-NEXT: s_mov_b32 s32, s33 1262; GFX10-NEXT: v_readlane_b32 s34, v41, 3 1263; GFX10-NEXT: s_or_saveexec_b32 s35, -1 1264; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload 1265; GFX10-NEXT: s_waitcnt_depctr 0xffe3 1266; 
GFX10-NEXT: s_mov_b32 exec_lo, s35 1267; GFX10-NEXT: s_mov_b32 s33, s34 1268; GFX10-NEXT: s_waitcnt vmcnt(0) 1269; GFX10-NEXT: s_setpc_b64 s[30:31] 1270; 1271; GFX11-LABEL: callee_saved_sgpr_vgpr_kernel: 1272; GFX11: ; %bb.0: 1273; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1274; GFX11-NEXT: s_mov_b32 s0, s33 1275; GFX11-NEXT: s_mov_b32 s33, s32 1276; GFX11-NEXT: s_or_saveexec_b32 s1, -1 1277; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:4 ; 4-byte Folded Spill 1278; GFX11-NEXT: s_mov_b32 exec_lo, s1 1279; GFX11-NEXT: v_writelane_b32 v41, s0, 3 1280; GFX11-NEXT: s_mov_b32 s1, external_void_func_void@abs32@hi 1281; GFX11-NEXT: s_mov_b32 s0, external_void_func_void@abs32@lo 1282; GFX11-NEXT: s_add_i32 s32, s32, 16 1283; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill 1284; GFX11-NEXT: v_writelane_b32 v41, s4, 0 1285; GFX11-NEXT: ;;#ASMSTART 1286; GFX11-NEXT: ; def s40 1287; GFX11-NEXT: ;;#ASMEND 1288; GFX11-NEXT: s_mov_b32 s4, s40 1289; GFX11-NEXT: ;;#ASMSTART 1290; GFX11-NEXT: ; def v32 1291; GFX11-NEXT: ;;#ASMEND 1292; GFX11-NEXT: v_mov_b32_e32 v40, v32 1293; GFX11-NEXT: v_writelane_b32 v41, s30, 1 1294; GFX11-NEXT: v_writelane_b32 v41, s31, 2 1295; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 1296; GFX11-NEXT: ;;#ASMSTART 1297; GFX11-NEXT: ; use s4 1298; GFX11-NEXT: ;;#ASMEND 1299; GFX11-NEXT: ;;#ASMSTART 1300; GFX11-NEXT: ; use v40 1301; GFX11-NEXT: ;;#ASMEND 1302; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload 1303; GFX11-NEXT: v_readlane_b32 s31, v41, 2 1304; GFX11-NEXT: v_readlane_b32 s30, v41, 1 1305; GFX11-NEXT: v_readlane_b32 s4, v41, 0 1306; GFX11-NEXT: s_mov_b32 s32, s33 1307; GFX11-NEXT: v_readlane_b32 s0, v41, 3 1308; GFX11-NEXT: s_or_saveexec_b32 s1, -1 1309; GFX11-NEXT: scratch_load_b32 v41, off, s33 offset:4 ; 4-byte Folded Reload 1310; GFX11-NEXT: s_mov_b32 exec_lo, s1 1311; GFX11-NEXT: s_mov_b32 s33, s0 1312; GFX11-NEXT: s_waitcnt vmcnt(0) 1313; GFX11-NEXT: s_setpc_b64 s[30:31] 1314 %s40 = call i32 
asm sideeffect "; def s40", "={s40}"() #0 1315 %v32 = call i32 asm sideeffect "; def v32", "={v32}"() #0 1316 call amdgpu_gfx void @external_void_func_void() 1317 call void asm sideeffect "; use $0", "s"(i32 %s40) #0 1318 call void asm sideeffect "; use $0", "v"(i32 %v32) #0 1319 ret void 1320} 1321 1322attributes #0 = { nounwind } 1323attributes #1 = { nounwind noinline } 1324