1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode < %s | FileCheck -check-prefix=GFX10_1 %s 3; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -mattr=+cumode < %s | FileCheck -check-prefix=GFX10_3 %s 4; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck -check-prefix=GFX11 %s 5; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck -check-prefix=GFX12 %s 6; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefix=GFX8 %s 7; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s 8; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX940 %s 9; This function needs the address of %alloca1 in s59 while scc is also an inline-asm input. 10; We aren't pressuring the SGPRs, so this can use the add with carry out pre-gfx9. 11define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 { 12; GFX10_1-LABEL: scalar_mov_materializes_frame_index_unavailable_scc: 13; GFX10_1: ; %bb.0: 14; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 16; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80880 17; GFX10_1-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill 18; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 19; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 20; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32 21; GFX10_1-NEXT: v_writelane_b32 v1, s59, 0 22; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo 23; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0 24; GFX10_1-NEXT: ;;#ASMSTART 25; GFX10_1-NEXT: ; use alloca0 v0 26; GFX10_1-NEXT: ;;#ASMEND 27; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32 28; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 0x4040, v0 29; GFX10_1-NEXT: v_readfirstlane_b32 s59, v0 30; GFX10_1-NEXT: ;;#ASMSTART 31; GFX10_1-NEXT: ; use s59, scc 32; GFX10_1-NEXT: ;;#ASMEND 33; GFX10_1-NEXT: v_readlane_b32 s59, v1, 0 34; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 35; 
GFX10_1-NEXT: s_add_i32 s5, s32, 0x80880 36; GFX10_1-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload 37; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 38; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 39; GFX10_1-NEXT: s_waitcnt vmcnt(0) 40; GFX10_1-NEXT: s_setpc_b64 s[30:31] 41; 42; GFX10_3-LABEL: scalar_mov_materializes_frame_index_unavailable_scc: 43; GFX10_3: ; %bb.0: 44; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 45; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 46; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80880 47; GFX10_3-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill 48; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 49; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32 50; GFX10_3-NEXT: v_writelane_b32 v1, s59, 0 51; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo 52; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0 53; GFX10_3-NEXT: ;;#ASMSTART 54; GFX10_3-NEXT: ; use alloca0 v0 55; GFX10_3-NEXT: ;;#ASMEND 56; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32 57; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 0x4040, v0 58; GFX10_3-NEXT: v_readfirstlane_b32 s59, v0 59; GFX10_3-NEXT: ;;#ASMSTART 60; GFX10_3-NEXT: ; use s59, scc 61; GFX10_3-NEXT: ;;#ASMEND 62; GFX10_3-NEXT: v_readlane_b32 s59, v1, 0 63; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 64; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80880 65; GFX10_3-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload 66; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 67; GFX10_3-NEXT: s_waitcnt vmcnt(0) 68; GFX10_3-NEXT: s_setpc_b64 s[30:31] 69; 70; GFX11-LABEL: scalar_mov_materializes_frame_index_unavailable_scc: 71; GFX11: ; %bb.0: 72; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 73; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 74; GFX11-NEXT: s_add_i32 s1, s32, 0x4044 75; GFX11-NEXT: scratch_store_b32 off, v1, s1 ; 4-byte Folded Spill 76; GFX11-NEXT: s_mov_b32 exec_lo, s0 77; GFX11-NEXT: s_add_i32 s0, s32, 64 78; GFX11-NEXT: v_writelane_b32 v1, s59, 0 79; GFX11-NEXT: v_mov_b32_e32 v0, s0 80; GFX11-NEXT: s_and_b32 s0, 0, exec_lo 81; GFX11-NEXT: 
s_addc_u32 s0, s32, 0x4040 82; GFX11-NEXT: ;;#ASMSTART 83; GFX11-NEXT: ; use alloca0 v0 84; GFX11-NEXT: ;;#ASMEND 85; GFX11-NEXT: s_bitcmp1_b32 s0, 0 86; GFX11-NEXT: s_bitset0_b32 s0, 0 87; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 88; GFX11-NEXT: s_mov_b32 s59, s0 89; GFX11-NEXT: ;;#ASMSTART 90; GFX11-NEXT: ; use s59, scc 91; GFX11-NEXT: ;;#ASMEND 92; GFX11-NEXT: v_readlane_b32 s59, v1, 0 93; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 94; GFX11-NEXT: s_add_i32 s1, s32, 0x4044 95; GFX11-NEXT: scratch_load_b32 v1, off, s1 ; 4-byte Folded Reload 96; GFX11-NEXT: s_mov_b32 exec_lo, s0 97; GFX11-NEXT: s_waitcnt vmcnt(0) 98; GFX11-NEXT: s_setpc_b64 s[30:31] 99; 100; GFX12-LABEL: scalar_mov_materializes_frame_index_unavailable_scc: 101; GFX12: ; %bb.0: 102; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 103; GFX12-NEXT: s_wait_expcnt 0x0 104; GFX12-NEXT: s_wait_samplecnt 0x0 105; GFX12-NEXT: s_wait_bvhcnt 0x0 106; GFX12-NEXT: s_wait_kmcnt 0x0 107; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 108; GFX12-NEXT: scratch_store_b32 off, v1, s32 offset:16388 ; 4-byte Folded Spill 109; GFX12-NEXT: s_wait_alu 0xfffe 110; GFX12-NEXT: s_mov_b32 exec_lo, s0 111; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 112; GFX12-NEXT: s_and_b32 s0, 0, exec_lo 113; GFX12-NEXT: v_writelane_b32 v1, s59, 0 114; GFX12-NEXT: s_add_co_ci_u32 s0, s32, 0x4000 115; GFX12-NEXT: v_mov_b32_e32 v0, s32 116; GFX12-NEXT: s_wait_alu 0xfffe 117; GFX12-NEXT: s_bitcmp1_b32 s0, 0 118; GFX12-NEXT: s_bitset0_b32 s0, 0 119; GFX12-NEXT: ;;#ASMSTART 120; GFX12-NEXT: ; use alloca0 v0 121; GFX12-NEXT: ;;#ASMEND 122; GFX12-NEXT: s_wait_alu 0xfffe 123; GFX12-NEXT: s_mov_b32 s59, s0 124; GFX12-NEXT: ;;#ASMSTART 125; GFX12-NEXT: ; use s59, scc 126; GFX12-NEXT: ;;#ASMEND 127; GFX12-NEXT: v_readlane_b32 s59, v1, 0 128; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 129; GFX12-NEXT: scratch_load_b32 v1, off, s32 offset:16388 ; 4-byte Folded Reload 130; GFX12-NEXT: s_wait_alu 0xfffe 131; GFX12-NEXT: s_mov_b32 exec_lo, s0 132; GFX12-NEXT: s_wait_loadcnt 0x0 
133; GFX12-NEXT: s_wait_alu 0xfffe 134; GFX12-NEXT: s_setpc_b64 s[30:31] 135; 136; GFX8-LABEL: scalar_mov_materializes_frame_index_unavailable_scc: 137; GFX8: ; %bb.0: 138; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 139; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 140; GFX8-NEXT: s_add_i32 s6, s32, 0x101100 141; GFX8-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill 142; GFX8-NEXT: s_mov_b64 exec, s[4:5] 143; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32 144; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0 145; GFX8-NEXT: v_writelane_b32 v1, s59, 0 146; GFX8-NEXT: ;;#ASMSTART 147; GFX8-NEXT: ; use alloca0 v0 148; GFX8-NEXT: ;;#ASMEND 149; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32 150; GFX8-NEXT: s_movk_i32 s59, 0x4040 151; GFX8-NEXT: v_add_u32_e32 v0, vcc, s59, v0 152; GFX8-NEXT: v_readfirstlane_b32 s59, v0 153; GFX8-NEXT: s_and_b64 s[4:5], 0, exec 154; GFX8-NEXT: ;;#ASMSTART 155; GFX8-NEXT: ; use s59, scc 156; GFX8-NEXT: ;;#ASMEND 157; GFX8-NEXT: v_readlane_b32 s59, v1, 0 158; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 159; GFX8-NEXT: s_add_i32 s6, s32, 0x101100 160; GFX8-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload 161; GFX8-NEXT: s_mov_b64 exec, s[4:5] 162; GFX8-NEXT: s_waitcnt vmcnt(0) 163; GFX8-NEXT: s_setpc_b64 s[30:31] 164; 165; GFX900-LABEL: scalar_mov_materializes_frame_index_unavailable_scc: 166; GFX900: ; %bb.0: 167; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 168; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 169; GFX900-NEXT: s_add_i32 s6, s32, 0x101100 170; GFX900-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill 171; GFX900-NEXT: s_mov_b64 exec, s[4:5] 172; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 173; GFX900-NEXT: v_add_u32_e32 v0, 64, v0 174; GFX900-NEXT: ;;#ASMSTART 175; GFX900-NEXT: ; use alloca0 v0 176; GFX900-NEXT: ;;#ASMEND 177; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 178; GFX900-NEXT: v_add_u32_e32 v0, 0x4040, v0 179; GFX900-NEXT: v_writelane_b32 v1, s59, 0 180; GFX900-NEXT: 
v_readfirstlane_b32 s59, v0 181; GFX900-NEXT: s_and_b64 s[4:5], 0, exec 182; GFX900-NEXT: ;;#ASMSTART 183; GFX900-NEXT: ; use s59, scc 184; GFX900-NEXT: ;;#ASMEND 185; GFX900-NEXT: v_readlane_b32 s59, v1, 0 186; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 187; GFX900-NEXT: s_add_i32 s6, s32, 0x101100 188; GFX900-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload 189; GFX900-NEXT: s_mov_b64 exec, s[4:5] 190; GFX900-NEXT: s_waitcnt vmcnt(0) 191; GFX900-NEXT: s_setpc_b64 s[30:31] 192; 193; GFX940-LABEL: scalar_mov_materializes_frame_index_unavailable_scc: 194; GFX940: ; %bb.0: 195; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 196; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 197; GFX940-NEXT: s_add_i32 s2, s32, 0x4044 198; GFX940-NEXT: scratch_store_dword off, v1, s2 sc0 sc1 ; 4-byte Folded Spill 199; GFX940-NEXT: s_mov_b64 exec, s[0:1] 200; GFX940-NEXT: s_add_i32 s0, s32, 64 201; GFX940-NEXT: v_mov_b32_e32 v0, s0 202; GFX940-NEXT: s_and_b64 s[0:1], 0, exec 203; GFX940-NEXT: s_addc_u32 s0, s32, 0x4040 204; GFX940-NEXT: s_bitcmp1_b32 s0, 0 205; GFX940-NEXT: s_bitset0_b32 s0, 0 206; GFX940-NEXT: v_writelane_b32 v1, s59, 0 207; GFX940-NEXT: s_mov_b32 s59, s0 208; GFX940-NEXT: ;;#ASMSTART 209; GFX940-NEXT: ; use alloca0 v0 210; GFX940-NEXT: ;;#ASMEND 211; GFX940-NEXT: ;;#ASMSTART 212; GFX940-NEXT: ; use s59, scc 213; GFX940-NEXT: ;;#ASMEND 214; GFX940-NEXT: v_readlane_b32 s59, v1, 0 215; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 216; GFX940-NEXT: s_add_i32 s2, s32, 0x4044 217; GFX940-NEXT: scratch_load_dword v1, off, s2 ; 4-byte Folded Reload 218; GFX940-NEXT: s_mov_b64 exec, s[0:1] 219; GFX940-NEXT: s_waitcnt vmcnt(0) 220; GFX940-NEXT: s_setpc_b64 s[30:31] 221 %alloca0 = alloca [4096 x i32], align 64, addrspace(5) ; 4096 x 4 bytes = 16 KiB 222 %alloca1 = alloca i32, align 4, addrspace(5) 223 call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0) 224 call void asm sideeffect "; use $0, $1", "{s59},{scc}"(ptr addrspace(5) %alloca1, i32 0) ; operand 0 = %alloca1 in s59, operand 1 = i32 0 in scc 225 ret void 226} 
227; Variant where the second asm takes only %alloca1 (in s59); scc is not an asm operand here. 228; %alloca1 should end up materializing with s_mov_b32, and scc is 229; available. 230define void @scalar_mov_materializes_frame_index_dead_scc() #0 { 231; GFX10_1-LABEL: scalar_mov_materializes_frame_index_dead_scc: 232; GFX10_1: ; %bb.0: 233; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 234; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 235; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80880 236; GFX10_1-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill 237; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 238; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 239; GFX10_1-NEXT: v_writelane_b32 v1, s59, 0 240; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32 241; GFX10_1-NEXT: s_lshr_b32 s59, s32, 5 242; GFX10_1-NEXT: s_addk_i32 s59, 0x4040 243; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0 244; GFX10_1-NEXT: ;;#ASMSTART 245; GFX10_1-NEXT: ; use alloca0 v0 246; GFX10_1-NEXT: ;;#ASMEND 247; GFX10_1-NEXT: ;;#ASMSTART 248; GFX10_1-NEXT: ; use s59 249; GFX10_1-NEXT: ;;#ASMEND 250; GFX10_1-NEXT: v_readlane_b32 s59, v1, 0 251; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 252; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80880 253; GFX10_1-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload 254; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 255; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 256; GFX10_1-NEXT: s_waitcnt vmcnt(0) 257; GFX10_1-NEXT: s_setpc_b64 s[30:31] 258; 259; GFX10_3-LABEL: scalar_mov_materializes_frame_index_dead_scc: 260; GFX10_3: ; %bb.0: 261; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 262; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 263; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80880 264; GFX10_3-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill 265; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 266; GFX10_3-NEXT: v_writelane_b32 v1, s59, 0 267; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32 268; GFX10_3-NEXT: s_lshr_b32 s59, s32, 5 269; GFX10_3-NEXT: s_addk_i32 s59, 0x4040 270; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0 271; GFX10_3-NEXT: ;;#ASMSTART 272; GFX10_3-NEXT: ; use 
alloca0 v0 273; GFX10_3-NEXT: ;;#ASMEND 274; GFX10_3-NEXT: ;;#ASMSTART 275; GFX10_3-NEXT: ; use s59 276; GFX10_3-NEXT: ;;#ASMEND 277; GFX10_3-NEXT: v_readlane_b32 s59, v1, 0 278; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 279; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80880 280; GFX10_3-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload 281; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 282; GFX10_3-NEXT: s_waitcnt vmcnt(0) 283; GFX10_3-NEXT: s_setpc_b64 s[30:31] 284; 285; GFX11-LABEL: scalar_mov_materializes_frame_index_dead_scc: 286; GFX11: ; %bb.0: 287; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 288; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 289; GFX11-NEXT: s_add_i32 s1, s32, 0x4044 290; GFX11-NEXT: scratch_store_b32 off, v1, s1 ; 4-byte Folded Spill 291; GFX11-NEXT: s_mov_b32 exec_lo, s0 292; GFX11-NEXT: v_writelane_b32 v1, s59, 0 293; GFX11-NEXT: s_add_i32 s0, s32, 64 294; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 295; GFX11-NEXT: v_mov_b32_e32 v0, s0 296; GFX11-NEXT: s_add_i32 s0, s32, 0x4040 297; GFX11-NEXT: ;;#ASMSTART 298; GFX11-NEXT: ; use alloca0 v0 299; GFX11-NEXT: ;;#ASMEND 300; GFX11-NEXT: s_mov_b32 s59, s0 301; GFX11-NEXT: ;;#ASMSTART 302; GFX11-NEXT: ; use s59 303; GFX11-NEXT: ;;#ASMEND 304; GFX11-NEXT: v_readlane_b32 s59, v1, 0 305; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 306; GFX11-NEXT: s_add_i32 s1, s32, 0x4044 307; GFX11-NEXT: scratch_load_b32 v1, off, s1 ; 4-byte Folded Reload 308; GFX11-NEXT: s_mov_b32 exec_lo, s0 309; GFX11-NEXT: s_waitcnt vmcnt(0) 310; GFX11-NEXT: s_setpc_b64 s[30:31] 311; 312; GFX12-LABEL: scalar_mov_materializes_frame_index_dead_scc: 313; GFX12: ; %bb.0: 314; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 315; GFX12-NEXT: s_wait_expcnt 0x0 316; GFX12-NEXT: s_wait_samplecnt 0x0 317; GFX12-NEXT: s_wait_bvhcnt 0x0 318; GFX12-NEXT: s_wait_kmcnt 0x0 319; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 320; GFX12-NEXT: scratch_store_b32 off, v1, s32 offset:16388 ; 4-byte Folded Spill 321; GFX12-NEXT: s_wait_alu 0xfffe 322; GFX12-NEXT: 
s_mov_b32 exec_lo, s0 323; GFX12-NEXT: v_writelane_b32 v1, s59, 0 324; GFX12-NEXT: s_add_co_i32 s0, s32, 0x4000 325; GFX12-NEXT: v_mov_b32_e32 v0, s32 326; GFX12-NEXT: s_wait_alu 0xfffe 327; GFX12-NEXT: s_mov_b32 s59, s0 328; GFX12-NEXT: ;;#ASMSTART 329; GFX12-NEXT: ; use alloca0 v0 330; GFX12-NEXT: ;;#ASMEND 331; GFX12-NEXT: ;;#ASMSTART 332; GFX12-NEXT: ; use s59 333; GFX12-NEXT: ;;#ASMEND 334; GFX12-NEXT: v_readlane_b32 s59, v1, 0 335; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 336; GFX12-NEXT: scratch_load_b32 v1, off, s32 offset:16388 ; 4-byte Folded Reload 337; GFX12-NEXT: s_wait_alu 0xfffe 338; GFX12-NEXT: s_mov_b32 exec_lo, s0 339; GFX12-NEXT: s_wait_loadcnt 0x0 340; GFX12-NEXT: s_wait_alu 0xfffe 341; GFX12-NEXT: s_setpc_b64 s[30:31] 342; 343; GFX8-LABEL: scalar_mov_materializes_frame_index_dead_scc: 344; GFX8: ; %bb.0: 345; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 346; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 347; GFX8-NEXT: s_add_i32 s6, s32, 0x101100 348; GFX8-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill 349; GFX8-NEXT: s_mov_b64 exec, s[4:5] 350; GFX8-NEXT: v_writelane_b32 v1, s59, 0 351; GFX8-NEXT: s_lshr_b32 s59, s32, 6 352; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32 353; GFX8-NEXT: s_addk_i32 s59, 0x4040 354; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0 355; GFX8-NEXT: ;;#ASMSTART 356; GFX8-NEXT: ; use alloca0 v0 357; GFX8-NEXT: ;;#ASMEND 358; GFX8-NEXT: ;;#ASMSTART 359; GFX8-NEXT: ; use s59 360; GFX8-NEXT: ;;#ASMEND 361; GFX8-NEXT: v_readlane_b32 s59, v1, 0 362; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 363; GFX8-NEXT: s_add_i32 s6, s32, 0x101100 364; GFX8-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload 365; GFX8-NEXT: s_mov_b64 exec, s[4:5] 366; GFX8-NEXT: s_waitcnt vmcnt(0) 367; GFX8-NEXT: s_setpc_b64 s[30:31] 368; 369; GFX900-LABEL: scalar_mov_materializes_frame_index_dead_scc: 370; GFX900: ; %bb.0: 371; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 372; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 373; 
GFX900-NEXT: s_add_i32 s6, s32, 0x101100 374; GFX900-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill 375; GFX900-NEXT: s_mov_b64 exec, s[4:5] 376; GFX900-NEXT: v_writelane_b32 v1, s59, 0 377; GFX900-NEXT: s_lshr_b32 s59, s32, 6 378; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 379; GFX900-NEXT: s_addk_i32 s59, 0x4040 380; GFX900-NEXT: v_add_u32_e32 v0, 64, v0 381; GFX900-NEXT: ;;#ASMSTART 382; GFX900-NEXT: ; use alloca0 v0 383; GFX900-NEXT: ;;#ASMEND 384; GFX900-NEXT: ;;#ASMSTART 385; GFX900-NEXT: ; use s59 386; GFX900-NEXT: ;;#ASMEND 387; GFX900-NEXT: v_readlane_b32 s59, v1, 0 388; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 389; GFX900-NEXT: s_add_i32 s6, s32, 0x101100 390; GFX900-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload 391; GFX900-NEXT: s_mov_b64 exec, s[4:5] 392; GFX900-NEXT: s_waitcnt vmcnt(0) 393; GFX900-NEXT: s_setpc_b64 s[30:31] 394; 395; GFX940-LABEL: scalar_mov_materializes_frame_index_dead_scc: 396; GFX940: ; %bb.0: 397; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 398; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 399; GFX940-NEXT: s_add_i32 s2, s32, 0x4044 400; GFX940-NEXT: scratch_store_dword off, v1, s2 sc0 sc1 ; 4-byte Folded Spill 401; GFX940-NEXT: s_mov_b64 exec, s[0:1] 402; GFX940-NEXT: s_add_i32 s0, s32, 64 403; GFX940-NEXT: v_mov_b32_e32 v0, s0 404; GFX940-NEXT: s_add_i32 s0, s32, 0x4040 405; GFX940-NEXT: v_writelane_b32 v1, s59, 0 406; GFX940-NEXT: s_mov_b32 s59, s0 407; GFX940-NEXT: ;;#ASMSTART 408; GFX940-NEXT: ; use alloca0 v0 409; GFX940-NEXT: ;;#ASMEND 410; GFX940-NEXT: ;;#ASMSTART 411; GFX940-NEXT: ; use s59 412; GFX940-NEXT: ;;#ASMEND 413; GFX940-NEXT: v_readlane_b32 s59, v1, 0 414; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 415; GFX940-NEXT: s_add_i32 s2, s32, 0x4044 416; GFX940-NEXT: scratch_load_dword v1, off, s2 ; 4-byte Folded Reload 417; GFX940-NEXT: s_mov_b64 exec, s[0:1] 418; GFX940-NEXT: s_waitcnt vmcnt(0) 419; GFX940-NEXT: s_setpc_b64 s[30:31] 420 %alloca0 = alloca [4096 x i32], 
align 64, addrspace(5) ; 4096 x 4 bytes = 16 KiB 421 %alloca1 = alloca i32, align 4, addrspace(5) 422 call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0) 423 call void asm sideeffect "; use $0", "{s59}"(ptr addrspace(5) %alloca1) ; only operand is %alloca1 in s59 424 ret void 425} 426; Same IR body as scalar_mov_materializes_frame_index_unavailable_scc, but with attribute group #1 (defined outside this view; presumably forces a frame pointer -- TODO confirm). The checks address the frame via s33. 427define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 { 428; GFX10_1-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp: 429; GFX10_1: ; %bb.0: 430; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 431; GFX10_1-NEXT: s_mov_b32 s5, s33 432; GFX10_1-NEXT: s_mov_b32 s33, s32 433; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 434; GFX10_1-NEXT: s_add_i32 s6, s33, 0x80880 435; GFX10_1-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill 436; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 437; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 438; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s33 439; GFX10_1-NEXT: v_writelane_b32 v1, s59, 0 440; GFX10_1-NEXT: s_add_i32 s32, s32, 0x81000 441; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo 442; GFX10_1-NEXT: s_mov_b32 s32, s33 443; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0 444; GFX10_1-NEXT: ;;#ASMSTART 445; GFX10_1-NEXT: ; use alloca0 v0 446; GFX10_1-NEXT: ;;#ASMEND 447; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s33 448; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 0x4040, v0 449; GFX10_1-NEXT: v_readfirstlane_b32 s59, v0 450; GFX10_1-NEXT: ;;#ASMSTART 451; GFX10_1-NEXT: ; use s59, scc 452; GFX10_1-NEXT: ;;#ASMEND 453; GFX10_1-NEXT: v_readlane_b32 s59, v1, 0 454; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 455; GFX10_1-NEXT: s_add_i32 s6, s33, 0x80880 456; GFX10_1-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload 457; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 458; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 459; GFX10_1-NEXT: s_mov_b32 s33, s5 460; GFX10_1-NEXT: s_waitcnt vmcnt(0) 461; GFX10_1-NEXT: s_setpc_b64 s[30:31] 462; 463; GFX10_3-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp: 464; GFX10_3: ; %bb.0: 465; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
466; GFX10_3-NEXT: s_mov_b32 s5, s33 467; GFX10_3-NEXT: s_mov_b32 s33, s32 468; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 469; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80880 470; GFX10_3-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill 471; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 472; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s33 473; GFX10_3-NEXT: v_writelane_b32 v1, s59, 0 474; GFX10_3-NEXT: s_add_i32 s32, s32, 0x81000 475; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo 476; GFX10_3-NEXT: s_mov_b32 s32, s33 477; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0 478; GFX10_3-NEXT: ;;#ASMSTART 479; GFX10_3-NEXT: ; use alloca0 v0 480; GFX10_3-NEXT: ;;#ASMEND 481; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s33 482; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 0x4040, v0 483; GFX10_3-NEXT: v_readfirstlane_b32 s59, v0 484; GFX10_3-NEXT: ;;#ASMSTART 485; GFX10_3-NEXT: ; use s59, scc 486; GFX10_3-NEXT: ;;#ASMEND 487; GFX10_3-NEXT: v_readlane_b32 s59, v1, 0 488; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 489; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80880 490; GFX10_3-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload 491; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 492; GFX10_3-NEXT: s_mov_b32 s33, s5 493; GFX10_3-NEXT: s_waitcnt vmcnt(0) 494; GFX10_3-NEXT: s_setpc_b64 s[30:31] 495; 496; GFX11-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp: 497; GFX11: ; %bb.0: 498; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 499; GFX11-NEXT: s_mov_b32 s1, s33 500; GFX11-NEXT: s_mov_b32 s33, s32 501; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 502; GFX11-NEXT: s_add_i32 s2, s33, 0x4044 503; GFX11-NEXT: scratch_store_b32 off, v1, s2 ; 4-byte Folded Spill 504; GFX11-NEXT: s_mov_b32 exec_lo, s0 505; GFX11-NEXT: s_addk_i32 s32, 0x4080 506; GFX11-NEXT: s_add_i32 s0, s33, 64 507; GFX11-NEXT: v_writelane_b32 v1, s59, 0 508; GFX11-NEXT: v_mov_b32_e32 v0, s0 509; GFX11-NEXT: s_and_b32 s0, 0, exec_lo 510; GFX11-NEXT: s_addc_u32 s0, s33, 0x4040 511; GFX11-NEXT: ;;#ASMSTART 512; GFX11-NEXT: ; use alloca0 v0 
513; GFX11-NEXT: ;;#ASMEND 514; GFX11-NEXT: s_bitcmp1_b32 s0, 0 515; GFX11-NEXT: s_bitset0_b32 s0, 0 516; GFX11-NEXT: s_mov_b32 s32, s33 517; GFX11-NEXT: s_mov_b32 s59, s0 518; GFX11-NEXT: ;;#ASMSTART 519; GFX11-NEXT: ; use s59, scc 520; GFX11-NEXT: ;;#ASMEND 521; GFX11-NEXT: v_readlane_b32 s59, v1, 0 522; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 523; GFX11-NEXT: s_add_i32 s2, s33, 0x4044 524; GFX11-NEXT: scratch_load_b32 v1, off, s2 ; 4-byte Folded Reload 525; GFX11-NEXT: s_mov_b32 exec_lo, s0 526; GFX11-NEXT: s_mov_b32 s33, s1 527; GFX11-NEXT: s_waitcnt vmcnt(0) 528; GFX11-NEXT: s_setpc_b64 s[30:31] 529; 530; GFX12-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp: 531; GFX12: ; %bb.0: 532; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 533; GFX12-NEXT: s_wait_expcnt 0x0 534; GFX12-NEXT: s_wait_samplecnt 0x0 535; GFX12-NEXT: s_wait_bvhcnt 0x0 536; GFX12-NEXT: s_wait_kmcnt 0x0 537; GFX12-NEXT: s_mov_b32 s1, s33 538; GFX12-NEXT: s_mov_b32 s33, s32 539; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 540; GFX12-NEXT: scratch_store_b32 off, v1, s33 offset:16388 ; 4-byte Folded Spill 541; GFX12-NEXT: s_wait_alu 0xfffe 542; GFX12-NEXT: s_mov_b32 exec_lo, s0 543; GFX12-NEXT: s_addk_co_i32 s32, 0x4040 544; GFX12-NEXT: s_and_b32 s0, 0, exec_lo 545; GFX12-NEXT: v_writelane_b32 v1, s59, 0 546; GFX12-NEXT: s_add_co_ci_u32 s0, s33, 0x4000 547; GFX12-NEXT: v_mov_b32_e32 v0, s33 548; GFX12-NEXT: s_wait_alu 0xfffe 549; GFX12-NEXT: s_bitcmp1_b32 s0, 0 550; GFX12-NEXT: s_bitset0_b32 s0, 0 551; GFX12-NEXT: ;;#ASMSTART 552; GFX12-NEXT: ; use alloca0 v0 553; GFX12-NEXT: ;;#ASMEND 554; GFX12-NEXT: s_wait_alu 0xfffe 555; GFX12-NEXT: s_mov_b32 s59, s0 556; GFX12-NEXT: ;;#ASMSTART 557; GFX12-NEXT: ; use s59, scc 558; GFX12-NEXT: ;;#ASMEND 559; GFX12-NEXT: v_readlane_b32 s59, v1, 0 560; GFX12-NEXT: s_mov_b32 s32, s33 561; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 562; GFX12-NEXT: scratch_load_b32 v1, off, s33 offset:16388 ; 4-byte Folded Reload 563; GFX12-NEXT: s_wait_alu 0xfffe 564; GFX12-NEXT: 
s_mov_b32 exec_lo, s0 565; GFX12-NEXT: s_mov_b32 s33, s1 566; GFX12-NEXT: s_wait_loadcnt 0x0 567; GFX12-NEXT: s_wait_alu 0xfffe 568; GFX12-NEXT: s_setpc_b64 s[30:31] 569; 570; GFX8-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp: 571; GFX8: ; %bb.0: 572; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 573; GFX8-NEXT: s_mov_b32 s6, s33 574; GFX8-NEXT: s_mov_b32 s33, s32 575; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 576; GFX8-NEXT: s_add_i32 s7, s33, 0x101100 577; GFX8-NEXT: buffer_store_dword v1, off, s[0:3], s7 ; 4-byte Folded Spill 578; GFX8-NEXT: s_mov_b64 exec, s[4:5] 579; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s33 580; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0 581; GFX8-NEXT: v_writelane_b32 v1, s59, 0 582; GFX8-NEXT: ;;#ASMSTART 583; GFX8-NEXT: ; use alloca0 v0 584; GFX8-NEXT: ;;#ASMEND 585; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s33 586; GFX8-NEXT: s_movk_i32 s59, 0x4040 587; GFX8-NEXT: v_add_u32_e32 v0, vcc, s59, v0 588; GFX8-NEXT: s_add_i32 s32, s32, 0x102000 589; GFX8-NEXT: v_readfirstlane_b32 s59, v0 590; GFX8-NEXT: s_and_b64 s[4:5], 0, exec 591; GFX8-NEXT: ;;#ASMSTART 592; GFX8-NEXT: ; use s59, scc 593; GFX8-NEXT: ;;#ASMEND 594; GFX8-NEXT: v_readlane_b32 s59, v1, 0 595; GFX8-NEXT: s_mov_b32 s32, s33 596; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 597; GFX8-NEXT: s_add_i32 s7, s33, 0x101100 598; GFX8-NEXT: buffer_load_dword v1, off, s[0:3], s7 ; 4-byte Folded Reload 599; GFX8-NEXT: s_mov_b64 exec, s[4:5] 600; GFX8-NEXT: s_mov_b32 s33, s6 601; GFX8-NEXT: s_waitcnt vmcnt(0) 602; GFX8-NEXT: s_setpc_b64 s[30:31] 603; 604; GFX900-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp: 605; GFX900: ; %bb.0: 606; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 607; GFX900-NEXT: s_mov_b32 s6, s33 608; GFX900-NEXT: s_mov_b32 s33, s32 609; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 610; GFX900-NEXT: s_add_i32 s7, s33, 0x101100 611; GFX900-NEXT: buffer_store_dword v1, off, s[0:3], s7 ; 4-byte Folded Spill 612; GFX900-NEXT: s_mov_b64 exec, s[4:5] 
613; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s33 614; GFX900-NEXT: v_add_u32_e32 v0, 64, v0 615; GFX900-NEXT: ;;#ASMSTART 616; GFX900-NEXT: ; use alloca0 v0 617; GFX900-NEXT: ;;#ASMEND 618; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s33 619; GFX900-NEXT: v_add_u32_e32 v0, 0x4040, v0 620; GFX900-NEXT: s_add_i32 s32, s32, 0x102000 621; GFX900-NEXT: v_writelane_b32 v1, s59, 0 622; GFX900-NEXT: v_readfirstlane_b32 s59, v0 623; GFX900-NEXT: s_and_b64 s[4:5], 0, exec 624; GFX900-NEXT: ;;#ASMSTART 625; GFX900-NEXT: ; use s59, scc 626; GFX900-NEXT: ;;#ASMEND 627; GFX900-NEXT: v_readlane_b32 s59, v1, 0 628; GFX900-NEXT: s_mov_b32 s32, s33 629; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 630; GFX900-NEXT: s_add_i32 s7, s33, 0x101100 631; GFX900-NEXT: buffer_load_dword v1, off, s[0:3], s7 ; 4-byte Folded Reload 632; GFX900-NEXT: s_mov_b64 exec, s[4:5] 633; GFX900-NEXT: s_mov_b32 s33, s6 634; GFX900-NEXT: s_waitcnt vmcnt(0) 635; GFX900-NEXT: s_setpc_b64 s[30:31] 636; 637; GFX940-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp: 638; GFX940: ; %bb.0: 639; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 640; GFX940-NEXT: s_mov_b32 s2, s33 641; GFX940-NEXT: s_mov_b32 s33, s32 642; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 643; GFX940-NEXT: s_add_i32 s3, s33, 0x4044 644; GFX940-NEXT: scratch_store_dword off, v1, s3 sc0 sc1 ; 4-byte Folded Spill 645; GFX940-NEXT: s_mov_b64 exec, s[0:1] 646; GFX940-NEXT: s_addk_i32 s32, 0x4080 647; GFX940-NEXT: s_add_i32 s0, s33, 64 648; GFX940-NEXT: v_mov_b32_e32 v0, s0 649; GFX940-NEXT: s_and_b64 s[0:1], 0, exec 650; GFX940-NEXT: s_addc_u32 s0, s33, 0x4040 651; GFX940-NEXT: s_bitcmp1_b32 s0, 0 652; GFX940-NEXT: s_bitset0_b32 s0, 0 653; GFX940-NEXT: v_writelane_b32 v1, s59, 0 654; GFX940-NEXT: s_mov_b32 s59, s0 655; GFX940-NEXT: ;;#ASMSTART 656; GFX940-NEXT: ; use alloca0 v0 657; GFX940-NEXT: ;;#ASMEND 658; GFX940-NEXT: ;;#ASMSTART 659; GFX940-NEXT: ; use s59, scc 660; GFX940-NEXT: ;;#ASMEND 661; GFX940-NEXT: v_readlane_b32 s59, v1, 0 
662; GFX940-NEXT: s_mov_b32 s32, s33 663; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 664; GFX940-NEXT: s_add_i32 s3, s33, 0x4044 665; GFX940-NEXT: scratch_load_dword v1, off, s3 ; 4-byte Folded Reload 666; GFX940-NEXT: s_mov_b64 exec, s[0:1] 667; GFX940-NEXT: s_mov_b32 s33, s2 668; GFX940-NEXT: s_waitcnt vmcnt(0) 669; GFX940-NEXT: s_setpc_b64 s[30:31] 670 %alloca0 = alloca [4096 x i32], align 64, addrspace(5) ; 4096 x 4 bytes = 16 KiB 671 %alloca1 = alloca i32, align 4, addrspace(5) 672 call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0) 673 call void asm sideeffect "; use $0, $1", "{s59},{scc}"(ptr addrspace(5) %alloca1, i32 0) ; operand 0 = %alloca1 in s59, operand 1 = i32 0 in scc 674 ret void 675} 676 677define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset() #0 { 678; GFX10_1-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset: 679; GFX10_1: ; %bb.0: 680; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 681; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 682; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80800 683; GFX10_1-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill 684; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 685; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 686; GFX10_1-NEXT: v_lshrrev_b32_e64 v1, 5, s32 687; GFX10_1-NEXT: v_writelane_b32 v0, s59, 0 688; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo 689; GFX10_1-NEXT: v_add_nc_u32_e32 v1, 64, v1 690; GFX10_1-NEXT: v_readfirstlane_b32 s59, v1 691; GFX10_1-NEXT: ;;#ASMSTART 692; GFX10_1-NEXT: ; use s59, scc 693; GFX10_1-NEXT: ;;#ASMEND 694; GFX10_1-NEXT: v_readlane_b32 s59, v0, 0 695; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 696; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80800 697; GFX10_1-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload 698; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 699; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 700; GFX10_1-NEXT: s_waitcnt vmcnt(0) 701; GFX10_1-NEXT: s_setpc_b64 s[30:31] 702; 703; GFX10_3-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset: 704; GFX10_3: ; %bb.0: 705; 
GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 706; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 707; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80800 708; GFX10_3-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill 709; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 710; GFX10_3-NEXT: v_lshrrev_b32_e64 v1, 5, s32 711; GFX10_3-NEXT: v_writelane_b32 v0, s59, 0 712; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo 713; GFX10_3-NEXT: v_add_nc_u32_e32 v1, 64, v1 714; GFX10_3-NEXT: v_readfirstlane_b32 s59, v1 715; GFX10_3-NEXT: ;;#ASMSTART 716; GFX10_3-NEXT: ; use s59, scc 717; GFX10_3-NEXT: ;;#ASMEND 718; GFX10_3-NEXT: v_readlane_b32 s59, v0, 0 719; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 720; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80800 721; GFX10_3-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload 722; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 723; GFX10_3-NEXT: s_waitcnt vmcnt(0) 724; GFX10_3-NEXT: s_setpc_b64 s[30:31] 725; 726; GFX11-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset: 727; GFX11: ; %bb.0: 728; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 729; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 730; GFX11-NEXT: s_add_i32 s1, s32, 0x4040 731; GFX11-NEXT: scratch_store_b32 off, v0, s1 ; 4-byte Folded Spill 732; GFX11-NEXT: s_mov_b32 exec_lo, s0 733; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) 734; GFX11-NEXT: s_and_b32 s0, 0, exec_lo 735; GFX11-NEXT: v_writelane_b32 v0, s59, 0 736; GFX11-NEXT: s_addc_u32 s0, s32, 64 737; GFX11-NEXT: s_bitcmp1_b32 s0, 0 738; GFX11-NEXT: s_bitset0_b32 s0, 0 739; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 740; GFX11-NEXT: s_mov_b32 s59, s0 741; GFX11-NEXT: ;;#ASMSTART 742; GFX11-NEXT: ; use s59, scc 743; GFX11-NEXT: ;;#ASMEND 744; GFX11-NEXT: v_readlane_b32 s59, v0, 0 745; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 746; GFX11-NEXT: s_add_i32 s1, s32, 0x4040 747; GFX11-NEXT: scratch_load_b32 v0, off, s1 ; 4-byte Folded Reload 748; GFX11-NEXT: s_mov_b32 exec_lo, s0 749; 
GFX11-NEXT: s_waitcnt vmcnt(0) 750; GFX11-NEXT: s_setpc_b64 s[30:31] 751; 752; GFX12-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset: 753; GFX12: ; %bb.0: 754; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 755; GFX12-NEXT: s_wait_expcnt 0x0 756; GFX12-NEXT: s_wait_samplecnt 0x0 757; GFX12-NEXT: s_wait_bvhcnt 0x0 758; GFX12-NEXT: s_wait_kmcnt 0x0 759; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 760; GFX12-NEXT: scratch_store_b32 off, v0, s32 offset:16384 ; 4-byte Folded Spill 761; GFX12-NEXT: s_wait_alu 0xfffe 762; GFX12-NEXT: s_mov_b32 exec_lo, s0 763; GFX12-NEXT: v_writelane_b32 v0, s59, 0 764; GFX12-NEXT: s_mov_b32 s59, s32 765; GFX12-NEXT: s_and_b32 s0, 0, exec_lo 766; GFX12-NEXT: ;;#ASMSTART 767; GFX12-NEXT: ; use s59, scc 768; GFX12-NEXT: ;;#ASMEND 769; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 770; GFX12-NEXT: v_readlane_b32 s59, v0, 0 771; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 772; GFX12-NEXT: scratch_load_b32 v0, off, s32 offset:16384 ; 4-byte Folded Reload 773; GFX12-NEXT: s_wait_alu 0xfffe 774; GFX12-NEXT: s_mov_b32 exec_lo, s0 775; GFX12-NEXT: s_wait_loadcnt 0x0 776; GFX12-NEXT: s_wait_alu 0xfffe 777; GFX12-NEXT: s_setpc_b64 s[30:31] 778; 779; GFX8-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset: 780; GFX8: ; %bb.0: 781; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 782; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 783; GFX8-NEXT: s_add_i32 s6, s32, 0x101000 784; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill 785; GFX8-NEXT: s_mov_b64 exec, s[4:5] 786; GFX8-NEXT: v_writelane_b32 v0, s59, 0 787; GFX8-NEXT: v_lshrrev_b32_e64 v1, 6, s32 788; GFX8-NEXT: s_mov_b32 s59, 64 789; GFX8-NEXT: v_add_u32_e32 v1, vcc, s59, v1 790; GFX8-NEXT: v_readfirstlane_b32 s59, v1 791; GFX8-NEXT: s_and_b64 s[4:5], 0, exec 792; GFX8-NEXT: ;;#ASMSTART 793; GFX8-NEXT: ; use s59, scc 794; GFX8-NEXT: ;;#ASMEND 795; GFX8-NEXT: v_readlane_b32 s59, v0, 0 796; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 797; GFX8-NEXT: s_add_i32 
s6, s32, 0x101000 798; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload 799; GFX8-NEXT: s_mov_b64 exec, s[4:5] 800; GFX8-NEXT: s_waitcnt vmcnt(0) 801; GFX8-NEXT: s_setpc_b64 s[30:31] 802; 803; GFX900-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset: 804; GFX900: ; %bb.0: 805; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 806; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 807; GFX900-NEXT: s_add_i32 s6, s32, 0x101000 808; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill 809; GFX900-NEXT: s_mov_b64 exec, s[4:5] 810; GFX900-NEXT: v_lshrrev_b32_e64 v1, 6, s32 811; GFX900-NEXT: v_add_u32_e32 v1, 64, v1 812; GFX900-NEXT: v_writelane_b32 v0, s59, 0 813; GFX900-NEXT: v_readfirstlane_b32 s59, v1 814; GFX900-NEXT: s_and_b64 s[4:5], 0, exec 815; GFX900-NEXT: ;;#ASMSTART 816; GFX900-NEXT: ; use s59, scc 817; GFX900-NEXT: ;;#ASMEND 818; GFX900-NEXT: v_readlane_b32 s59, v0, 0 819; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 820; GFX900-NEXT: s_add_i32 s6, s32, 0x101000 821; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload 822; GFX900-NEXT: s_mov_b64 exec, s[4:5] 823; GFX900-NEXT: s_waitcnt vmcnt(0) 824; GFX900-NEXT: s_setpc_b64 s[30:31] 825; 826; GFX940-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset: 827; GFX940: ; %bb.0: 828; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 829; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 830; GFX940-NEXT: s_add_i32 s2, s32, 0x4040 831; GFX940-NEXT: scratch_store_dword off, v0, s2 sc0 sc1 ; 4-byte Folded Spill 832; GFX940-NEXT: s_mov_b64 exec, s[0:1] 833; GFX940-NEXT: s_and_b64 s[0:1], 0, exec 834; GFX940-NEXT: s_addc_u32 s0, s32, 64 835; GFX940-NEXT: s_bitcmp1_b32 s0, 0 836; GFX940-NEXT: s_bitset0_b32 s0, 0 837; GFX940-NEXT: v_writelane_b32 v0, s59, 0 838; GFX940-NEXT: s_mov_b32 s59, s0 839; GFX940-NEXT: ;;#ASMSTART 840; GFX940-NEXT: ; use s59, scc 841; GFX940-NEXT: ;;#ASMEND 842; GFX940-NEXT: v_readlane_b32 s59, v0, 0 
843; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 844; GFX940-NEXT: s_add_i32 s2, s32, 0x4040 845; GFX940-NEXT: scratch_load_dword v0, off, s2 ; 4-byte Folded Reload 846; GFX940-NEXT: s_mov_b64 exec, s[0:1] 847; GFX940-NEXT: s_waitcnt vmcnt(0) 848; GFX940-NEXT: s_setpc_b64 s[30:31] 849 %alloca0 = alloca [4096 x i32], align 64, addrspace(5) 850 call void asm sideeffect "; use $0, $1", "{s59},{scc}"(ptr addrspace(5) %alloca0, i32 0) 851 ret void 852} 853 854define void @scalar_mov_materializes_frame_index_available_scc_small_offset() #0 { 855; GFX10_1-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset: 856; GFX10_1: ; %bb.0: 857; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 858; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 859; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80800 860; GFX10_1-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill 861; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 862; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 863; GFX10_1-NEXT: v_writelane_b32 v0, s59, 0 864; GFX10_1-NEXT: s_lshr_b32 s59, s32, 5 865; GFX10_1-NEXT: s_add_i32 s59, s59, 64 866; GFX10_1-NEXT: ;;#ASMSTART 867; GFX10_1-NEXT: ; use s59 868; GFX10_1-NEXT: ;;#ASMEND 869; GFX10_1-NEXT: v_readlane_b32 s59, v0, 0 870; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 871; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80800 872; GFX10_1-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload 873; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 874; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 875; GFX10_1-NEXT: s_waitcnt vmcnt(0) 876; GFX10_1-NEXT: s_setpc_b64 s[30:31] 877; 878; GFX10_3-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset: 879; GFX10_3: ; %bb.0: 880; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 881; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 882; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80800 883; GFX10_3-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill 884; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 885; GFX10_3-NEXT: v_writelane_b32 v0, s59, 0 886; 
GFX10_3-NEXT: s_lshr_b32 s59, s32, 5 887; GFX10_3-NEXT: s_add_i32 s59, s59, 64 888; GFX10_3-NEXT: ;;#ASMSTART 889; GFX10_3-NEXT: ; use s59 890; GFX10_3-NEXT: ;;#ASMEND 891; GFX10_3-NEXT: v_readlane_b32 s59, v0, 0 892; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 893; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80800 894; GFX10_3-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload 895; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 896; GFX10_3-NEXT: s_waitcnt vmcnt(0) 897; GFX10_3-NEXT: s_setpc_b64 s[30:31] 898; 899; GFX11-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset: 900; GFX11: ; %bb.0: 901; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 902; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 903; GFX11-NEXT: s_add_i32 s1, s32, 0x4040 904; GFX11-NEXT: scratch_store_b32 off, v0, s1 ; 4-byte Folded Spill 905; GFX11-NEXT: s_mov_b32 exec_lo, s0 906; GFX11-NEXT: v_writelane_b32 v0, s59, 0 907; GFX11-NEXT: s_add_i32 s0, s32, 64 908; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 909; GFX11-NEXT: s_mov_b32 s59, s0 910; GFX11-NEXT: ;;#ASMSTART 911; GFX11-NEXT: ; use s59 912; GFX11-NEXT: ;;#ASMEND 913; GFX11-NEXT: v_readlane_b32 s59, v0, 0 914; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 915; GFX11-NEXT: s_add_i32 s1, s32, 0x4040 916; GFX11-NEXT: scratch_load_b32 v0, off, s1 ; 4-byte Folded Reload 917; GFX11-NEXT: s_mov_b32 exec_lo, s0 918; GFX11-NEXT: s_waitcnt vmcnt(0) 919; GFX11-NEXT: s_setpc_b64 s[30:31] 920; 921; GFX12-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset: 922; GFX12: ; %bb.0: 923; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 924; GFX12-NEXT: s_wait_expcnt 0x0 925; GFX12-NEXT: s_wait_samplecnt 0x0 926; GFX12-NEXT: s_wait_bvhcnt 0x0 927; GFX12-NEXT: s_wait_kmcnt 0x0 928; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 929; GFX12-NEXT: scratch_store_b32 off, v0, s32 offset:16384 ; 4-byte Folded Spill 930; GFX12-NEXT: s_wait_alu 0xfffe 931; GFX12-NEXT: s_mov_b32 exec_lo, s0 932; GFX12-NEXT: v_writelane_b32 v0, s59, 
0 933; GFX12-NEXT: s_mov_b32 s59, s32 934; GFX12-NEXT: ;;#ASMSTART 935; GFX12-NEXT: ; use s59 936; GFX12-NEXT: ;;#ASMEND 937; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 938; GFX12-NEXT: v_readlane_b32 s59, v0, 0 939; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 940; GFX12-NEXT: scratch_load_b32 v0, off, s32 offset:16384 ; 4-byte Folded Reload 941; GFX12-NEXT: s_wait_alu 0xfffe 942; GFX12-NEXT: s_mov_b32 exec_lo, s0 943; GFX12-NEXT: s_wait_loadcnt 0x0 944; GFX12-NEXT: s_wait_alu 0xfffe 945; GFX12-NEXT: s_setpc_b64 s[30:31] 946; 947; GFX8-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset: 948; GFX8: ; %bb.0: 949; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 950; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 951; GFX8-NEXT: s_add_i32 s6, s32, 0x101000 952; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill 953; GFX8-NEXT: s_mov_b64 exec, s[4:5] 954; GFX8-NEXT: v_writelane_b32 v0, s59, 0 955; GFX8-NEXT: s_lshr_b32 s59, s32, 6 956; GFX8-NEXT: s_add_i32 s59, s59, 64 957; GFX8-NEXT: ;;#ASMSTART 958; GFX8-NEXT: ; use s59 959; GFX8-NEXT: ;;#ASMEND 960; GFX8-NEXT: v_readlane_b32 s59, v0, 0 961; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 962; GFX8-NEXT: s_add_i32 s6, s32, 0x101000 963; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload 964; GFX8-NEXT: s_mov_b64 exec, s[4:5] 965; GFX8-NEXT: s_waitcnt vmcnt(0) 966; GFX8-NEXT: s_setpc_b64 s[30:31] 967; 968; GFX900-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset: 969; GFX900: ; %bb.0: 970; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 971; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 972; GFX900-NEXT: s_add_i32 s6, s32, 0x101000 973; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill 974; GFX900-NEXT: s_mov_b64 exec, s[4:5] 975; GFX900-NEXT: v_writelane_b32 v0, s59, 0 976; GFX900-NEXT: s_lshr_b32 s59, s32, 6 977; GFX900-NEXT: s_add_i32 s59, s59, 64 978; GFX900-NEXT: ;;#ASMSTART 979; GFX900-NEXT: ; use s59 980; GFX900-NEXT: 
;;#ASMEND 981; GFX900-NEXT: v_readlane_b32 s59, v0, 0 982; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 983; GFX900-NEXT: s_add_i32 s6, s32, 0x101000 984; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload 985; GFX900-NEXT: s_mov_b64 exec, s[4:5] 986; GFX900-NEXT: s_waitcnt vmcnt(0) 987; GFX900-NEXT: s_setpc_b64 s[30:31] 988; 989; GFX940-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset: 990; GFX940: ; %bb.0: 991; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 992; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 993; GFX940-NEXT: s_add_i32 s2, s32, 0x4040 994; GFX940-NEXT: scratch_store_dword off, v0, s2 sc0 sc1 ; 4-byte Folded Spill 995; GFX940-NEXT: s_mov_b64 exec, s[0:1] 996; GFX940-NEXT: s_add_i32 s0, s32, 64 997; GFX940-NEXT: v_writelane_b32 v0, s59, 0 998; GFX940-NEXT: s_mov_b32 s59, s0 999; GFX940-NEXT: ;;#ASMSTART 1000; GFX940-NEXT: ; use s59 1001; GFX940-NEXT: ;;#ASMEND 1002; GFX940-NEXT: v_readlane_b32 s59, v0, 0 1003; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 1004; GFX940-NEXT: s_add_i32 s2, s32, 0x4040 1005; GFX940-NEXT: scratch_load_dword v0, off, s2 ; 4-byte Folded Reload 1006; GFX940-NEXT: s_mov_b64 exec, s[0:1] 1007; GFX940-NEXT: s_waitcnt vmcnt(0) 1008; GFX940-NEXT: s_setpc_b64 s[30:31] 1009 %alloca0 = alloca [4096 x i32], align 64, addrspace(5) 1010 call void asm sideeffect "; use $0", "{s59}"(ptr addrspace(5) %alloca0) 1011 ret void 1012} 1013 1014define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp() #1 { 1015; GFX10_1-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp: 1016; GFX10_1: ; %bb.0: 1017; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1018; GFX10_1-NEXT: s_mov_b32 s5, s33 1019; GFX10_1-NEXT: s_mov_b32 s33, s32 1020; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 1021; GFX10_1-NEXT: s_add_i32 s6, s33, 0x80800 1022; GFX10_1-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill 1023; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 1024; 
GFX10_1-NEXT: s_mov_b32 exec_lo, s4 1025; GFX10_1-NEXT: v_lshrrev_b32_e64 v1, 5, s33 1026; GFX10_1-NEXT: v_writelane_b32 v0, s59, 0 1027; GFX10_1-NEXT: s_add_i32 s32, s32, 0x81000 1028; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo 1029; GFX10_1-NEXT: s_mov_b32 s32, s33 1030; GFX10_1-NEXT: v_add_nc_u32_e32 v1, 64, v1 1031; GFX10_1-NEXT: v_readfirstlane_b32 s59, v1 1032; GFX10_1-NEXT: ;;#ASMSTART 1033; GFX10_1-NEXT: ; use s59, scc 1034; GFX10_1-NEXT: ;;#ASMEND 1035; GFX10_1-NEXT: v_readlane_b32 s59, v0, 0 1036; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 1037; GFX10_1-NEXT: s_add_i32 s6, s33, 0x80800 1038; GFX10_1-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload 1039; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 1040; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 1041; GFX10_1-NEXT: s_mov_b32 s33, s5 1042; GFX10_1-NEXT: s_waitcnt vmcnt(0) 1043; GFX10_1-NEXT: s_setpc_b64 s[30:31] 1044; 1045; GFX10_3-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp: 1046; GFX10_3: ; %bb.0: 1047; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1048; GFX10_3-NEXT: s_mov_b32 s5, s33 1049; GFX10_3-NEXT: s_mov_b32 s33, s32 1050; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 1051; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80800 1052; GFX10_3-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill 1053; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 1054; GFX10_3-NEXT: v_lshrrev_b32_e64 v1, 5, s33 1055; GFX10_3-NEXT: v_writelane_b32 v0, s59, 0 1056; GFX10_3-NEXT: s_add_i32 s32, s32, 0x81000 1057; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo 1058; GFX10_3-NEXT: s_mov_b32 s32, s33 1059; GFX10_3-NEXT: v_add_nc_u32_e32 v1, 64, v1 1060; GFX10_3-NEXT: v_readfirstlane_b32 s59, v1 1061; GFX10_3-NEXT: ;;#ASMSTART 1062; GFX10_3-NEXT: ; use s59, scc 1063; GFX10_3-NEXT: ;;#ASMEND 1064; GFX10_3-NEXT: v_readlane_b32 s59, v0, 0 1065; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 1066; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80800 1067; GFX10_3-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload 
1068; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 1069; GFX10_3-NEXT: s_mov_b32 s33, s5 1070; GFX10_3-NEXT: s_waitcnt vmcnt(0) 1071; GFX10_3-NEXT: s_setpc_b64 s[30:31] 1072; 1073; GFX11-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp: 1074; GFX11: ; %bb.0: 1075; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1076; GFX11-NEXT: s_mov_b32 s1, s33 1077; GFX11-NEXT: s_mov_b32 s33, s32 1078; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 1079; GFX11-NEXT: s_add_i32 s2, s33, 0x4040 1080; GFX11-NEXT: scratch_store_b32 off, v0, s2 ; 4-byte Folded Spill 1081; GFX11-NEXT: s_mov_b32 exec_lo, s0 1082; GFX11-NEXT: s_addk_i32 s32, 0x4080 1083; GFX11-NEXT: s_and_b32 s0, 0, exec_lo 1084; GFX11-NEXT: v_writelane_b32 v0, s59, 0 1085; GFX11-NEXT: s_addc_u32 s0, s33, 64 1086; GFX11-NEXT: s_mov_b32 s32, s33 1087; GFX11-NEXT: s_bitcmp1_b32 s0, 0 1088; GFX11-NEXT: s_bitset0_b32 s0, 0 1089; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 1090; GFX11-NEXT: s_mov_b32 s59, s0 1091; GFX11-NEXT: ;;#ASMSTART 1092; GFX11-NEXT: ; use s59, scc 1093; GFX11-NEXT: ;;#ASMEND 1094; GFX11-NEXT: v_readlane_b32 s59, v0, 0 1095; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 1096; GFX11-NEXT: s_add_i32 s2, s33, 0x4040 1097; GFX11-NEXT: scratch_load_b32 v0, off, s2 ; 4-byte Folded Reload 1098; GFX11-NEXT: s_mov_b32 exec_lo, s0 1099; GFX11-NEXT: s_mov_b32 s33, s1 1100; GFX11-NEXT: s_waitcnt vmcnt(0) 1101; GFX11-NEXT: s_setpc_b64 s[30:31] 1102; 1103; GFX12-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp: 1104; GFX12: ; %bb.0: 1105; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1106; GFX12-NEXT: s_wait_expcnt 0x0 1107; GFX12-NEXT: s_wait_samplecnt 0x0 1108; GFX12-NEXT: s_wait_bvhcnt 0x0 1109; GFX12-NEXT: s_wait_kmcnt 0x0 1110; GFX12-NEXT: s_mov_b32 s1, s33 1111; GFX12-NEXT: s_mov_b32 s33, s32 1112; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 1113; GFX12-NEXT: scratch_store_b32 off, v0, s33 offset:16384 ; 4-byte Folded Spill 1114; GFX12-NEXT: s_wait_alu 0xfffe 1115; GFX12-NEXT: s_mov_b32 exec_lo, 
s0 1116; GFX12-NEXT: v_writelane_b32 v0, s59, 0 1117; GFX12-NEXT: s_addk_co_i32 s32, 0x4040 1118; GFX12-NEXT: s_mov_b32 s59, s33 1119; GFX12-NEXT: s_and_b32 s0, 0, exec_lo 1120; GFX12-NEXT: ;;#ASMSTART 1121; GFX12-NEXT: ; use s59, scc 1122; GFX12-NEXT: ;;#ASMEND 1123; GFX12-NEXT: v_readlane_b32 s59, v0, 0 1124; GFX12-NEXT: s_mov_b32 s32, s33 1125; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 1126; GFX12-NEXT: scratch_load_b32 v0, off, s33 offset:16384 ; 4-byte Folded Reload 1127; GFX12-NEXT: s_wait_alu 0xfffe 1128; GFX12-NEXT: s_mov_b32 exec_lo, s0 1129; GFX12-NEXT: s_mov_b32 s33, s1 1130; GFX12-NEXT: s_wait_loadcnt 0x0 1131; GFX12-NEXT: s_wait_alu 0xfffe 1132; GFX12-NEXT: s_setpc_b64 s[30:31] 1133; 1134; GFX8-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp: 1135; GFX8: ; %bb.0: 1136; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1137; GFX8-NEXT: s_mov_b32 s6, s33 1138; GFX8-NEXT: s_mov_b32 s33, s32 1139; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 1140; GFX8-NEXT: s_add_i32 s7, s33, 0x101000 1141; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], s7 ; 4-byte Folded Spill 1142; GFX8-NEXT: s_mov_b64 exec, s[4:5] 1143; GFX8-NEXT: v_writelane_b32 v0, s59, 0 1144; GFX8-NEXT: v_lshrrev_b32_e64 v1, 6, s33 1145; GFX8-NEXT: s_mov_b32 s59, 64 1146; GFX8-NEXT: v_add_u32_e32 v1, vcc, s59, v1 1147; GFX8-NEXT: s_add_i32 s32, s32, 0x102000 1148; GFX8-NEXT: v_readfirstlane_b32 s59, v1 1149; GFX8-NEXT: s_and_b64 s[4:5], 0, exec 1150; GFX8-NEXT: ;;#ASMSTART 1151; GFX8-NEXT: ; use s59, scc 1152; GFX8-NEXT: ;;#ASMEND 1153; GFX8-NEXT: v_readlane_b32 s59, v0, 0 1154; GFX8-NEXT: s_mov_b32 s32, s33 1155; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 1156; GFX8-NEXT: s_add_i32 s7, s33, 0x101000 1157; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s7 ; 4-byte Folded Reload 1158; GFX8-NEXT: s_mov_b64 exec, s[4:5] 1159; GFX8-NEXT: s_mov_b32 s33, s6 1160; GFX8-NEXT: s_waitcnt vmcnt(0) 1161; GFX8-NEXT: s_setpc_b64 s[30:31] 1162; 1163; GFX900-LABEL: 
scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp: 1164; GFX900: ; %bb.0: 1165; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1166; GFX900-NEXT: s_mov_b32 s6, s33 1167; GFX900-NEXT: s_mov_b32 s33, s32 1168; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 1169; GFX900-NEXT: s_add_i32 s7, s33, 0x101000 1170; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s7 ; 4-byte Folded Spill 1171; GFX900-NEXT: s_mov_b64 exec, s[4:5] 1172; GFX900-NEXT: v_lshrrev_b32_e64 v1, 6, s33 1173; GFX900-NEXT: v_add_u32_e32 v1, 64, v1 1174; GFX900-NEXT: s_add_i32 s32, s32, 0x102000 1175; GFX900-NEXT: v_writelane_b32 v0, s59, 0 1176; GFX900-NEXT: v_readfirstlane_b32 s59, v1 1177; GFX900-NEXT: s_and_b64 s[4:5], 0, exec 1178; GFX900-NEXT: ;;#ASMSTART 1179; GFX900-NEXT: ; use s59, scc 1180; GFX900-NEXT: ;;#ASMEND 1181; GFX900-NEXT: v_readlane_b32 s59, v0, 0 1182; GFX900-NEXT: s_mov_b32 s32, s33 1183; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 1184; GFX900-NEXT: s_add_i32 s7, s33, 0x101000 1185; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s7 ; 4-byte Folded Reload 1186; GFX900-NEXT: s_mov_b64 exec, s[4:5] 1187; GFX900-NEXT: s_mov_b32 s33, s6 1188; GFX900-NEXT: s_waitcnt vmcnt(0) 1189; GFX900-NEXT: s_setpc_b64 s[30:31] 1190; 1191; GFX940-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp: 1192; GFX940: ; %bb.0: 1193; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1194; GFX940-NEXT: s_mov_b32 s2, s33 1195; GFX940-NEXT: s_mov_b32 s33, s32 1196; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 1197; GFX940-NEXT: s_add_i32 s3, s33, 0x4040 1198; GFX940-NEXT: scratch_store_dword off, v0, s3 sc0 sc1 ; 4-byte Folded Spill 1199; GFX940-NEXT: s_mov_b64 exec, s[0:1] 1200; GFX940-NEXT: s_addk_i32 s32, 0x4080 1201; GFX940-NEXT: s_and_b64 s[0:1], 0, exec 1202; GFX940-NEXT: s_addc_u32 s0, s33, 64 1203; GFX940-NEXT: s_bitcmp1_b32 s0, 0 1204; GFX940-NEXT: s_bitset0_b32 s0, 0 1205; GFX940-NEXT: v_writelane_b32 v0, s59, 0 1206; GFX940-NEXT: s_mov_b32 s59, s0 1207; 
GFX940-NEXT: ;;#ASMSTART 1208; GFX940-NEXT: ; use s59, scc 1209; GFX940-NEXT: ;;#ASMEND 1210; GFX940-NEXT: v_readlane_b32 s59, v0, 0 1211; GFX940-NEXT: s_mov_b32 s32, s33 1212; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 1213; GFX940-NEXT: s_add_i32 s3, s33, 0x4040 1214; GFX940-NEXT: scratch_load_dword v0, off, s3 ; 4-byte Folded Reload 1215; GFX940-NEXT: s_mov_b64 exec, s[0:1] 1216; GFX940-NEXT: s_mov_b32 s33, s2 1217; GFX940-NEXT: s_waitcnt vmcnt(0) 1218; GFX940-NEXT: s_setpc_b64 s[30:31] 1219 %alloca0 = alloca [4096 x i32], align 64, addrspace(5) 1220 call void asm sideeffect "; use $0, $1", "{s59},{scc}"(ptr addrspace(5) %alloca0, i32 0) 1221 ret void 1222} 1223 1224define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp() #1 { 1225; GFX10_1-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset_fp: 1226; GFX10_1: ; %bb.0: 1227; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1228; GFX10_1-NEXT: s_mov_b32 s4, s33 1229; GFX10_1-NEXT: s_mov_b32 s33, s32 1230; GFX10_1-NEXT: s_xor_saveexec_b32 s5, -1 1231; GFX10_1-NEXT: s_add_i32 s6, s33, 0x80800 1232; GFX10_1-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill 1233; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 1234; GFX10_1-NEXT: s_mov_b32 exec_lo, s5 1235; GFX10_1-NEXT: v_writelane_b32 v0, s59, 0 1236; GFX10_1-NEXT: s_add_i32 s32, s32, 0x81000 1237; GFX10_1-NEXT: s_lshr_b32 s59, s33, 5 1238; GFX10_1-NEXT: s_mov_b32 s32, s33 1239; GFX10_1-NEXT: s_add_i32 s59, s59, 64 1240; GFX10_1-NEXT: ;;#ASMSTART 1241; GFX10_1-NEXT: ; use s59 1242; GFX10_1-NEXT: ;;#ASMEND 1243; GFX10_1-NEXT: v_readlane_b32 s59, v0, 0 1244; GFX10_1-NEXT: s_xor_saveexec_b32 s5, -1 1245; GFX10_1-NEXT: s_add_i32 s6, s33, 0x80800 1246; GFX10_1-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload 1247; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 1248; GFX10_1-NEXT: s_mov_b32 exec_lo, s5 1249; GFX10_1-NEXT: s_mov_b32 s33, s4 1250; GFX10_1-NEXT: s_waitcnt vmcnt(0) 1251; GFX10_1-NEXT: s_setpc_b64 
s[30:31] 1252; 1253; GFX10_3-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset_fp: 1254; GFX10_3: ; %bb.0: 1255; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1256; GFX10_3-NEXT: s_mov_b32 s4, s33 1257; GFX10_3-NEXT: s_mov_b32 s33, s32 1258; GFX10_3-NEXT: s_xor_saveexec_b32 s5, -1 1259; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80800 1260; GFX10_3-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill 1261; GFX10_3-NEXT: s_mov_b32 exec_lo, s5 1262; GFX10_3-NEXT: v_writelane_b32 v0, s59, 0 1263; GFX10_3-NEXT: s_add_i32 s32, s32, 0x81000 1264; GFX10_3-NEXT: s_lshr_b32 s59, s33, 5 1265; GFX10_3-NEXT: s_mov_b32 s32, s33 1266; GFX10_3-NEXT: s_add_i32 s59, s59, 64 1267; GFX10_3-NEXT: ;;#ASMSTART 1268; GFX10_3-NEXT: ; use s59 1269; GFX10_3-NEXT: ;;#ASMEND 1270; GFX10_3-NEXT: v_readlane_b32 s59, v0, 0 1271; GFX10_3-NEXT: s_xor_saveexec_b32 s5, -1 1272; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80800 1273; GFX10_3-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload 1274; GFX10_3-NEXT: s_mov_b32 exec_lo, s5 1275; GFX10_3-NEXT: s_mov_b32 s33, s4 1276; GFX10_3-NEXT: s_waitcnt vmcnt(0) 1277; GFX10_3-NEXT: s_setpc_b64 s[30:31] 1278; 1279; GFX11-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset_fp: 1280; GFX11: ; %bb.0: 1281; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1282; GFX11-NEXT: s_mov_b32 s0, s33 1283; GFX11-NEXT: s_mov_b32 s33, s32 1284; GFX11-NEXT: s_xor_saveexec_b32 s1, -1 1285; GFX11-NEXT: s_add_i32 s2, s33, 0x4040 1286; GFX11-NEXT: scratch_store_b32 off, v0, s2 ; 4-byte Folded Spill 1287; GFX11-NEXT: s_mov_b32 exec_lo, s1 1288; GFX11-NEXT: v_writelane_b32 v0, s59, 0 1289; GFX11-NEXT: s_addk_i32 s32, 0x4080 1290; GFX11-NEXT: s_add_i32 s1, s33, 64 1291; GFX11-NEXT: s_mov_b32 s32, s33 1292; GFX11-NEXT: s_mov_b32 s59, s1 1293; GFX11-NEXT: ;;#ASMSTART 1294; GFX11-NEXT: ; use s59 1295; GFX11-NEXT: ;;#ASMEND 1296; GFX11-NEXT: v_readlane_b32 s59, v0, 0 1297; GFX11-NEXT: s_xor_saveexec_b32 s1, -1 1298; 
GFX11-NEXT: s_add_i32 s2, s33, 0x4040 1299; GFX11-NEXT: scratch_load_b32 v0, off, s2 ; 4-byte Folded Reload 1300; GFX11-NEXT: s_mov_b32 exec_lo, s1 1301; GFX11-NEXT: s_mov_b32 s33, s0 1302; GFX11-NEXT: s_waitcnt vmcnt(0) 1303; GFX11-NEXT: s_setpc_b64 s[30:31] 1304; 1305; GFX12-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset_fp: 1306; GFX12: ; %bb.0: 1307; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1308; GFX12-NEXT: s_wait_expcnt 0x0 1309; GFX12-NEXT: s_wait_samplecnt 0x0 1310; GFX12-NEXT: s_wait_bvhcnt 0x0 1311; GFX12-NEXT: s_wait_kmcnt 0x0 1312; GFX12-NEXT: s_mov_b32 s0, s33 1313; GFX12-NEXT: s_mov_b32 s33, s32 1314; GFX12-NEXT: s_xor_saveexec_b32 s1, -1 1315; GFX12-NEXT: scratch_store_b32 off, v0, s33 offset:16384 ; 4-byte Folded Spill 1316; GFX12-NEXT: s_wait_alu 0xfffe 1317; GFX12-NEXT: s_mov_b32 exec_lo, s1 1318; GFX12-NEXT: v_writelane_b32 v0, s59, 0 1319; GFX12-NEXT: s_addk_co_i32 s32, 0x4040 1320; GFX12-NEXT: s_mov_b32 s59, s33 1321; GFX12-NEXT: ;;#ASMSTART 1322; GFX12-NEXT: ; use s59 1323; GFX12-NEXT: ;;#ASMEND 1324; GFX12-NEXT: s_mov_b32 s32, s33 1325; GFX12-NEXT: v_readlane_b32 s59, v0, 0 1326; GFX12-NEXT: s_xor_saveexec_b32 s1, -1 1327; GFX12-NEXT: scratch_load_b32 v0, off, s33 offset:16384 ; 4-byte Folded Reload 1328; GFX12-NEXT: s_wait_alu 0xfffe 1329; GFX12-NEXT: s_mov_b32 exec_lo, s1 1330; GFX12-NEXT: s_mov_b32 s33, s0 1331; GFX12-NEXT: s_wait_loadcnt 0x0 1332; GFX12-NEXT: s_wait_alu 0xfffe 1333; GFX12-NEXT: s_setpc_b64 s[30:31] 1334; 1335; GFX8-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset_fp: 1336; GFX8: ; %bb.0: 1337; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1338; GFX8-NEXT: s_mov_b32 s4, s33 1339; GFX8-NEXT: s_mov_b32 s33, s32 1340; GFX8-NEXT: s_xor_saveexec_b64 s[6:7], -1 1341; GFX8-NEXT: s_add_i32 s5, s33, 0x101000 1342; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill 1343; GFX8-NEXT: s_mov_b64 exec, s[6:7] 1344; GFX8-NEXT: s_add_i32 s32, s32, 0x102000 1345; GFX8-NEXT: 
v_writelane_b32 v0, s59, 0 1346; GFX8-NEXT: s_lshr_b32 s59, s33, 6 1347; GFX8-NEXT: s_add_i32 s59, s59, 64 1348; GFX8-NEXT: ;;#ASMSTART 1349; GFX8-NEXT: ; use s59 1350; GFX8-NEXT: ;;#ASMEND 1351; GFX8-NEXT: v_readlane_b32 s59, v0, 0 1352; GFX8-NEXT: s_mov_b32 s32, s33 1353; GFX8-NEXT: s_xor_saveexec_b64 s[6:7], -1 1354; GFX8-NEXT: s_add_i32 s5, s33, 0x101000 1355; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload 1356; GFX8-NEXT: s_mov_b64 exec, s[6:7] 1357; GFX8-NEXT: s_mov_b32 s33, s4 1358; GFX8-NEXT: s_waitcnt vmcnt(0) 1359; GFX8-NEXT: s_setpc_b64 s[30:31] 1360; 1361; GFX900-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset_fp: 1362; GFX900: ; %bb.0: 1363; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1364; GFX900-NEXT: s_mov_b32 s4, s33 1365; GFX900-NEXT: s_mov_b32 s33, s32 1366; GFX900-NEXT: s_xor_saveexec_b64 s[6:7], -1 1367; GFX900-NEXT: s_add_i32 s5, s33, 0x101000 1368; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill 1369; GFX900-NEXT: s_mov_b64 exec, s[6:7] 1370; GFX900-NEXT: s_add_i32 s32, s32, 0x102000 1371; GFX900-NEXT: v_writelane_b32 v0, s59, 0 1372; GFX900-NEXT: s_lshr_b32 s59, s33, 6 1373; GFX900-NEXT: s_add_i32 s59, s59, 64 1374; GFX900-NEXT: ;;#ASMSTART 1375; GFX900-NEXT: ; use s59 1376; GFX900-NEXT: ;;#ASMEND 1377; GFX900-NEXT: v_readlane_b32 s59, v0, 0 1378; GFX900-NEXT: s_mov_b32 s32, s33 1379; GFX900-NEXT: s_xor_saveexec_b64 s[6:7], -1 1380; GFX900-NEXT: s_add_i32 s5, s33, 0x101000 1381; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload 1382; GFX900-NEXT: s_mov_b64 exec, s[6:7] 1383; GFX900-NEXT: s_mov_b32 s33, s4 1384; GFX900-NEXT: s_waitcnt vmcnt(0) 1385; GFX900-NEXT: s_setpc_b64 s[30:31] 1386; 1387; GFX940-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset_fp: 1388; GFX940: ; %bb.0: 1389; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1390; GFX940-NEXT: s_mov_b32 s0, s33 1391; GFX940-NEXT: s_mov_b32 s33, s32 1392; 
GFX940-NEXT: s_xor_saveexec_b64 s[2:3], -1 1393; GFX940-NEXT: s_add_i32 s1, s33, 0x4040 1394; GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1 ; 4-byte Folded Spill 1395; GFX940-NEXT: s_mov_b64 exec, s[2:3] 1396; GFX940-NEXT: s_addk_i32 s32, 0x4080 1397; GFX940-NEXT: s_add_i32 s1, s33, 64 1398; GFX940-NEXT: v_writelane_b32 v0, s59, 0 1399; GFX940-NEXT: s_mov_b32 s59, s1 1400; GFX940-NEXT: ;;#ASMSTART 1401; GFX940-NEXT: ; use s59 1402; GFX940-NEXT: ;;#ASMEND 1403; GFX940-NEXT: v_readlane_b32 s59, v0, 0 1404; GFX940-NEXT: s_mov_b32 s32, s33 1405; GFX940-NEXT: s_xor_saveexec_b64 s[2:3], -1 1406; GFX940-NEXT: s_add_i32 s1, s33, 0x4040 1407; GFX940-NEXT: scratch_load_dword v0, off, s1 ; 4-byte Folded Reload 1408; GFX940-NEXT: s_mov_b64 exec, s[2:3] 1409; GFX940-NEXT: s_mov_b32 s33, s0 1410; GFX940-NEXT: s_waitcnt vmcnt(0) 1411; GFX940-NEXT: s_setpc_b64 s[30:31] 1412 %alloca0 = alloca [4096 x i32], align 64, addrspace(5) 1413 call void asm sideeffect "; use $0", "{s59}"(ptr addrspace(5) %alloca0) 1414 ret void 1415} 1416 1417define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset() #0 { 1418; GFX10_1-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset: 1419; GFX10_1: ; %bb.0: 1420; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1421; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 1422; GFX10_1-NEXT: s_add_i32 s5, s32, 0x100800 1423; GFX10_1-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill 1424; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 1425; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 1426; GFX10_1-NEXT: v_writelane_b32 v1, s59, 0 1427; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32 1428; GFX10_1-NEXT: s_lshr_b32 s4, s32, 5 1429; GFX10_1-NEXT: s_add_i32 s59, s4, 0x442c 1430; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo 1431; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0 1432; GFX10_1-NEXT: ;;#ASMSTART 1433; GFX10_1-NEXT: ; use alloca0 v0 1434; GFX10_1-NEXT: ;;#ASMEND 1435; GFX10_1-NEXT: ;;#ASMSTART 1436; GFX10_1-NEXT: ; use s59, 
scc 1437; GFX10_1-NEXT: ;;#ASMEND 1438; GFX10_1-NEXT: v_readlane_b32 s59, v1, 0 1439; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 1440; GFX10_1-NEXT: s_add_i32 s5, s32, 0x100800 1441; GFX10_1-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload 1442; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 1443; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 1444; GFX10_1-NEXT: s_waitcnt vmcnt(0) 1445; GFX10_1-NEXT: s_setpc_b64 s[30:31] 1446; 1447; GFX10_3-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset: 1448; GFX10_3: ; %bb.0: 1449; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1450; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 1451; GFX10_3-NEXT: s_add_i32 s5, s32, 0x100800 1452; GFX10_3-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill 1453; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 1454; GFX10_3-NEXT: v_writelane_b32 v1, s59, 0 1455; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32 1456; GFX10_3-NEXT: s_lshr_b32 s4, s32, 5 1457; GFX10_3-NEXT: s_add_i32 s59, s4, 0x442c 1458; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo 1459; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0 1460; GFX10_3-NEXT: ;;#ASMSTART 1461; GFX10_3-NEXT: ; use alloca0 v0 1462; GFX10_3-NEXT: ;;#ASMEND 1463; GFX10_3-NEXT: ;;#ASMSTART 1464; GFX10_3-NEXT: ; use s59, scc 1465; GFX10_3-NEXT: ;;#ASMEND 1466; GFX10_3-NEXT: v_readlane_b32 s59, v1, 0 1467; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 1468; GFX10_3-NEXT: s_add_i32 s5, s32, 0x100800 1469; GFX10_3-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload 1470; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 1471; GFX10_3-NEXT: s_waitcnt vmcnt(0) 1472; GFX10_3-NEXT: s_setpc_b64 s[30:31] 1473; 1474; GFX11-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset: 1475; GFX11: ; %bb.0: 1476; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1477; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 1478; GFX11-NEXT: s_add_i32 s1, s32, 0x8040 1479; GFX11-NEXT: scratch_store_b32 off, v1, s1 ; 4-byte Folded Spill 1480; GFX11-NEXT: s_mov_b32 
exec_lo, s0 1481; GFX11-NEXT: v_writelane_b32 v1, s59, 0 1482; GFX11-NEXT: s_add_i32 s0, s32, 64 1483; GFX11-NEXT: s_add_i32 s59, s32, 0x442c 1484; GFX11-NEXT: v_mov_b32_e32 v0, s0 1485; GFX11-NEXT: s_and_b32 s0, 0, exec_lo 1486; GFX11-NEXT: ;;#ASMSTART 1487; GFX11-NEXT: ; use alloca0 v0 1488; GFX11-NEXT: ;;#ASMEND 1489; GFX11-NEXT: ;;#ASMSTART 1490; GFX11-NEXT: ; use s59, scc 1491; GFX11-NEXT: ;;#ASMEND 1492; GFX11-NEXT: v_readlane_b32 s59, v1, 0 1493; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 1494; GFX11-NEXT: s_add_i32 s1, s32, 0x8040 1495; GFX11-NEXT: scratch_load_b32 v1, off, s1 ; 4-byte Folded Reload 1496; GFX11-NEXT: s_mov_b32 exec_lo, s0 1497; GFX11-NEXT: s_waitcnt vmcnt(0) 1498; GFX11-NEXT: s_setpc_b64 s[30:31] 1499; 1500; GFX12-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset: 1501; GFX12: ; %bb.0: 1502; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1503; GFX12-NEXT: s_wait_expcnt 0x0 1504; GFX12-NEXT: s_wait_samplecnt 0x0 1505; GFX12-NEXT: s_wait_bvhcnt 0x0 1506; GFX12-NEXT: s_wait_kmcnt 0x0 1507; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 1508; GFX12-NEXT: scratch_store_b32 off, v1, s32 offset:32768 ; 4-byte Folded Spill 1509; GFX12-NEXT: s_wait_alu 0xfffe 1510; GFX12-NEXT: s_mov_b32 exec_lo, s0 1511; GFX12-NEXT: v_writelane_b32 v1, s59, 0 1512; GFX12-NEXT: s_add_co_i32 s59, s32, 0x43ec 1513; GFX12-NEXT: v_mov_b32_e32 v0, s32 1514; GFX12-NEXT: s_and_b32 s0, 0, exec_lo 1515; GFX12-NEXT: ;;#ASMSTART 1516; GFX12-NEXT: ; use alloca0 v0 1517; GFX12-NEXT: ;;#ASMEND 1518; GFX12-NEXT: ;;#ASMSTART 1519; GFX12-NEXT: ; use s59, scc 1520; GFX12-NEXT: ;;#ASMEND 1521; GFX12-NEXT: v_readlane_b32 s59, v1, 0 1522; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 1523; GFX12-NEXT: scratch_load_b32 v1, off, s32 offset:32768 ; 4-byte Folded Reload 1524; GFX12-NEXT: s_wait_alu 0xfffe 1525; GFX12-NEXT: s_mov_b32 exec_lo, s0 1526; GFX12-NEXT: s_wait_loadcnt 0x0 1527; GFX12-NEXT: s_wait_alu 0xfffe 1528; GFX12-NEXT: s_setpc_b64 s[30:31] 1529; 1530; GFX8-LABEL: 
scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset: 1531; GFX8: ; %bb.0: 1532; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1533; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 1534; GFX8-NEXT: s_add_i32 s6, s32, 0x201000 1535; GFX8-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill 1536; GFX8-NEXT: s_mov_b64 exec, s[4:5] 1537; GFX8-NEXT: s_lshr_b32 s4, s32, 6 1538; GFX8-NEXT: v_writelane_b32 v1, s59, 0 1539; GFX8-NEXT: s_add_i32 s59, s4, 0x442c 1540; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32 1541; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0 1542; GFX8-NEXT: ;;#ASMSTART 1543; GFX8-NEXT: ; use alloca0 v0 1544; GFX8-NEXT: ;;#ASMEND 1545; GFX8-NEXT: s_and_b64 s[4:5], 0, exec 1546; GFX8-NEXT: ;;#ASMSTART 1547; GFX8-NEXT: ; use s59, scc 1548; GFX8-NEXT: ;;#ASMEND 1549; GFX8-NEXT: v_readlane_b32 s59, v1, 0 1550; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 1551; GFX8-NEXT: s_add_i32 s6, s32, 0x201000 1552; GFX8-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload 1553; GFX8-NEXT: s_mov_b64 exec, s[4:5] 1554; GFX8-NEXT: s_waitcnt vmcnt(0) 1555; GFX8-NEXT: s_setpc_b64 s[30:31] 1556; 1557; GFX900-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset: 1558; GFX900: ; %bb.0: 1559; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1560; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 1561; GFX900-NEXT: s_add_i32 s6, s32, 0x201000 1562; GFX900-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill 1563; GFX900-NEXT: s_mov_b64 exec, s[4:5] 1564; GFX900-NEXT: s_lshr_b32 s4, s32, 6 1565; GFX900-NEXT: v_writelane_b32 v1, s59, 0 1566; GFX900-NEXT: s_add_i32 s59, s4, 0x442c 1567; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 1568; GFX900-NEXT: v_add_u32_e32 v0, 64, v0 1569; GFX900-NEXT: ;;#ASMSTART 1570; GFX900-NEXT: ; use alloca0 v0 1571; GFX900-NEXT: ;;#ASMEND 1572; GFX900-NEXT: s_and_b64 s[4:5], 0, exec 1573; GFX900-NEXT: ;;#ASMSTART 1574; GFX900-NEXT: ; use s59, scc 1575; GFX900-NEXT: ;;#ASMEND 1576; GFX900-NEXT: 
v_readlane_b32 s59, v1, 0 1577; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 1578; GFX900-NEXT: s_add_i32 s6, s32, 0x201000 1579; GFX900-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload 1580; GFX900-NEXT: s_mov_b64 exec, s[4:5] 1581; GFX900-NEXT: s_waitcnt vmcnt(0) 1582; GFX900-NEXT: s_setpc_b64 s[30:31] 1583; 1584; GFX940-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset: 1585; GFX940: ; %bb.0: 1586; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1587; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 1588; GFX940-NEXT: s_add_i32 s2, s32, 0x8040 1589; GFX940-NEXT: scratch_store_dword off, v1, s2 sc0 sc1 ; 4-byte Folded Spill 1590; GFX940-NEXT: s_mov_b64 exec, s[0:1] 1591; GFX940-NEXT: v_writelane_b32 v1, s59, 0 1592; GFX940-NEXT: s_add_i32 s59, s32, 0x442c 1593; GFX940-NEXT: s_add_i32 s0, s32, 64 1594; GFX940-NEXT: v_mov_b32_e32 v0, s0 1595; GFX940-NEXT: ;;#ASMSTART 1596; GFX940-NEXT: ; use alloca0 v0 1597; GFX940-NEXT: ;;#ASMEND 1598; GFX940-NEXT: s_and_b64 s[0:1], 0, exec 1599; GFX940-NEXT: ;;#ASMSTART 1600; GFX940-NEXT: ; use s59, scc 1601; GFX940-NEXT: ;;#ASMEND 1602; GFX940-NEXT: v_readlane_b32 s59, v1, 0 1603; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 1604; GFX940-NEXT: s_add_i32 s2, s32, 0x8040 1605; GFX940-NEXT: scratch_load_dword v1, off, s2 ; 4-byte Folded Reload 1606; GFX940-NEXT: s_mov_b64 exec, s[0:1] 1607; GFX940-NEXT: s_waitcnt vmcnt(0) 1608; GFX940-NEXT: s_setpc_b64 s[30:31] 1609 %alloca0 = alloca [4096 x i32], align 64, addrspace(5) 1610 %alloca1 = alloca [4096 x i32], align 4, addrspace(5) 1611 %alloca1.offset = getelementptr [4096 x i32], ptr addrspace(5) %alloca1, i32 0, i32 251 1612 call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0) 1613 call void asm sideeffect "; use $0, $1", "{s59},{scc}"(ptr addrspace(5) %alloca1.offset, i32 0) 1614 ret void 1615} 1616 1617define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset(i32 inreg %soffset) #0 { 1618; 
; Test intent for the _gep_sgpr_offset variant: the getelementptr into %alloca1
; is indexed by the inreg i32 argument %soffset rather than by a constant. The
; second inline asm ties that pointer to s59 and takes i32 0 in scc, so the
; address has to be formed in s59 ahead of an asm that also reads scc.
; The assertions that follow record the scalar sequence each subtarget emits
; for s59: an s_lshl by 2, an add involving s32 (on the subtargets that spill
; with buffer_store_dword, s32 is first shifted right by 5 or 6), and a final
; s_addk of 0x4040 (0x4000 for gfx1200). They also record s59 being preserved
; around the asm with v_writelane_b32 / v_readlane_b32 on v1.
; NOTE(review): the s_and of 0 with exec that precedes the second asm
; presumably defines the scc input - confirm against the backend.
GFX10_1-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset: 1619; GFX10_1: ; %bb.0: 1620; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1621; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 1622; GFX10_1-NEXT: s_add_i32 s5, s32, 0x100800 1623; GFX10_1-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill 1624; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 1625; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 1626; GFX10_1-NEXT: v_writelane_b32 v1, s59, 0 1627; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32 1628; GFX10_1-NEXT: s_lshl_b32 s4, s16, 2 1629; GFX10_1-NEXT: s_lshr_b32 s59, s32, 5 1630; GFX10_1-NEXT: s_add_i32 s59, s59, s4 1631; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0 1632; GFX10_1-NEXT: s_addk_i32 s59, 0x4040 1633; GFX10_1-NEXT: ;;#ASMSTART 1634; GFX10_1-NEXT: ; use alloca0 v0 1635; GFX10_1-NEXT: ;;#ASMEND 1636; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo 1637; GFX10_1-NEXT: ;;#ASMSTART 1638; GFX10_1-NEXT: ; use s59, scc 1639; GFX10_1-NEXT: ;;#ASMEND 1640; GFX10_1-NEXT: v_readlane_b32 s59, v1, 0 1641; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 1642; GFX10_1-NEXT: s_add_i32 s5, s32, 0x100800 1643; GFX10_1-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload 1644; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 1645; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 1646; GFX10_1-NEXT: s_waitcnt vmcnt(0) 1647; GFX10_1-NEXT: s_setpc_b64 s[30:31] 1648; 1649; GFX10_3-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset: 1650; GFX10_3: ; %bb.0: 1651; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1652; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 1653; GFX10_3-NEXT: s_add_i32 s5, s32, 0x100800 1654; GFX10_3-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill 1655; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 1656; GFX10_3-NEXT: v_writelane_b32 v1, s59, 0 1657; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32 1658; GFX10_3-NEXT: s_lshl_b32 s4, s16, 2 1659; GFX10_3-NEXT: s_lshr_b32 s59, s32, 5 1660; GFX10_3-NEXT: s_add_i32 s59, s59, s4 
1661; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0 1662; GFX10_3-NEXT: s_addk_i32 s59, 0x4040 1663; GFX10_3-NEXT: ;;#ASMSTART 1664; GFX10_3-NEXT: ; use alloca0 v0 1665; GFX10_3-NEXT: ;;#ASMEND 1666; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo 1667; GFX10_3-NEXT: ;;#ASMSTART 1668; GFX10_3-NEXT: ; use s59, scc 1669; GFX10_3-NEXT: ;;#ASMEND 1670; GFX10_3-NEXT: v_readlane_b32 s59, v1, 0 1671; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 1672; GFX10_3-NEXT: s_add_i32 s5, s32, 0x100800 1673; GFX10_3-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload 1674; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 1675; GFX10_3-NEXT: s_waitcnt vmcnt(0) 1676; GFX10_3-NEXT: s_setpc_b64 s[30:31] 1677; 1678; GFX11-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset: 1679; GFX11: ; %bb.0: 1680; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1681; GFX11-NEXT: s_xor_saveexec_b32 s1, -1 1682; GFX11-NEXT: s_add_i32 s2, s32, 0x8040 1683; GFX11-NEXT: scratch_store_b32 off, v1, s2 ; 4-byte Folded Spill 1684; GFX11-NEXT: s_mov_b32 exec_lo, s1 1685; GFX11-NEXT: s_add_i32 s1, s32, 64 1686; GFX11-NEXT: v_writelane_b32 v1, s59, 0 1687; GFX11-NEXT: s_lshl_b32 s0, s0, 2 1688; GFX11-NEXT: v_mov_b32_e32 v0, s1 1689; GFX11-NEXT: s_add_i32 s59, s32, s0 1690; GFX11-NEXT: ;;#ASMSTART 1691; GFX11-NEXT: ; use alloca0 v0 1692; GFX11-NEXT: ;;#ASMEND 1693; GFX11-NEXT: s_addk_i32 s59, 0x4040 1694; GFX11-NEXT: s_and_b32 s0, 0, exec_lo 1695; GFX11-NEXT: ;;#ASMSTART 1696; GFX11-NEXT: ; use s59, scc 1697; GFX11-NEXT: ;;#ASMEND 1698; GFX11-NEXT: v_readlane_b32 s59, v1, 0 1699; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 1700; GFX11-NEXT: s_add_i32 s1, s32, 0x8040 1701; GFX11-NEXT: scratch_load_b32 v1, off, s1 ; 4-byte Folded Reload 1702; GFX11-NEXT: s_mov_b32 exec_lo, s0 1703; GFX11-NEXT: s_waitcnt vmcnt(0) 1704; GFX11-NEXT: s_setpc_b64 s[30:31] 1705; 1706; GFX12-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset: 1707; GFX12: ; %bb.0: 1708; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 
1709; GFX12-NEXT: s_wait_expcnt 0x0 1710; GFX12-NEXT: s_wait_samplecnt 0x0 1711; GFX12-NEXT: s_wait_bvhcnt 0x0 1712; GFX12-NEXT: s_wait_kmcnt 0x0 1713; GFX12-NEXT: s_xor_saveexec_b32 s1, -1 1714; GFX12-NEXT: scratch_store_b32 off, v1, s32 offset:32768 ; 4-byte Folded Spill 1715; GFX12-NEXT: s_wait_alu 0xfffe 1716; GFX12-NEXT: s_mov_b32 exec_lo, s1 1717; GFX12-NEXT: v_writelane_b32 v1, s59, 0 1718; GFX12-NEXT: s_lshl_b32 s0, s0, 2 1719; GFX12-NEXT: v_mov_b32_e32 v0, s32 1720; GFX12-NEXT: s_wait_alu 0xfffe 1721; GFX12-NEXT: s_add_co_i32 s59, s32, s0 1722; GFX12-NEXT: ;;#ASMSTART 1723; GFX12-NEXT: ; use alloca0 v0 1724; GFX12-NEXT: ;;#ASMEND 1725; GFX12-NEXT: s_wait_alu 0xfffe 1726; GFX12-NEXT: s_addk_co_i32 s59, 0x4000 1727; GFX12-NEXT: s_and_b32 s0, 0, exec_lo 1728; GFX12-NEXT: ;;#ASMSTART 1729; GFX12-NEXT: ; use s59, scc 1730; GFX12-NEXT: ;;#ASMEND 1731; GFX12-NEXT: v_readlane_b32 s59, v1, 0 1732; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 1733; GFX12-NEXT: scratch_load_b32 v1, off, s32 offset:32768 ; 4-byte Folded Reload 1734; GFX12-NEXT: s_wait_alu 0xfffe 1735; GFX12-NEXT: s_mov_b32 exec_lo, s0 1736; GFX12-NEXT: s_wait_loadcnt 0x0 1737; GFX12-NEXT: s_wait_alu 0xfffe 1738; GFX12-NEXT: s_setpc_b64 s[30:31] 1739; 1740; GFX8-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset: 1741; GFX8: ; %bb.0: 1742; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1743; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 1744; GFX8-NEXT: s_add_i32 s6, s32, 0x201000 1745; GFX8-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill 1746; GFX8-NEXT: s_mov_b64 exec, s[4:5] 1747; GFX8-NEXT: v_writelane_b32 v1, s59, 0 1748; GFX8-NEXT: s_lshl_b32 s4, s16, 2 1749; GFX8-NEXT: s_lshr_b32 s59, s32, 6 1750; GFX8-NEXT: s_add_i32 s59, s59, s4 1751; GFX8-NEXT: s_addk_i32 s59, 0x4040 1752; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32 1753; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0 1754; GFX8-NEXT: ;;#ASMSTART 1755; GFX8-NEXT: ; use alloca0 v0 1756; GFX8-NEXT: ;;#ASMEND 1757; 
GFX8-NEXT: s_and_b64 s[4:5], 0, exec 1758; GFX8-NEXT: ;;#ASMSTART 1759; GFX8-NEXT: ; use s59, scc 1760; GFX8-NEXT: ;;#ASMEND 1761; GFX8-NEXT: v_readlane_b32 s59, v1, 0 1762; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 1763; GFX8-NEXT: s_add_i32 s6, s32, 0x201000 1764; GFX8-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload 1765; GFX8-NEXT: s_mov_b64 exec, s[4:5] 1766; GFX8-NEXT: s_waitcnt vmcnt(0) 1767; GFX8-NEXT: s_setpc_b64 s[30:31] 1768; 1769; GFX900-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset: 1770; GFX900: ; %bb.0: 1771; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1772; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 1773; GFX900-NEXT: s_add_i32 s6, s32, 0x201000 1774; GFX900-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill 1775; GFX900-NEXT: s_mov_b64 exec, s[4:5] 1776; GFX900-NEXT: v_writelane_b32 v1, s59, 0 1777; GFX900-NEXT: s_lshl_b32 s4, s16, 2 1778; GFX900-NEXT: s_lshr_b32 s59, s32, 6 1779; GFX900-NEXT: s_add_i32 s59, s59, s4 1780; GFX900-NEXT: s_addk_i32 s59, 0x4040 1781; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 1782; GFX900-NEXT: v_add_u32_e32 v0, 64, v0 1783; GFX900-NEXT: ;;#ASMSTART 1784; GFX900-NEXT: ; use alloca0 v0 1785; GFX900-NEXT: ;;#ASMEND 1786; GFX900-NEXT: s_and_b64 s[4:5], 0, exec 1787; GFX900-NEXT: ;;#ASMSTART 1788; GFX900-NEXT: ; use s59, scc 1789; GFX900-NEXT: ;;#ASMEND 1790; GFX900-NEXT: v_readlane_b32 s59, v1, 0 1791; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 1792; GFX900-NEXT: s_add_i32 s6, s32, 0x201000 1793; GFX900-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload 1794; GFX900-NEXT: s_mov_b64 exec, s[4:5] 1795; GFX900-NEXT: s_waitcnt vmcnt(0) 1796; GFX900-NEXT: s_setpc_b64 s[30:31] 1797; 1798; GFX940-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset: 1799; GFX940: ; %bb.0: 1800; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1801; GFX940-NEXT: s_xor_saveexec_b64 s[2:3], -1 1802; GFX940-NEXT: s_add_i32 s1, s32, 
0x8040 1803; GFX940-NEXT: scratch_store_dword off, v1, s1 sc0 sc1 ; 4-byte Folded Spill 1804; GFX940-NEXT: s_mov_b64 exec, s[2:3] 1805; GFX940-NEXT: s_lshl_b32 s0, s0, 2 1806; GFX940-NEXT: v_writelane_b32 v1, s59, 0 1807; GFX940-NEXT: s_add_i32 s59, s32, s0 1808; GFX940-NEXT: s_addk_i32 s59, 0x4040 1809; GFX940-NEXT: s_add_i32 s0, s32, 64 1810; GFX940-NEXT: v_mov_b32_e32 v0, s0 1811; GFX940-NEXT: ;;#ASMSTART 1812; GFX940-NEXT: ; use alloca0 v0 1813; GFX940-NEXT: ;;#ASMEND 1814; GFX940-NEXT: s_and_b64 s[0:1], 0, exec 1815; GFX940-NEXT: ;;#ASMSTART 1816; GFX940-NEXT: ; use s59, scc 1817; GFX940-NEXT: ;;#ASMEND 1818; GFX940-NEXT: v_readlane_b32 s59, v1, 0 1819; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 1820; GFX940-NEXT: s_add_i32 s2, s32, 0x8040 1821; GFX940-NEXT: scratch_load_dword v1, off, s2 ; 4-byte Folded Reload 1822; GFX940-NEXT: s_mov_b64 exec, s[0:1] 1823; GFX940-NEXT: s_waitcnt vmcnt(0) 1824; GFX940-NEXT: s_setpc_b64 s[30:31] 1825 %alloca0 = alloca [4096 x i32], align 64, addrspace(5) 1826 %alloca1 = alloca [4096 x i32], align 4, addrspace(5) 1827 %alloca1.offset = getelementptr [4096 x i32], ptr addrspace(5) %alloca1, i32 0, i32 %soffset 1828 call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0) 1829 call void asm sideeffect "; use $0, $1", "{s59},{scc}"(ptr addrspace(5) %alloca1.offset, i32 0) 1830 ret void 1831} 1832 1833attributes #0 = { nounwind alignstack=64 "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="10,10" "no-realign-stack" } 1834attributes #1 = { nounwind alignstack=64 "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="10,10" "no-realign-stack" "frame-pointer"="all" } 1835;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: 1836; GFX9: {{.*}} 1837