1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -global-isel -mattr=-promote-alloca -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s 3; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -global-isel -mattr=-promote-alloca -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s 4; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -global-isel -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=GFX940 %s 5; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -global-isel -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s 6; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s 7 8; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -global-isel -mattr=-unaligned-access-mode -mattr=-promote-alloca -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck -check-prefixes=UNALIGNED_GFX9 %s 9; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -global-isel -mattr=-unaligned-access-mode -mattr=-promote-alloca -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck -check-prefixes=UNALIGNED_GFX10 %s 10; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -global-isel -mattr=-unaligned-access-mode -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=UNALIGNED_GFX940 %s 11; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -global-isel -mattr=-unaligned-access-mode -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=UNALIGNED_GFX11 %s 12; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel -mattr=-unaligned-access-mode -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=UNALIGNED_GFX12 %s 13 14define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) { 15; GFX9-LABEL: 
store_load_sindex_kernel: 16; GFX9: ; %bb.0: ; %bb 17; GFX9-NEXT: s_load_dword s0, s[4:5], 0x0 18; GFX9-NEXT: s_add_u32 flat_scratch_lo, s8, s13 19; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s9, 0 20; GFX9-NEXT: v_mov_b32_e32 v0, 15 21; GFX9-NEXT: s_waitcnt lgkmcnt(0) 22; GFX9-NEXT: s_lshl_b32 s1, s0, 2 23; GFX9-NEXT: s_and_b32 s0, s0, 15 24; GFX9-NEXT: s_lshl_b32 s0, s0, 2 25; GFX9-NEXT: scratch_store_dword off, v0, s1 26; GFX9-NEXT: s_waitcnt vmcnt(0) 27; GFX9-NEXT: scratch_load_dword v0, off, s0 glc 28; GFX9-NEXT: s_waitcnt vmcnt(0) 29; GFX9-NEXT: s_endpgm 30; 31; GFX10-LABEL: store_load_sindex_kernel: 32; GFX10: ; %bb.0: ; %bb 33; GFX10-NEXT: s_add_u32 s8, s8, s13 34; GFX10-NEXT: s_addc_u32 s9, s9, 0 35; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8 36; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9 37; GFX10-NEXT: s_load_dword s0, s[4:5], 0x0 38; GFX10-NEXT: v_mov_b32_e32 v0, 15 39; GFX10-NEXT: s_waitcnt lgkmcnt(0) 40; GFX10-NEXT: s_and_b32 s1, s0, 15 41; GFX10-NEXT: s_lshl_b32 s0, s0, 2 42; GFX10-NEXT: s_lshl_b32 s1, s1, 2 43; GFX10-NEXT: scratch_store_dword off, v0, s0 44; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 45; GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc 46; GFX10-NEXT: s_waitcnt vmcnt(0) 47; GFX10-NEXT: s_endpgm 48; 49; GFX940-LABEL: store_load_sindex_kernel: 50; GFX940: ; %bb.0: ; %bb 51; GFX940-NEXT: s_load_dword s0, s[4:5], 0x0 52; GFX940-NEXT: v_mov_b32_e32 v0, 15 53; GFX940-NEXT: s_waitcnt lgkmcnt(0) 54; GFX940-NEXT: s_lshl_b32 s1, s0, 2 55; GFX940-NEXT: s_and_b32 s0, s0, 15 56; GFX940-NEXT: s_lshl_b32 s0, s0, 2 57; GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1 58; GFX940-NEXT: s_waitcnt vmcnt(0) 59; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 60; GFX940-NEXT: s_waitcnt vmcnt(0) 61; GFX940-NEXT: s_endpgm 62; 63; GFX11-LABEL: store_load_sindex_kernel: 64; GFX11: ; %bb.0: ; %bb 65; GFX11-NEXT: s_load_b32 s0, s[4:5], 0x0 66; GFX11-NEXT: v_mov_b32_e32 v0, 15 67; GFX11-NEXT: s_waitcnt lgkmcnt(0) 68; GFX11-NEXT: 
s_and_b32 s1, s0, 15 69; GFX11-NEXT: s_lshl_b32 s0, s0, 2 70; GFX11-NEXT: s_lshl_b32 s1, s1, 2 71; GFX11-NEXT: scratch_store_b32 off, v0, s0 dlc 72; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 73; GFX11-NEXT: scratch_load_b32 v0, off, s1 glc dlc 74; GFX11-NEXT: s_waitcnt vmcnt(0) 75; GFX11-NEXT: s_endpgm 76; 77; GFX12-LABEL: store_load_sindex_kernel: 78; GFX12: ; %bb.0: ; %bb 79; GFX12-NEXT: s_load_b32 s0, s[4:5], 0x0 80; GFX12-NEXT: v_mov_b32_e32 v0, 15 81; GFX12-NEXT: s_wait_kmcnt 0x0 82; GFX12-NEXT: s_and_b32 s1, s0, 15 83; GFX12-NEXT: s_lshl_b32 s0, s0, 2 84; GFX12-NEXT: s_lshl_b32 s1, s1, 2 85; GFX12-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SYS 86; GFX12-NEXT: s_wait_storecnt 0x0 87; GFX12-NEXT: scratch_load_b32 v0, off, s1 scope:SCOPE_SYS 88; GFX12-NEXT: s_wait_loadcnt 0x0 89; GFX12-NEXT: s_endpgm 90; 91; UNALIGNED_GFX9-LABEL: store_load_sindex_kernel: 92; UNALIGNED_GFX9: ; %bb.0: ; %bb 93; UNALIGNED_GFX9-NEXT: s_load_dword s0, s[4:5], 0x0 94; UNALIGNED_GFX9-NEXT: s_add_u32 flat_scratch_lo, s8, s13 95; UNALIGNED_GFX9-NEXT: s_addc_u32 flat_scratch_hi, s9, 0 96; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 15 97; UNALIGNED_GFX9-NEXT: s_waitcnt lgkmcnt(0) 98; UNALIGNED_GFX9-NEXT: s_lshl_b32 s1, s0, 2 99; UNALIGNED_GFX9-NEXT: s_and_b32 s0, s0, 15 100; UNALIGNED_GFX9-NEXT: s_lshl_b32 s0, s0, 2 101; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s1 102; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 103; UNALIGNED_GFX9-NEXT: scratch_load_dword v0, off, s0 glc 104; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 105; UNALIGNED_GFX9-NEXT: s_endpgm 106; 107; UNALIGNED_GFX10-LABEL: store_load_sindex_kernel: 108; UNALIGNED_GFX10: ; %bb.0: ; %bb 109; UNALIGNED_GFX10-NEXT: s_add_u32 s8, s8, s13 110; UNALIGNED_GFX10-NEXT: s_addc_u32 s9, s9, 0 111; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8 112; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9 113; UNALIGNED_GFX10-NEXT: s_load_dword s0, s[4:5], 0x0 114; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v0, 15 115; 
UNALIGNED_GFX10-NEXT: s_waitcnt lgkmcnt(0) 116; UNALIGNED_GFX10-NEXT: s_and_b32 s1, s0, 15 117; UNALIGNED_GFX10-NEXT: s_lshl_b32 s0, s0, 2 118; UNALIGNED_GFX10-NEXT: s_lshl_b32 s1, s1, 2 119; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v0, s0 120; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 121; UNALIGNED_GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc 122; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 123; UNALIGNED_GFX10-NEXT: s_endpgm 124; 125; UNALIGNED_GFX940-LABEL: store_load_sindex_kernel: 126; UNALIGNED_GFX940: ; %bb.0: ; %bb 127; UNALIGNED_GFX940-NEXT: s_load_dword s0, s[4:5], 0x0 128; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v0, 15 129; UNALIGNED_GFX940-NEXT: s_waitcnt lgkmcnt(0) 130; UNALIGNED_GFX940-NEXT: s_lshl_b32 s1, s0, 2 131; UNALIGNED_GFX940-NEXT: s_and_b32 s0, s0, 15 132; UNALIGNED_GFX940-NEXT: s_lshl_b32 s0, s0, 2 133; UNALIGNED_GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1 134; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 135; UNALIGNED_GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 136; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 137; UNALIGNED_GFX940-NEXT: s_endpgm 138; 139; UNALIGNED_GFX11-LABEL: store_load_sindex_kernel: 140; UNALIGNED_GFX11: ; %bb.0: ; %bb 141; UNALIGNED_GFX11-NEXT: s_load_b32 s0, s[4:5], 0x0 142; UNALIGNED_GFX11-NEXT: v_mov_b32_e32 v0, 15 143; UNALIGNED_GFX11-NEXT: s_waitcnt lgkmcnt(0) 144; UNALIGNED_GFX11-NEXT: s_and_b32 s1, s0, 15 145; UNALIGNED_GFX11-NEXT: s_lshl_b32 s0, s0, 2 146; UNALIGNED_GFX11-NEXT: s_lshl_b32 s1, s1, 2 147; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v0, s0 dlc 148; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 149; UNALIGNED_GFX11-NEXT: scratch_load_b32 v0, off, s1 glc dlc 150; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 151; UNALIGNED_GFX11-NEXT: s_endpgm 152; 153; UNALIGNED_GFX12-LABEL: store_load_sindex_kernel: 154; UNALIGNED_GFX12: ; %bb.0: ; %bb 155; UNALIGNED_GFX12-NEXT: s_load_b32 s0, s[4:5], 0x0 156; UNALIGNED_GFX12-NEXT: v_mov_b32_e32 v0, 15 157; UNALIGNED_GFX12-NEXT: 
s_wait_kmcnt 0x0 158; UNALIGNED_GFX12-NEXT: s_and_b32 s1, s0, 15 159; UNALIGNED_GFX12-NEXT: s_lshl_b32 s0, s0, 2 160; UNALIGNED_GFX12-NEXT: s_lshl_b32 s1, s1, 2 161; UNALIGNED_GFX12-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SYS 162; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 163; UNALIGNED_GFX12-NEXT: scratch_load_b32 v0, off, s1 scope:SCOPE_SYS 164; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 165; UNALIGNED_GFX12-NEXT: s_endpgm 166bb: 167 %i = alloca [32 x float], align 4, addrspace(5) 168 %i7 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %idx 169 store volatile i32 15, ptr addrspace(5) %i7, align 4 170 %i9 = and i32 %idx, 15 171 %i10 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %i9 172 %i12 = load volatile i32, ptr addrspace(5) %i10, align 4 173 ret void 174} 175 176define amdgpu_kernel void @store_load_vindex_kernel() { 177; GFX9-LABEL: store_load_vindex_kernel: 178; GFX9: ; %bb.0: ; %bb 179; GFX9-NEXT: s_add_u32 flat_scratch_lo, s8, s13 180; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s9, 0 181; GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0 182; GFX9-NEXT: v_mov_b32_e32 v2, 15 183; GFX9-NEXT: v_sub_u32_e32 v0, 0, v0 184; GFX9-NEXT: scratch_store_dword v1, v2, off 185; GFX9-NEXT: s_waitcnt vmcnt(0) 186; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 187; GFX9-NEXT: scratch_load_dword v0, v0, off offset:124 glc 188; GFX9-NEXT: s_waitcnt vmcnt(0) 189; GFX9-NEXT: s_endpgm 190; 191; GFX10-LABEL: store_load_vindex_kernel: 192; GFX10: ; %bb.0: ; %bb 193; GFX10-NEXT: s_add_u32 s8, s8, s13 194; GFX10-NEXT: s_addc_u32 s9, s9, 0 195; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8 196; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9 197; GFX10-NEXT: v_sub_nc_u32_e32 v1, 0, v0 198; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 199; GFX10-NEXT: v_mov_b32_e32 v2, 15 200; GFX10-NEXT: v_lshlrev_b32_e32 v1, 2, v1 201; GFX10-NEXT: scratch_store_dword v0, v2, off 202; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 203; GFX10-NEXT: 
scratch_load_dword v0, v1, off offset:124 glc dlc 204; GFX10-NEXT: s_waitcnt vmcnt(0) 205; GFX10-NEXT: s_endpgm 206; 207; GFX940-LABEL: store_load_vindex_kernel: 208; GFX940: ; %bb.0: ; %bb 209; GFX940-NEXT: v_and_b32_e32 v0, 0x3ff, v0 210; GFX940-NEXT: v_lshlrev_b32_e32 v1, 2, v0 211; GFX940-NEXT: v_mov_b32_e32 v2, 15 212; GFX940-NEXT: v_sub_u32_e32 v0, 0, v0 213; GFX940-NEXT: scratch_store_dword v1, v2, off sc0 sc1 214; GFX940-NEXT: s_waitcnt vmcnt(0) 215; GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0 216; GFX940-NEXT: scratch_load_dword v0, v0, off offset:124 sc0 sc1 217; GFX940-NEXT: s_waitcnt vmcnt(0) 218; GFX940-NEXT: s_endpgm 219; 220; GFX11-LABEL: store_load_vindex_kernel: 221; GFX11: ; %bb.0: ; %bb 222; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 223; GFX11-NEXT: v_mov_b32_e32 v2, 15 224; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) 225; GFX11-NEXT: v_sub_nc_u32_e32 v1, 0, v0 226; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 227; GFX11-NEXT: v_lshlrev_b32_e32 v1, 2, v1 228; GFX11-NEXT: scratch_store_b32 v0, v2, off dlc 229; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 230; GFX11-NEXT: scratch_load_b32 v0, v1, off offset:124 glc dlc 231; GFX11-NEXT: s_waitcnt vmcnt(0) 232; GFX11-NEXT: s_endpgm 233; 234; GFX12-LABEL: store_load_vindex_kernel: 235; GFX12: ; %bb.0: ; %bb 236; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0 237; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 238; GFX12-NEXT: v_sub_nc_u32_e32 v1, 0, v0 239; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0 240; GFX12-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_lshlrev_b32 v1, 2, v1 241; GFX12-NEXT: scratch_store_b32 v0, v2, off scope:SCOPE_SYS 242; GFX12-NEXT: s_wait_storecnt 0x0 243; GFX12-NEXT: scratch_load_b32 v0, v1, off offset:124 scope:SCOPE_SYS 244; GFX12-NEXT: s_wait_loadcnt 0x0 245; GFX12-NEXT: s_endpgm 246; 247; UNALIGNED_GFX9-LABEL: store_load_vindex_kernel: 248; UNALIGNED_GFX9: ; %bb.0: ; %bb 249; UNALIGNED_GFX9-NEXT: s_add_u32 flat_scratch_lo, s8, 
s13 250; UNALIGNED_GFX9-NEXT: s_addc_u32 flat_scratch_hi, s9, 0 251; UNALIGNED_GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0 252; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v2, 15 253; UNALIGNED_GFX9-NEXT: v_sub_u32_e32 v0, 0, v0 254; UNALIGNED_GFX9-NEXT: scratch_store_dword v1, v2, off 255; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 256; UNALIGNED_GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 257; UNALIGNED_GFX9-NEXT: scratch_load_dword v0, v0, off offset:124 glc 258; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 259; UNALIGNED_GFX9-NEXT: s_endpgm 260; 261; UNALIGNED_GFX10-LABEL: store_load_vindex_kernel: 262; UNALIGNED_GFX10: ; %bb.0: ; %bb 263; UNALIGNED_GFX10-NEXT: s_add_u32 s8, s8, s13 264; UNALIGNED_GFX10-NEXT: s_addc_u32 s9, s9, 0 265; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8 266; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9 267; UNALIGNED_GFX10-NEXT: v_sub_nc_u32_e32 v1, 0, v0 268; UNALIGNED_GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 269; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v2, 15 270; UNALIGNED_GFX10-NEXT: v_lshlrev_b32_e32 v1, 2, v1 271; UNALIGNED_GFX10-NEXT: scratch_store_dword v0, v2, off 272; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 273; UNALIGNED_GFX10-NEXT: scratch_load_dword v0, v1, off offset:124 glc dlc 274; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 275; UNALIGNED_GFX10-NEXT: s_endpgm 276; 277; UNALIGNED_GFX940-LABEL: store_load_vindex_kernel: 278; UNALIGNED_GFX940: ; %bb.0: ; %bb 279; UNALIGNED_GFX940-NEXT: v_and_b32_e32 v0, 0x3ff, v0 280; UNALIGNED_GFX940-NEXT: v_lshlrev_b32_e32 v1, 2, v0 281; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v2, 15 282; UNALIGNED_GFX940-NEXT: v_sub_u32_e32 v0, 0, v0 283; UNALIGNED_GFX940-NEXT: scratch_store_dword v1, v2, off sc0 sc1 284; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 285; UNALIGNED_GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0 286; UNALIGNED_GFX940-NEXT: scratch_load_dword v0, v0, off offset:124 sc0 sc1 287; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 288; UNALIGNED_GFX940-NEXT: s_endpgm 289; 290; 
UNALIGNED_GFX11-LABEL: store_load_vindex_kernel: 291; UNALIGNED_GFX11: ; %bb.0: ; %bb 292; UNALIGNED_GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 293; UNALIGNED_GFX11-NEXT: v_mov_b32_e32 v2, 15 294; UNALIGNED_GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) 295; UNALIGNED_GFX11-NEXT: v_sub_nc_u32_e32 v1, 0, v0 296; UNALIGNED_GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 297; UNALIGNED_GFX11-NEXT: v_lshlrev_b32_e32 v1, 2, v1 298; UNALIGNED_GFX11-NEXT: scratch_store_b32 v0, v2, off dlc 299; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 300; UNALIGNED_GFX11-NEXT: scratch_load_b32 v0, v1, off offset:124 glc dlc 301; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 302; UNALIGNED_GFX11-NEXT: s_endpgm 303; 304; UNALIGNED_GFX12-LABEL: store_load_vindex_kernel: 305; UNALIGNED_GFX12: ; %bb.0: ; %bb 306; UNALIGNED_GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0 307; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 308; UNALIGNED_GFX12-NEXT: v_sub_nc_u32_e32 v1, 0, v0 309; UNALIGNED_GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0 310; UNALIGNED_GFX12-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_lshlrev_b32 v1, 2, v1 311; UNALIGNED_GFX12-NEXT: scratch_store_b32 v0, v2, off scope:SCOPE_SYS 312; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 313; UNALIGNED_GFX12-NEXT: scratch_load_b32 v0, v1, off offset:124 scope:SCOPE_SYS 314; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 315; UNALIGNED_GFX12-NEXT: s_endpgm 316bb: 317 %i = alloca [32 x float], align 4, addrspace(5) 318 %i2 = tail call i32 @llvm.amdgcn.workitem.id.x() 319 %i3 = zext i32 %i2 to i64 320 %i7 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %i2 321 store volatile i32 15, ptr addrspace(5) %i7, align 4 322 %i9 = sub nsw i32 31, %i2 323 %i10 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %i9 324 %i12 = load volatile i32, ptr addrspace(5) %i10, align 4 325 ret void 326} 327 328define void @store_load_vindex_foo(i32 %idx) { 329; GFX9-LABEL: 
store_load_vindex_foo: 330; GFX9: ; %bb.0: ; %bb 331; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 332; GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0 333; GFX9-NEXT: v_and_b32_e32 v0, 15, v0 334; GFX9-NEXT: v_add_u32_e32 v1, s32, v1 335; GFX9-NEXT: v_mov_b32_e32 v2, 15 336; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 337; GFX9-NEXT: scratch_store_dword v1, v2, off 338; GFX9-NEXT: s_waitcnt vmcnt(0) 339; GFX9-NEXT: v_add_u32_e32 v0, s32, v0 340; GFX9-NEXT: scratch_load_dword v0, v0, off glc 341; GFX9-NEXT: s_waitcnt vmcnt(0) 342; GFX9-NEXT: s_setpc_b64 s[30:31] 343; 344; GFX10-LABEL: store_load_vindex_foo: 345; GFX10: ; %bb.0: ; %bb 346; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 347; GFX10-NEXT: v_and_b32_e32 v1, 15, v0 348; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 349; GFX10-NEXT: v_mov_b32_e32 v2, 15 350; GFX10-NEXT: v_lshlrev_b32_e32 v1, 2, v1 351; GFX10-NEXT: v_add_nc_u32_e32 v0, s32, v0 352; GFX10-NEXT: v_add_nc_u32_e32 v1, s32, v1 353; GFX10-NEXT: scratch_store_dword v0, v2, off 354; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 355; GFX10-NEXT: scratch_load_dword v0, v1, off glc dlc 356; GFX10-NEXT: s_waitcnt vmcnt(0) 357; GFX10-NEXT: s_setpc_b64 s[30:31] 358; 359; GFX940-LABEL: store_load_vindex_foo: 360; GFX940: ; %bb.0: ; %bb 361; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 362; GFX940-NEXT: v_lshlrev_b32_e32 v1, 2, v0 363; GFX940-NEXT: v_add_u32_e32 v1, s32, v1 364; GFX940-NEXT: v_mov_b32_e32 v2, 15 365; GFX940-NEXT: v_and_b32_e32 v0, 15, v0 366; GFX940-NEXT: scratch_store_dword v1, v2, off sc0 sc1 367; GFX940-NEXT: s_waitcnt vmcnt(0) 368; GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0 369; GFX940-NEXT: scratch_load_dword v0, v0, s32 sc0 sc1 370; GFX940-NEXT: s_waitcnt vmcnt(0) 371; GFX940-NEXT: s_setpc_b64 s[30:31] 372; 373; GFX11-LABEL: store_load_vindex_foo: 374; GFX11: ; %bb.0: ; %bb 375; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 376; GFX11-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_lshlrev_b32 v1, 2, v0 377; GFX11-NEXT: v_and_b32_e32 v0, 15, v0 
378; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 379; GFX11-NEXT: v_add_nc_u32_e32 v1, s32, v1 380; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 381; GFX11-NEXT: scratch_store_b32 v1, v2, off dlc 382; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 383; GFX11-NEXT: scratch_load_b32 v0, v0, s32 glc dlc 384; GFX11-NEXT: s_waitcnt vmcnt(0) 385; GFX11-NEXT: s_setpc_b64 s[30:31] 386; 387; GFX12-LABEL: store_load_vindex_foo: 388; GFX12: ; %bb.0: ; %bb 389; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 390; GFX12-NEXT: s_wait_expcnt 0x0 391; GFX12-NEXT: s_wait_samplecnt 0x0 392; GFX12-NEXT: s_wait_bvhcnt 0x0 393; GFX12-NEXT: s_wait_kmcnt 0x0 394; GFX12-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_and_b32 v1, 15, v0 395; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0 396; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) 397; GFX12-NEXT: v_lshlrev_b32_e32 v1, 2, v1 398; GFX12-NEXT: s_wait_storecnt 0x0 399; GFX12-NEXT: scratch_store_b32 v0, v2, s32 scope:SCOPE_SYS 400; GFX12-NEXT: s_wait_storecnt 0x0 401; GFX12-NEXT: scratch_load_b32 v0, v1, s32 scope:SCOPE_SYS 402; GFX12-NEXT: s_wait_loadcnt 0x0 403; GFX12-NEXT: s_setpc_b64 s[30:31] 404; 405; UNALIGNED_GFX9-LABEL: store_load_vindex_foo: 406; UNALIGNED_GFX9: ; %bb.0: ; %bb 407; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 408; UNALIGNED_GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0 409; UNALIGNED_GFX9-NEXT: v_and_b32_e32 v0, 15, v0 410; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v1, s32, v1 411; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v2, 15 412; UNALIGNED_GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 413; UNALIGNED_GFX9-NEXT: scratch_store_dword v1, v2, off 414; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 415; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v0, s32, v0 416; UNALIGNED_GFX9-NEXT: scratch_load_dword v0, v0, off glc 417; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 418; UNALIGNED_GFX9-NEXT: s_setpc_b64 s[30:31] 419; 420; UNALIGNED_GFX10-LABEL: store_load_vindex_foo: 421; UNALIGNED_GFX10: ; %bb.0: ; %bb 422; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 423; UNALIGNED_GFX10-NEXT: v_and_b32_e32 v1, 15, v0 424; UNALIGNED_GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 425; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v2, 15 426; UNALIGNED_GFX10-NEXT: v_lshlrev_b32_e32 v1, 2, v1 427; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v0, s32, v0 428; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v1, s32, v1 429; UNALIGNED_GFX10-NEXT: scratch_store_dword v0, v2, off 430; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 431; UNALIGNED_GFX10-NEXT: scratch_load_dword v0, v1, off glc dlc 432; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 433; UNALIGNED_GFX10-NEXT: s_setpc_b64 s[30:31] 434; 435; UNALIGNED_GFX940-LABEL: store_load_vindex_foo: 436; UNALIGNED_GFX940: ; %bb.0: ; %bb 437; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 438; UNALIGNED_GFX940-NEXT: v_lshlrev_b32_e32 v1, 2, v0 439; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v1, s32, v1 440; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v2, 15 441; UNALIGNED_GFX940-NEXT: v_and_b32_e32 v0, 15, v0 442; UNALIGNED_GFX940-NEXT: scratch_store_dword v1, v2, off sc0 sc1 443; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 444; UNALIGNED_GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0 445; UNALIGNED_GFX940-NEXT: scratch_load_dword v0, v0, s32 sc0 sc1 446; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 447; UNALIGNED_GFX940-NEXT: s_setpc_b64 s[30:31] 448; 449; UNALIGNED_GFX11-LABEL: store_load_vindex_foo: 450; UNALIGNED_GFX11: ; %bb.0: ; %bb 451; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 452; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_lshlrev_b32 v1, 2, v0 453; UNALIGNED_GFX11-NEXT: v_and_b32_e32 v0, 15, v0 454; UNALIGNED_GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 455; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v1, s32, v1 456; UNALIGNED_GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 457; UNALIGNED_GFX11-NEXT: scratch_store_b32 v1, v2, off dlc 458; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 459; UNALIGNED_GFX11-NEXT: scratch_load_b32 v0, v0, s32 glc 
dlc 460; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 461; UNALIGNED_GFX11-NEXT: s_setpc_b64 s[30:31] 462; 463; UNALIGNED_GFX12-LABEL: store_load_vindex_foo: 464; UNALIGNED_GFX12: ; %bb.0: ; %bb 465; UNALIGNED_GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 466; UNALIGNED_GFX12-NEXT: s_wait_expcnt 0x0 467; UNALIGNED_GFX12-NEXT: s_wait_samplecnt 0x0 468; UNALIGNED_GFX12-NEXT: s_wait_bvhcnt 0x0 469; UNALIGNED_GFX12-NEXT: s_wait_kmcnt 0x0 470; UNALIGNED_GFX12-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_and_b32 v1, 15, v0 471; UNALIGNED_GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0 472; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) 473; UNALIGNED_GFX12-NEXT: v_lshlrev_b32_e32 v1, 2, v1 474; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 475; UNALIGNED_GFX12-NEXT: scratch_store_b32 v0, v2, s32 scope:SCOPE_SYS 476; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 477; UNALIGNED_GFX12-NEXT: scratch_load_b32 v0, v1, s32 scope:SCOPE_SYS 478; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 479; UNALIGNED_GFX12-NEXT: s_setpc_b64 s[30:31] 480bb: 481 %i = alloca [32 x float], align 4, addrspace(5) 482 %i7 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %idx 483 store volatile i32 15, ptr addrspace(5) %i7, align 4 484 %i9 = and i32 %idx, 15 485 %i10 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %i9 486 %i12 = load volatile i32, ptr addrspace(5) %i10, align 4 487 ret void 488} 489 490define void @private_ptr_foo(ptr addrspace(5) nocapture %arg) { 491; GFX9-LABEL: private_ptr_foo: 492; GFX9: ; %bb.0: 493; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 494; GFX9-NEXT: v_mov_b32_e32 v1, 0x41200000 495; GFX9-NEXT: scratch_store_dword v0, v1, off offset:4 496; GFX9-NEXT: s_waitcnt vmcnt(0) 497; GFX9-NEXT: s_setpc_b64 s[30:31] 498; 499; GFX10-LABEL: private_ptr_foo: 500; GFX10: ; %bb.0: 501; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 502; GFX10-NEXT: v_mov_b32_e32 v1, 0x41200000 503; GFX10-NEXT: scratch_store_dword v0, v1, off offset:4 504; GFX10-NEXT: 
s_setpc_b64 s[30:31] 505; 506; GFX940-LABEL: private_ptr_foo: 507; GFX940: ; %bb.0: 508; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 509; GFX940-NEXT: v_mov_b32_e32 v1, 0x41200000 510; GFX940-NEXT: scratch_store_dword v0, v1, off offset:4 sc0 sc1 511; GFX940-NEXT: s_waitcnt vmcnt(0) 512; GFX940-NEXT: s_setpc_b64 s[30:31] 513; 514; GFX11-LABEL: private_ptr_foo: 515; GFX11: ; %bb.0: 516; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 517; GFX11-NEXT: v_mov_b32_e32 v1, 0x41200000 518; GFX11-NEXT: scratch_store_b32 v0, v1, off offset:4 519; GFX11-NEXT: s_setpc_b64 s[30:31] 520; 521; GFX12-LABEL: private_ptr_foo: 522; GFX12: ; %bb.0: 523; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 524; GFX12-NEXT: s_wait_expcnt 0x0 525; GFX12-NEXT: s_wait_samplecnt 0x0 526; GFX12-NEXT: s_wait_bvhcnt 0x0 527; GFX12-NEXT: s_wait_kmcnt 0x0 528; GFX12-NEXT: v_mov_b32_e32 v1, 0x41200000 529; GFX12-NEXT: scratch_store_b32 v0, v1, off offset:4 530; GFX12-NEXT: s_setpc_b64 s[30:31] 531; 532; UNALIGNED_GFX9-LABEL: private_ptr_foo: 533; UNALIGNED_GFX9: ; %bb.0: 534; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 535; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v1, 0x41200000 536; UNALIGNED_GFX9-NEXT: scratch_store_dword v0, v1, off offset:4 537; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 538; UNALIGNED_GFX9-NEXT: s_setpc_b64 s[30:31] 539; 540; UNALIGNED_GFX10-LABEL: private_ptr_foo: 541; UNALIGNED_GFX10: ; %bb.0: 542; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 543; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 0x41200000 544; UNALIGNED_GFX10-NEXT: scratch_store_dword v0, v1, off offset:4 545; UNALIGNED_GFX10-NEXT: s_setpc_b64 s[30:31] 546; 547; UNALIGNED_GFX940-LABEL: private_ptr_foo: 548; UNALIGNED_GFX940: ; %bb.0: 549; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 550; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v1, 0x41200000 551; UNALIGNED_GFX940-NEXT: scratch_store_dword v0, v1, off offset:4 sc0 sc1 552; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 553; 
UNALIGNED_GFX940-NEXT: s_setpc_b64 s[30:31] 554; 555; UNALIGNED_GFX11-LABEL: private_ptr_foo: 556; UNALIGNED_GFX11: ; %bb.0: 557; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 558; UNALIGNED_GFX11-NEXT: v_mov_b32_e32 v1, 0x41200000 559; UNALIGNED_GFX11-NEXT: scratch_store_b32 v0, v1, off offset:4 560; UNALIGNED_GFX11-NEXT: s_setpc_b64 s[30:31] 561; 562; UNALIGNED_GFX12-LABEL: private_ptr_foo: 563; UNALIGNED_GFX12: ; %bb.0: 564; UNALIGNED_GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 565; UNALIGNED_GFX12-NEXT: s_wait_expcnt 0x0 566; UNALIGNED_GFX12-NEXT: s_wait_samplecnt 0x0 567; UNALIGNED_GFX12-NEXT: s_wait_bvhcnt 0x0 568; UNALIGNED_GFX12-NEXT: s_wait_kmcnt 0x0 569; UNALIGNED_GFX12-NEXT: v_mov_b32_e32 v1, 0x41200000 570; UNALIGNED_GFX12-NEXT: scratch_store_b32 v0, v1, off offset:4 571; UNALIGNED_GFX12-NEXT: s_setpc_b64 s[30:31] 572 %gep = getelementptr inbounds float, ptr addrspace(5) %arg, i32 1 573 store float 1.000000e+01, ptr addrspace(5) %gep, align 4 574 ret void 575} 576 577define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) { 578; GFX9-LABEL: store_load_sindex_small_offset_kernel: 579; GFX9: ; %bb.0: ; %bb 580; GFX9-NEXT: s_load_dword s0, s[4:5], 0x0 581; GFX9-NEXT: s_add_u32 flat_scratch_lo, s8, s13 582; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s9, 0 583; GFX9-NEXT: s_mov_b32 s1, 0 584; GFX9-NEXT: scratch_load_dword v0, off, s1 glc 585; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 586; GFX9-NEXT: s_lshl_b32 s1, s0, 2 587; GFX9-NEXT: s_and_b32 s0, s0, 15 588; GFX9-NEXT: v_mov_b32_e32 v0, 15 589; GFX9-NEXT: s_addk_i32 s1, 0x100 590; GFX9-NEXT: s_lshl_b32 s0, s0, 2 591; GFX9-NEXT: scratch_store_dword off, v0, s1 592; GFX9-NEXT: s_waitcnt vmcnt(0) 593; GFX9-NEXT: s_addk_i32 s0, 0x100 594; GFX9-NEXT: scratch_load_dword v0, off, s0 glc 595; GFX9-NEXT: s_waitcnt vmcnt(0) 596; GFX9-NEXT: s_endpgm 597; 598; GFX10-LABEL: store_load_sindex_small_offset_kernel: 599; GFX10: ; %bb.0: ; %bb 600; GFX10-NEXT: s_add_u32 s8, s8, s13 601; GFX10-NEXT: 
s_addc_u32 s9, s9, 0 602; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8 603; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9 604; GFX10-NEXT: s_load_dword s0, s[4:5], 0x0 605; GFX10-NEXT: scratch_load_dword v0, off, off glc dlc 606; GFX10-NEXT: s_waitcnt vmcnt(0) 607; GFX10-NEXT: v_mov_b32_e32 v0, 15 608; GFX10-NEXT: s_waitcnt lgkmcnt(0) 609; GFX10-NEXT: s_and_b32 s1, s0, 15 610; GFX10-NEXT: s_lshl_b32 s0, s0, 2 611; GFX10-NEXT: s_lshl_b32 s1, s1, 2 612; GFX10-NEXT: s_addk_i32 s0, 0x100 613; GFX10-NEXT: s_addk_i32 s1, 0x100 614; GFX10-NEXT: scratch_store_dword off, v0, s0 615; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 616; GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc 617; GFX10-NEXT: s_waitcnt vmcnt(0) 618; GFX10-NEXT: s_endpgm 619; 620; GFX940-LABEL: store_load_sindex_small_offset_kernel: 621; GFX940: ; %bb.0: ; %bb 622; GFX940-NEXT: s_load_dword s0, s[4:5], 0x0 623; GFX940-NEXT: scratch_load_dword v0, off, off sc0 sc1 624; GFX940-NEXT: s_waitcnt vmcnt(0) 625; GFX940-NEXT: v_mov_b32_e32 v0, 15 626; GFX940-NEXT: s_waitcnt lgkmcnt(0) 627; GFX940-NEXT: s_lshl_b32 s1, s0, 2 628; GFX940-NEXT: s_and_b32 s0, s0, 15 629; GFX940-NEXT: s_addk_i32 s1, 0x100 630; GFX940-NEXT: s_lshl_b32 s0, s0, 2 631; GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1 632; GFX940-NEXT: s_waitcnt vmcnt(0) 633; GFX940-NEXT: s_addk_i32 s0, 0x100 634; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 635; GFX940-NEXT: s_waitcnt vmcnt(0) 636; GFX940-NEXT: s_endpgm 637; 638; GFX11-LABEL: store_load_sindex_small_offset_kernel: 639; GFX11: ; %bb.0: ; %bb 640; GFX11-NEXT: s_load_b32 s0, s[4:5], 0x0 641; GFX11-NEXT: scratch_load_b32 v0, off, off glc dlc 642; GFX11-NEXT: s_waitcnt vmcnt(0) 643; GFX11-NEXT: v_mov_b32_e32 v0, 15 644; GFX11-NEXT: s_waitcnt lgkmcnt(0) 645; GFX11-NEXT: s_and_b32 s1, s0, 15 646; GFX11-NEXT: s_lshl_b32 s0, s0, 2 647; GFX11-NEXT: s_lshl_b32 s1, s1, 2 648; GFX11-NEXT: s_addk_i32 s0, 0x100 649; GFX11-NEXT: s_addk_i32 s1, 0x100 650; GFX11-NEXT: 
scratch_store_b32 off, v0, s0 dlc 651; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 652; GFX11-NEXT: scratch_load_b32 v0, off, s1 glc dlc 653; GFX11-NEXT: s_waitcnt vmcnt(0) 654; GFX11-NEXT: s_endpgm 655; 656; GFX12-LABEL: store_load_sindex_small_offset_kernel: 657; GFX12: ; %bb.0: ; %bb 658; GFX12-NEXT: s_load_b32 s0, s[4:5], 0x0 659; GFX12-NEXT: scratch_load_b32 v0, off, off scope:SCOPE_SYS 660; GFX12-NEXT: s_wait_loadcnt 0x0 661; GFX12-NEXT: v_mov_b32_e32 v0, 15 662; GFX12-NEXT: s_wait_kmcnt 0x0 663; GFX12-NEXT: s_and_b32 s1, s0, 15 664; GFX12-NEXT: s_lshl_b32 s0, s0, 2 665; GFX12-NEXT: s_lshl_b32 s1, s1, 2 666; GFX12-NEXT: s_addk_co_i32 s0, 0x100 667; GFX12-NEXT: s_addk_co_i32 s1, 0x100 668; GFX12-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SYS 669; GFX12-NEXT: s_wait_storecnt 0x0 670; GFX12-NEXT: scratch_load_b32 v0, off, s1 scope:SCOPE_SYS 671; GFX12-NEXT: s_wait_loadcnt 0x0 672; GFX12-NEXT: s_endpgm 673; 674; UNALIGNED_GFX9-LABEL: store_load_sindex_small_offset_kernel: 675; UNALIGNED_GFX9: ; %bb.0: ; %bb 676; UNALIGNED_GFX9-NEXT: s_load_dword s0, s[4:5], 0x0 677; UNALIGNED_GFX9-NEXT: s_add_u32 flat_scratch_lo, s8, s13 678; UNALIGNED_GFX9-NEXT: s_addc_u32 flat_scratch_hi, s9, 0 679; UNALIGNED_GFX9-NEXT: s_mov_b32 s1, 0 680; UNALIGNED_GFX9-NEXT: scratch_load_dword v0, off, s1 glc 681; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 682; UNALIGNED_GFX9-NEXT: s_lshl_b32 s1, s0, 2 683; UNALIGNED_GFX9-NEXT: s_and_b32 s0, s0, 15 684; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 15 685; UNALIGNED_GFX9-NEXT: s_addk_i32 s1, 0x100 686; UNALIGNED_GFX9-NEXT: s_lshl_b32 s0, s0, 2 687; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s1 688; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 689; UNALIGNED_GFX9-NEXT: s_addk_i32 s0, 0x100 690; UNALIGNED_GFX9-NEXT: scratch_load_dword v0, off, s0 glc 691; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 692; UNALIGNED_GFX9-NEXT: s_endpgm 693; 694; UNALIGNED_GFX10-LABEL: store_load_sindex_small_offset_kernel: 695; UNALIGNED_GFX10: ; %bb.0: ; %bb 696; 
UNALIGNED_GFX10-NEXT: s_add_u32 s8, s8, s13 697; UNALIGNED_GFX10-NEXT: s_addc_u32 s9, s9, 0 698; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8 699; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9 700; UNALIGNED_GFX10-NEXT: s_load_dword s0, s[4:5], 0x0 701; UNALIGNED_GFX10-NEXT: scratch_load_dword v0, off, off glc dlc 702; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 703; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v0, 15 704; UNALIGNED_GFX10-NEXT: s_waitcnt lgkmcnt(0) 705; UNALIGNED_GFX10-NEXT: s_and_b32 s1, s0, 15 706; UNALIGNED_GFX10-NEXT: s_lshl_b32 s0, s0, 2 707; UNALIGNED_GFX10-NEXT: s_lshl_b32 s1, s1, 2 708; UNALIGNED_GFX10-NEXT: s_addk_i32 s0, 0x100 709; UNALIGNED_GFX10-NEXT: s_addk_i32 s1, 0x100 710; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v0, s0 711; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 712; UNALIGNED_GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc 713; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 714; UNALIGNED_GFX10-NEXT: s_endpgm 715; 716; UNALIGNED_GFX940-LABEL: store_load_sindex_small_offset_kernel: 717; UNALIGNED_GFX940: ; %bb.0: ; %bb 718; UNALIGNED_GFX940-NEXT: s_load_dword s0, s[4:5], 0x0 719; UNALIGNED_GFX940-NEXT: scratch_load_dword v0, off, off sc0 sc1 720; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 721; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v0, 15 722; UNALIGNED_GFX940-NEXT: s_waitcnt lgkmcnt(0) 723; UNALIGNED_GFX940-NEXT: s_lshl_b32 s1, s0, 2 724; UNALIGNED_GFX940-NEXT: s_and_b32 s0, s0, 15 725; UNALIGNED_GFX940-NEXT: s_addk_i32 s1, 0x100 726; UNALIGNED_GFX940-NEXT: s_lshl_b32 s0, s0, 2 727; UNALIGNED_GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1 728; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 729; UNALIGNED_GFX940-NEXT: s_addk_i32 s0, 0x100 730; UNALIGNED_GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 731; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 732; UNALIGNED_GFX940-NEXT: s_endpgm 733; 734; UNALIGNED_GFX11-LABEL: store_load_sindex_small_offset_kernel: 735; UNALIGNED_GFX11: ; %bb.0: ; %bb 
736; UNALIGNED_GFX11-NEXT: s_load_b32 s0, s[4:5], 0x0 737; UNALIGNED_GFX11-NEXT: scratch_load_b32 v0, off, off glc dlc 738; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 739; UNALIGNED_GFX11-NEXT: v_mov_b32_e32 v0, 15 740; UNALIGNED_GFX11-NEXT: s_waitcnt lgkmcnt(0) 741; UNALIGNED_GFX11-NEXT: s_and_b32 s1, s0, 15 742; UNALIGNED_GFX11-NEXT: s_lshl_b32 s0, s0, 2 743; UNALIGNED_GFX11-NEXT: s_lshl_b32 s1, s1, 2 744; UNALIGNED_GFX11-NEXT: s_addk_i32 s0, 0x100 745; UNALIGNED_GFX11-NEXT: s_addk_i32 s1, 0x100 746; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v0, s0 dlc 747; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 748; UNALIGNED_GFX11-NEXT: scratch_load_b32 v0, off, s1 glc dlc 749; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 750; UNALIGNED_GFX11-NEXT: s_endpgm 751; 752; UNALIGNED_GFX12-LABEL: store_load_sindex_small_offset_kernel: 753; UNALIGNED_GFX12: ; %bb.0: ; %bb 754; UNALIGNED_GFX12-NEXT: s_load_b32 s0, s[4:5], 0x0 755; UNALIGNED_GFX12-NEXT: scratch_load_b32 v0, off, off scope:SCOPE_SYS 756; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 757; UNALIGNED_GFX12-NEXT: v_mov_b32_e32 v0, 15 758; UNALIGNED_GFX12-NEXT: s_wait_kmcnt 0x0 759; UNALIGNED_GFX12-NEXT: s_and_b32 s1, s0, 15 760; UNALIGNED_GFX12-NEXT: s_lshl_b32 s0, s0, 2 761; UNALIGNED_GFX12-NEXT: s_lshl_b32 s1, s1, 2 762; UNALIGNED_GFX12-NEXT: s_addk_co_i32 s0, 0x100 763; UNALIGNED_GFX12-NEXT: s_addk_co_i32 s1, 0x100 764; UNALIGNED_GFX12-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SYS 765; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 766; UNALIGNED_GFX12-NEXT: scratch_load_b32 v0, off, s1 scope:SCOPE_SYS 767; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 768; UNALIGNED_GFX12-NEXT: s_endpgm 769bb: 770 %padding = alloca [64 x i32], align 4, addrspace(5) 771 %i = alloca [32 x float], align 4, addrspace(5) 772 %pad_gep = getelementptr inbounds [64 x i32], ptr addrspace(5) %padding, i32 0, i32 undef 773 %pad_load = load volatile i32, ptr addrspace(5) %pad_gep, align 4 774 %i7 = getelementptr inbounds [32 x float], ptr addrspace(5) 
%i, i32 0, i32 %idx 775 store volatile i32 15, ptr addrspace(5) %i7, align 4 776 %i9 = and i32 %idx, 15 777 %i10 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %i9 778 %i12 = load volatile i32, ptr addrspace(5) %i10, align 4 779 ret void 780} 781 782define amdgpu_kernel void @store_load_vindex_small_offset_kernel() { 783; GFX9-LABEL: store_load_vindex_small_offset_kernel: 784; GFX9: ; %bb.0: ; %bb 785; GFX9-NEXT: s_add_u32 flat_scratch_lo, s8, s13 786; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s9, 0 787; GFX9-NEXT: s_mov_b32 s0, 0 788; GFX9-NEXT: scratch_load_dword v1, off, s0 glc 789; GFX9-NEXT: s_waitcnt vmcnt(0) 790; GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0 791; GFX9-NEXT: v_sub_u32_e32 v0, 0, v0 792; GFX9-NEXT: v_add_u32_e32 v1, 0x100, v1 793; GFX9-NEXT: v_mov_b32_e32 v2, 15 794; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 795; GFX9-NEXT: scratch_store_dword v1, v2, off 796; GFX9-NEXT: s_waitcnt vmcnt(0) 797; GFX9-NEXT: v_add_u32_e32 v0, 0x100, v0 798; GFX9-NEXT: scratch_load_dword v0, v0, off offset:124 glc 799; GFX9-NEXT: s_waitcnt vmcnt(0) 800; GFX9-NEXT: s_endpgm 801; 802; GFX10-LABEL: store_load_vindex_small_offset_kernel: 803; GFX10: ; %bb.0: ; %bb 804; GFX10-NEXT: s_add_u32 s8, s8, s13 805; GFX10-NEXT: s_addc_u32 s9, s9, 0 806; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8 807; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9 808; GFX10-NEXT: v_sub_nc_u32_e32 v1, 0, v0 809; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 810; GFX10-NEXT: v_mov_b32_e32 v2, 15 811; GFX10-NEXT: scratch_load_dword v3, off, off glc dlc 812; GFX10-NEXT: s_waitcnt vmcnt(0) 813; GFX10-NEXT: v_lshlrev_b32_e32 v1, 2, v1 814; GFX10-NEXT: v_add_nc_u32_e32 v0, 0x100, v0 815; GFX10-NEXT: v_add_nc_u32_e32 v1, 0x100, v1 816; GFX10-NEXT: scratch_store_dword v0, v2, off 817; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 818; GFX10-NEXT: scratch_load_dword v0, v1, off offset:124 glc dlc 819; GFX10-NEXT: s_waitcnt vmcnt(0) 820; GFX10-NEXT: s_endpgm 821; 822; GFX940-LABEL: 
store_load_vindex_small_offset_kernel: 823; GFX940: ; %bb.0: ; %bb 824; GFX940-NEXT: scratch_load_dword v1, off, off sc0 sc1 825; GFX940-NEXT: s_waitcnt vmcnt(0) 826; GFX940-NEXT: v_and_b32_e32 v0, 0x3ff, v0 827; GFX940-NEXT: v_lshlrev_b32_e32 v1, 2, v0 828; GFX940-NEXT: v_sub_u32_e32 v0, 0, v0 829; GFX940-NEXT: v_mov_b32_e32 v2, 15 830; GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0 831; GFX940-NEXT: scratch_store_dword v1, v2, off offset:256 sc0 sc1 832; GFX940-NEXT: s_waitcnt vmcnt(0) 833; GFX940-NEXT: v_add_u32_e32 v0, 0x100, v0 834; GFX940-NEXT: scratch_load_dword v0, v0, off offset:124 sc0 sc1 835; GFX940-NEXT: s_waitcnt vmcnt(0) 836; GFX940-NEXT: s_endpgm 837; 838; GFX11-LABEL: store_load_vindex_small_offset_kernel: 839; GFX11: ; %bb.0: ; %bb 840; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 841; GFX11-NEXT: scratch_load_b32 v3, off, off glc dlc 842; GFX11-NEXT: s_waitcnt vmcnt(0) 843; GFX11-NEXT: v_mov_b32_e32 v2, 15 844; GFX11-NEXT: v_sub_nc_u32_e32 v1, 0, v0 845; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 846; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 847; GFX11-NEXT: v_lshlrev_b32_e32 v1, 2, v1 848; GFX11-NEXT: scratch_store_b32 v0, v2, off offset:256 dlc 849; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 850; GFX11-NEXT: v_add_nc_u32_e32 v1, 0x100, v1 851; GFX11-NEXT: scratch_load_b32 v0, v1, off offset:124 glc dlc 852; GFX11-NEXT: s_waitcnt vmcnt(0) 853; GFX11-NEXT: s_endpgm 854; 855; GFX12-LABEL: store_load_vindex_small_offset_kernel: 856; GFX12: ; %bb.0: ; %bb 857; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0 858; GFX12-NEXT: scratch_load_b32 v3, off, off scope:SCOPE_SYS 859; GFX12-NEXT: s_wait_loadcnt 0x0 860; GFX12-NEXT: v_sub_nc_u32_e32 v1, 0, v0 861; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0 862; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) 863; GFX12-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_lshlrev_b32 v1, 2, v1 864; GFX12-NEXT: scratch_store_b32 v0, v2, off offset:256 scope:SCOPE_SYS 865; GFX12-NEXT: s_wait_storecnt 0x0 866; GFX12-NEXT: scratch_load_b32 v0, v1, off 
offset:380 scope:SCOPE_SYS 867; GFX12-NEXT: s_wait_loadcnt 0x0 868; GFX12-NEXT: s_endpgm 869; 870; UNALIGNED_GFX9-LABEL: store_load_vindex_small_offset_kernel: 871; UNALIGNED_GFX9: ; %bb.0: ; %bb 872; UNALIGNED_GFX9-NEXT: s_add_u32 flat_scratch_lo, s8, s13 873; UNALIGNED_GFX9-NEXT: s_addc_u32 flat_scratch_hi, s9, 0 874; UNALIGNED_GFX9-NEXT: s_mov_b32 s0, 0 875; UNALIGNED_GFX9-NEXT: scratch_load_dword v1, off, s0 glc 876; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 877; UNALIGNED_GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0 878; UNALIGNED_GFX9-NEXT: v_sub_u32_e32 v0, 0, v0 879; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v1, 0x100, v1 880; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v2, 15 881; UNALIGNED_GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 882; UNALIGNED_GFX9-NEXT: scratch_store_dword v1, v2, off 883; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 884; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v0, 0x100, v0 885; UNALIGNED_GFX9-NEXT: scratch_load_dword v0, v0, off offset:124 glc 886; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 887; UNALIGNED_GFX9-NEXT: s_endpgm 888; 889; UNALIGNED_GFX10-LABEL: store_load_vindex_small_offset_kernel: 890; UNALIGNED_GFX10: ; %bb.0: ; %bb 891; UNALIGNED_GFX10-NEXT: s_add_u32 s8, s8, s13 892; UNALIGNED_GFX10-NEXT: s_addc_u32 s9, s9, 0 893; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8 894; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9 895; UNALIGNED_GFX10-NEXT: v_sub_nc_u32_e32 v1, 0, v0 896; UNALIGNED_GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 897; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v2, 15 898; UNALIGNED_GFX10-NEXT: scratch_load_dword v3, off, off glc dlc 899; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 900; UNALIGNED_GFX10-NEXT: v_lshlrev_b32_e32 v1, 2, v1 901; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v0, 0x100, v0 902; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v1, 0x100, v1 903; UNALIGNED_GFX10-NEXT: scratch_store_dword v0, v2, off 904; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 905; UNALIGNED_GFX10-NEXT: scratch_load_dword v0, v1, off 
offset:124 glc dlc 906; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 907; UNALIGNED_GFX10-NEXT: s_endpgm 908; 909; UNALIGNED_GFX940-LABEL: store_load_vindex_small_offset_kernel: 910; UNALIGNED_GFX940: ; %bb.0: ; %bb 911; UNALIGNED_GFX940-NEXT: scratch_load_dword v1, off, off sc0 sc1 912; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 913; UNALIGNED_GFX940-NEXT: v_and_b32_e32 v0, 0x3ff, v0 914; UNALIGNED_GFX940-NEXT: v_lshlrev_b32_e32 v1, 2, v0 915; UNALIGNED_GFX940-NEXT: v_sub_u32_e32 v0, 0, v0 916; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v2, 15 917; UNALIGNED_GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0 918; UNALIGNED_GFX940-NEXT: scratch_store_dword v1, v2, off offset:256 sc0 sc1 919; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 920; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v0, 0x100, v0 921; UNALIGNED_GFX940-NEXT: scratch_load_dword v0, v0, off offset:124 sc0 sc1 922; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 923; UNALIGNED_GFX940-NEXT: s_endpgm 924; 925; UNALIGNED_GFX11-LABEL: store_load_vindex_small_offset_kernel: 926; UNALIGNED_GFX11: ; %bb.0: ; %bb 927; UNALIGNED_GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 928; UNALIGNED_GFX11-NEXT: scratch_load_b32 v3, off, off glc dlc 929; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 930; UNALIGNED_GFX11-NEXT: v_mov_b32_e32 v2, 15 931; UNALIGNED_GFX11-NEXT: v_sub_nc_u32_e32 v1, 0, v0 932; UNALIGNED_GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 933; UNALIGNED_GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 934; UNALIGNED_GFX11-NEXT: v_lshlrev_b32_e32 v1, 2, v1 935; UNALIGNED_GFX11-NEXT: scratch_store_b32 v0, v2, off offset:256 dlc 936; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 937; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v1, 0x100, v1 938; UNALIGNED_GFX11-NEXT: scratch_load_b32 v0, v1, off offset:124 glc dlc 939; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 940; UNALIGNED_GFX11-NEXT: s_endpgm 941; 942; UNALIGNED_GFX12-LABEL: store_load_vindex_small_offset_kernel: 943; UNALIGNED_GFX12: ; %bb.0: ; %bb 944; UNALIGNED_GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0 945; 
UNALIGNED_GFX12-NEXT: scratch_load_b32 v3, off, off scope:SCOPE_SYS 946; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 947; UNALIGNED_GFX12-NEXT: v_sub_nc_u32_e32 v1, 0, v0 948; UNALIGNED_GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0 949; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) 950; UNALIGNED_GFX12-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_lshlrev_b32 v1, 2, v1 951; UNALIGNED_GFX12-NEXT: scratch_store_b32 v0, v2, off offset:256 scope:SCOPE_SYS 952; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 953; UNALIGNED_GFX12-NEXT: scratch_load_b32 v0, v1, off offset:380 scope:SCOPE_SYS 954; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 955; UNALIGNED_GFX12-NEXT: s_endpgm 956bb: 957 %padding = alloca [64 x i32], align 4, addrspace(5) 958 %i = alloca [32 x float], align 4, addrspace(5) 959 %pad_gep = getelementptr inbounds [64 x i32], ptr addrspace(5) %padding, i32 0, i32 undef 960 %pad_load = load volatile i32, ptr addrspace(5) %pad_gep, align 4 961 %i2 = tail call i32 @llvm.amdgcn.workitem.id.x() 962 %i3 = zext i32 %i2 to i64 963 %i7 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %i2 964 store volatile i32 15, ptr addrspace(5) %i7, align 4 965 %i9 = sub nsw i32 31, %i2 966 %i10 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %i9 967 %i12 = load volatile i32, ptr addrspace(5) %i10, align 4 968 ret void 969} 970 971define void @store_load_vindex_small_offset_foo(i32 %idx) { 972; GFX9-LABEL: store_load_vindex_small_offset_foo: 973; GFX9: ; %bb.0: ; %bb 974; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 975; GFX9-NEXT: scratch_load_dword v1, off, s32 glc 976; GFX9-NEXT: s_waitcnt vmcnt(0) 977; GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0 978; GFX9-NEXT: v_and_b32_e32 v0, 15, v0 979; GFX9-NEXT: v_add_u32_e32 v1, s32, v1 980; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 981; GFX9-NEXT: v_add_u32_e32 v1, 0x100, v1 982; GFX9-NEXT: v_mov_b32_e32 v2, 15 983; GFX9-NEXT: v_add_u32_e32 v0, s32, v0 984; GFX9-NEXT: scratch_store_dword v1, v2, off 985; 
GFX9-NEXT: s_waitcnt vmcnt(0) 986; GFX9-NEXT: v_add_u32_e32 v0, 0x100, v0 987; GFX9-NEXT: scratch_load_dword v0, v0, off glc 988; GFX9-NEXT: s_waitcnt vmcnt(0) 989; GFX9-NEXT: s_setpc_b64 s[30:31] 990; 991; GFX10-LABEL: store_load_vindex_small_offset_foo: 992; GFX10: ; %bb.0: ; %bb 993; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 994; GFX10-NEXT: v_and_b32_e32 v1, 15, v0 995; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 996; GFX10-NEXT: v_mov_b32_e32 v2, 15 997; GFX10-NEXT: scratch_load_dword v3, off, s32 glc dlc 998; GFX10-NEXT: s_waitcnt vmcnt(0) 999; GFX10-NEXT: v_lshlrev_b32_e32 v1, 2, v1 1000; GFX10-NEXT: v_add_nc_u32_e32 v0, s32, v0 1001; GFX10-NEXT: v_add_nc_u32_e32 v1, s32, v1 1002; GFX10-NEXT: v_add_nc_u32_e32 v0, 0x100, v0 1003; GFX10-NEXT: v_add_nc_u32_e32 v1, 0x100, v1 1004; GFX10-NEXT: scratch_store_dword v0, v2, off 1005; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1006; GFX10-NEXT: scratch_load_dword v0, v1, off glc dlc 1007; GFX10-NEXT: s_waitcnt vmcnt(0) 1008; GFX10-NEXT: s_setpc_b64 s[30:31] 1009; 1010; GFX940-LABEL: store_load_vindex_small_offset_foo: 1011; GFX940: ; %bb.0: ; %bb 1012; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1013; GFX940-NEXT: scratch_load_dword v1, off, s32 sc0 sc1 1014; GFX940-NEXT: s_waitcnt vmcnt(0) 1015; GFX940-NEXT: v_lshlrev_b32_e32 v1, 2, v0 1016; GFX940-NEXT: v_add_u32_e32 v1, s32, v1 1017; GFX940-NEXT: v_add_u32_e32 v1, 0x100, v1 1018; GFX940-NEXT: v_mov_b32_e32 v2, 15 1019; GFX940-NEXT: v_and_b32_e32 v0, 15, v0 1020; GFX940-NEXT: scratch_store_dword v1, v2, off sc0 sc1 1021; GFX940-NEXT: s_waitcnt vmcnt(0) 1022; GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1023; GFX940-NEXT: scratch_load_dword v0, v0, s32 offset:256 sc0 sc1 1024; GFX940-NEXT: s_waitcnt vmcnt(0) 1025; GFX940-NEXT: s_setpc_b64 s[30:31] 1026; 1027; GFX11-LABEL: store_load_vindex_small_offset_foo: 1028; GFX11: ; %bb.0: ; %bb 1029; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1030; GFX11-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_lshlrev_b32 v1, 2, 
v0 1031; GFX11-NEXT: v_and_b32_e32 v0, 15, v0 1032; GFX11-NEXT: scratch_load_b32 v3, off, s32 glc dlc 1033; GFX11-NEXT: s_waitcnt vmcnt(0) 1034; GFX11-NEXT: v_add_nc_u32_e32 v1, s32, v1 1035; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1036; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 1037; GFX11-NEXT: v_add_nc_u32_e32 v1, 0x100, v1 1038; GFX11-NEXT: scratch_store_b32 v1, v2, off dlc 1039; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1040; GFX11-NEXT: scratch_load_b32 v0, v0, s32 offset:256 glc dlc 1041; GFX11-NEXT: s_waitcnt vmcnt(0) 1042; GFX11-NEXT: s_setpc_b64 s[30:31] 1043; 1044; GFX12-LABEL: store_load_vindex_small_offset_foo: 1045; GFX12: ; %bb.0: ; %bb 1046; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1047; GFX12-NEXT: s_wait_expcnt 0x0 1048; GFX12-NEXT: s_wait_samplecnt 0x0 1049; GFX12-NEXT: s_wait_bvhcnt 0x0 1050; GFX12-NEXT: s_wait_kmcnt 0x0 1051; GFX12-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_and_b32 v1, 15, v0 1052; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1053; GFX12-NEXT: scratch_load_b32 v3, off, s32 scope:SCOPE_SYS 1054; GFX12-NEXT: s_wait_loadcnt 0x0 1055; GFX12-NEXT: v_lshlrev_b32_e32 v1, 2, v1 1056; GFX12-NEXT: s_wait_storecnt 0x0 1057; GFX12-NEXT: scratch_store_b32 v0, v2, s32 offset:256 scope:SCOPE_SYS 1058; GFX12-NEXT: s_wait_storecnt 0x0 1059; GFX12-NEXT: scratch_load_b32 v0, v1, s32 offset:256 scope:SCOPE_SYS 1060; GFX12-NEXT: s_wait_loadcnt 0x0 1061; GFX12-NEXT: s_setpc_b64 s[30:31] 1062; 1063; UNALIGNED_GFX9-LABEL: store_load_vindex_small_offset_foo: 1064; UNALIGNED_GFX9: ; %bb.0: ; %bb 1065; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1066; UNALIGNED_GFX9-NEXT: scratch_load_dword v1, off, s32 glc 1067; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 1068; UNALIGNED_GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0 1069; UNALIGNED_GFX9-NEXT: v_and_b32_e32 v0, 15, v0 1070; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v1, s32, v1 1071; UNALIGNED_GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1072; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v1, 0x100, v1 1073; UNALIGNED_GFX9-NEXT: 
v_mov_b32_e32 v2, 15 1074; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v0, s32, v0 1075; UNALIGNED_GFX9-NEXT: scratch_store_dword v1, v2, off 1076; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 1077; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v0, 0x100, v0 1078; UNALIGNED_GFX9-NEXT: scratch_load_dword v0, v0, off glc 1079; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 1080; UNALIGNED_GFX9-NEXT: s_setpc_b64 s[30:31] 1081; 1082; UNALIGNED_GFX10-LABEL: store_load_vindex_small_offset_foo: 1083; UNALIGNED_GFX10: ; %bb.0: ; %bb 1084; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1085; UNALIGNED_GFX10-NEXT: v_and_b32_e32 v1, 15, v0 1086; UNALIGNED_GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1087; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v2, 15 1088; UNALIGNED_GFX10-NEXT: scratch_load_dword v3, off, s32 glc dlc 1089; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 1090; UNALIGNED_GFX10-NEXT: v_lshlrev_b32_e32 v1, 2, v1 1091; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v0, s32, v0 1092; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v1, s32, v1 1093; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v0, 0x100, v0 1094; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v1, 0x100, v1 1095; UNALIGNED_GFX10-NEXT: scratch_store_dword v0, v2, off 1096; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1097; UNALIGNED_GFX10-NEXT: scratch_load_dword v0, v1, off glc dlc 1098; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 1099; UNALIGNED_GFX10-NEXT: s_setpc_b64 s[30:31] 1100; 1101; UNALIGNED_GFX940-LABEL: store_load_vindex_small_offset_foo: 1102; UNALIGNED_GFX940: ; %bb.0: ; %bb 1103; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1104; UNALIGNED_GFX940-NEXT: scratch_load_dword v1, off, s32 sc0 sc1 1105; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 1106; UNALIGNED_GFX940-NEXT: v_lshlrev_b32_e32 v1, 2, v0 1107; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v1, s32, v1 1108; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v1, 0x100, v1 1109; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v2, 15 1110; UNALIGNED_GFX940-NEXT: v_and_b32_e32 v0, 15, v0 1111; 
UNALIGNED_GFX940-NEXT: scratch_store_dword v1, v2, off sc0 sc1 1112; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 1113; UNALIGNED_GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1114; UNALIGNED_GFX940-NEXT: scratch_load_dword v0, v0, s32 offset:256 sc0 sc1 1115; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 1116; UNALIGNED_GFX940-NEXT: s_setpc_b64 s[30:31] 1117; 1118; UNALIGNED_GFX11-LABEL: store_load_vindex_small_offset_foo: 1119; UNALIGNED_GFX11: ; %bb.0: ; %bb 1120; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1121; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_lshlrev_b32 v1, 2, v0 1122; UNALIGNED_GFX11-NEXT: v_and_b32_e32 v0, 15, v0 1123; UNALIGNED_GFX11-NEXT: scratch_load_b32 v3, off, s32 glc dlc 1124; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 1125; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v1, s32, v1 1126; UNALIGNED_GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1127; UNALIGNED_GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 1128; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v1, 0x100, v1 1129; UNALIGNED_GFX11-NEXT: scratch_store_b32 v1, v2, off dlc 1130; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1131; UNALIGNED_GFX11-NEXT: scratch_load_b32 v0, v0, s32 offset:256 glc dlc 1132; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 1133; UNALIGNED_GFX11-NEXT: s_setpc_b64 s[30:31] 1134; 1135; UNALIGNED_GFX12-LABEL: store_load_vindex_small_offset_foo: 1136; UNALIGNED_GFX12: ; %bb.0: ; %bb 1137; UNALIGNED_GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1138; UNALIGNED_GFX12-NEXT: s_wait_expcnt 0x0 1139; UNALIGNED_GFX12-NEXT: s_wait_samplecnt 0x0 1140; UNALIGNED_GFX12-NEXT: s_wait_bvhcnt 0x0 1141; UNALIGNED_GFX12-NEXT: s_wait_kmcnt 0x0 1142; UNALIGNED_GFX12-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_and_b32 v1, 15, v0 1143; UNALIGNED_GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1144; UNALIGNED_GFX12-NEXT: scratch_load_b32 v3, off, s32 scope:SCOPE_SYS 1145; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 1146; UNALIGNED_GFX12-NEXT: v_lshlrev_b32_e32 v1, 2, v1 1147; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 
1148; UNALIGNED_GFX12-NEXT: scratch_store_b32 v0, v2, s32 offset:256 scope:SCOPE_SYS 1149; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 1150; UNALIGNED_GFX12-NEXT: scratch_load_b32 v0, v1, s32 offset:256 scope:SCOPE_SYS 1151; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 1152; UNALIGNED_GFX12-NEXT: s_setpc_b64 s[30:31] 1153bb: 1154 %padding = alloca [64 x i32], align 4, addrspace(5) 1155 %i = alloca [32 x float], align 4, addrspace(5) 1156 %pad_gep = getelementptr inbounds [64 x i32], ptr addrspace(5) %padding, i32 0, i32 undef 1157 %pad_load = load volatile i32, ptr addrspace(5) %pad_gep, align 4 1158 %i7 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %idx 1159 store volatile i32 15, ptr addrspace(5) %i7, align 4 1160 %i9 = and i32 %idx, 15 1161 %i10 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %i9 1162 %i12 = load volatile i32, ptr addrspace(5) %i10, align 4 1163 ret void 1164} 1165 1166define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) { 1167; GFX9-LABEL: store_load_sindex_large_offset_kernel: 1168; GFX9: ; %bb.0: ; %bb 1169; GFX9-NEXT: s_load_dword s0, s[4:5], 0x0 1170; GFX9-NEXT: s_add_u32 flat_scratch_lo, s8, s13 1171; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s9, 0 1172; GFX9-NEXT: s_mov_b32 s1, 0 1173; GFX9-NEXT: scratch_load_dword v0, off, s1 offset:4 glc 1174; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1175; GFX9-NEXT: s_lshl_b32 s1, s0, 2 1176; GFX9-NEXT: s_and_b32 s0, s0, 15 1177; GFX9-NEXT: v_mov_b32_e32 v0, 15 1178; GFX9-NEXT: s_addk_i32 s1, 0x4004 1179; GFX9-NEXT: s_lshl_b32 s0, s0, 2 1180; GFX9-NEXT: scratch_store_dword off, v0, s1 1181; GFX9-NEXT: s_waitcnt vmcnt(0) 1182; GFX9-NEXT: s_addk_i32 s0, 0x4004 1183; GFX9-NEXT: scratch_load_dword v0, off, s0 glc 1184; GFX9-NEXT: s_waitcnt vmcnt(0) 1185; GFX9-NEXT: s_endpgm 1186; 1187; GFX10-LABEL: store_load_sindex_large_offset_kernel: 1188; GFX10: ; %bb.0: ; %bb 1189; GFX10-NEXT: s_add_u32 s8, s8, s13 1190; GFX10-NEXT: s_addc_u32 s9, s9, 0 
1191; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8 1192; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9 1193; GFX10-NEXT: s_load_dword s0, s[4:5], 0x0 1194; GFX10-NEXT: scratch_load_dword v0, off, off offset:4 glc dlc 1195; GFX10-NEXT: s_waitcnt vmcnt(0) 1196; GFX10-NEXT: v_mov_b32_e32 v0, 15 1197; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1198; GFX10-NEXT: s_and_b32 s1, s0, 15 1199; GFX10-NEXT: s_lshl_b32 s0, s0, 2 1200; GFX10-NEXT: s_lshl_b32 s1, s1, 2 1201; GFX10-NEXT: s_addk_i32 s0, 0x4004 1202; GFX10-NEXT: s_addk_i32 s1, 0x4004 1203; GFX10-NEXT: scratch_store_dword off, v0, s0 1204; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1205; GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc 1206; GFX10-NEXT: s_waitcnt vmcnt(0) 1207; GFX10-NEXT: s_endpgm 1208; 1209; GFX940-LABEL: store_load_sindex_large_offset_kernel: 1210; GFX940: ; %bb.0: ; %bb 1211; GFX940-NEXT: s_load_dword s0, s[4:5], 0x0 1212; GFX940-NEXT: scratch_load_dword v0, off, off offset:4 sc0 sc1 1213; GFX940-NEXT: s_waitcnt vmcnt(0) 1214; GFX940-NEXT: v_mov_b32_e32 v0, 15 1215; GFX940-NEXT: s_waitcnt lgkmcnt(0) 1216; GFX940-NEXT: s_lshl_b32 s1, s0, 2 1217; GFX940-NEXT: s_and_b32 s0, s0, 15 1218; GFX940-NEXT: s_addk_i32 s1, 0x4004 1219; GFX940-NEXT: s_lshl_b32 s0, s0, 2 1220; GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1 1221; GFX940-NEXT: s_waitcnt vmcnt(0) 1222; GFX940-NEXT: s_addk_i32 s0, 0x4004 1223; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 1224; GFX940-NEXT: s_waitcnt vmcnt(0) 1225; GFX940-NEXT: s_endpgm 1226; 1227; GFX11-LABEL: store_load_sindex_large_offset_kernel: 1228; GFX11: ; %bb.0: ; %bb 1229; GFX11-NEXT: s_load_b32 s0, s[4:5], 0x0 1230; GFX11-NEXT: scratch_load_b32 v0, off, off offset:4 glc dlc 1231; GFX11-NEXT: s_waitcnt vmcnt(0) 1232; GFX11-NEXT: v_mov_b32_e32 v0, 15 1233; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1234; GFX11-NEXT: s_and_b32 s1, s0, 15 1235; GFX11-NEXT: s_lshl_b32 s0, s0, 2 1236; GFX11-NEXT: s_lshl_b32 s1, s1, 2 1237; GFX11-NEXT: s_addk_i32 s0, 0x4004 1238; 
GFX11-NEXT: s_addk_i32 s1, 0x4004 1239; GFX11-NEXT: scratch_store_b32 off, v0, s0 dlc 1240; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1241; GFX11-NEXT: scratch_load_b32 v0, off, s1 glc dlc 1242; GFX11-NEXT: s_waitcnt vmcnt(0) 1243; GFX11-NEXT: s_endpgm 1244; 1245; GFX12-LABEL: store_load_sindex_large_offset_kernel: 1246; GFX12: ; %bb.0: ; %bb 1247; GFX12-NEXT: s_load_b32 s0, s[4:5], 0x0 1248; GFX12-NEXT: scratch_load_b32 v0, off, off scope:SCOPE_SYS 1249; GFX12-NEXT: s_wait_loadcnt 0x0 1250; GFX12-NEXT: v_mov_b32_e32 v0, 15 1251; GFX12-NEXT: s_wait_kmcnt 0x0 1252; GFX12-NEXT: s_and_b32 s1, s0, 15 1253; GFX12-NEXT: s_lshl_b32 s0, s0, 2 1254; GFX12-NEXT: s_lshl_b32 s1, s1, 2 1255; GFX12-NEXT: s_addk_co_i32 s0, 0x4000 1256; GFX12-NEXT: s_addk_co_i32 s1, 0x4000 1257; GFX12-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SYS 1258; GFX12-NEXT: s_wait_storecnt 0x0 1259; GFX12-NEXT: scratch_load_b32 v0, off, s1 scope:SCOPE_SYS 1260; GFX12-NEXT: s_wait_loadcnt 0x0 1261; GFX12-NEXT: s_endpgm 1262; 1263; UNALIGNED_GFX9-LABEL: store_load_sindex_large_offset_kernel: 1264; UNALIGNED_GFX9: ; %bb.0: ; %bb 1265; UNALIGNED_GFX9-NEXT: s_load_dword s0, s[4:5], 0x0 1266; UNALIGNED_GFX9-NEXT: s_add_u32 flat_scratch_lo, s8, s13 1267; UNALIGNED_GFX9-NEXT: s_addc_u32 flat_scratch_hi, s9, 0 1268; UNALIGNED_GFX9-NEXT: s_mov_b32 s1, 0 1269; UNALIGNED_GFX9-NEXT: scratch_load_dword v0, off, s1 offset:4 glc 1270; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1271; UNALIGNED_GFX9-NEXT: s_lshl_b32 s1, s0, 2 1272; UNALIGNED_GFX9-NEXT: s_and_b32 s0, s0, 15 1273; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 15 1274; UNALIGNED_GFX9-NEXT: s_addk_i32 s1, 0x4004 1275; UNALIGNED_GFX9-NEXT: s_lshl_b32 s0, s0, 2 1276; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s1 1277; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 1278; UNALIGNED_GFX9-NEXT: s_addk_i32 s0, 0x4004 1279; UNALIGNED_GFX9-NEXT: scratch_load_dword v0, off, s0 glc 1280; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 1281; UNALIGNED_GFX9-NEXT: s_endpgm 1282; 
1283; UNALIGNED_GFX10-LABEL: store_load_sindex_large_offset_kernel: 1284; UNALIGNED_GFX10: ; %bb.0: ; %bb 1285; UNALIGNED_GFX10-NEXT: s_add_u32 s8, s8, s13 1286; UNALIGNED_GFX10-NEXT: s_addc_u32 s9, s9, 0 1287; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8 1288; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9 1289; UNALIGNED_GFX10-NEXT: s_load_dword s0, s[4:5], 0x0 1290; UNALIGNED_GFX10-NEXT: scratch_load_dword v0, off, off offset:4 glc dlc 1291; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 1292; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v0, 15 1293; UNALIGNED_GFX10-NEXT: s_waitcnt lgkmcnt(0) 1294; UNALIGNED_GFX10-NEXT: s_and_b32 s1, s0, 15 1295; UNALIGNED_GFX10-NEXT: s_lshl_b32 s0, s0, 2 1296; UNALIGNED_GFX10-NEXT: s_lshl_b32 s1, s1, 2 1297; UNALIGNED_GFX10-NEXT: s_addk_i32 s0, 0x4004 1298; UNALIGNED_GFX10-NEXT: s_addk_i32 s1, 0x4004 1299; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v0, s0 1300; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1301; UNALIGNED_GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc 1302; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 1303; UNALIGNED_GFX10-NEXT: s_endpgm 1304; 1305; UNALIGNED_GFX940-LABEL: store_load_sindex_large_offset_kernel: 1306; UNALIGNED_GFX940: ; %bb.0: ; %bb 1307; UNALIGNED_GFX940-NEXT: s_load_dword s0, s[4:5], 0x0 1308; UNALIGNED_GFX940-NEXT: scratch_load_dword v0, off, off offset:4 sc0 sc1 1309; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 1310; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v0, 15 1311; UNALIGNED_GFX940-NEXT: s_waitcnt lgkmcnt(0) 1312; UNALIGNED_GFX940-NEXT: s_lshl_b32 s1, s0, 2 1313; UNALIGNED_GFX940-NEXT: s_and_b32 s0, s0, 15 1314; UNALIGNED_GFX940-NEXT: s_addk_i32 s1, 0x4004 1315; UNALIGNED_GFX940-NEXT: s_lshl_b32 s0, s0, 2 1316; UNALIGNED_GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1 1317; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 1318; UNALIGNED_GFX940-NEXT: s_addk_i32 s0, 0x4004 1319; UNALIGNED_GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 1320; 
UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 1321; UNALIGNED_GFX940-NEXT: s_endpgm 1322; 1323; UNALIGNED_GFX11-LABEL: store_load_sindex_large_offset_kernel: 1324; UNALIGNED_GFX11: ; %bb.0: ; %bb 1325; UNALIGNED_GFX11-NEXT: s_load_b32 s0, s[4:5], 0x0 1326; UNALIGNED_GFX11-NEXT: scratch_load_b32 v0, off, off offset:4 glc dlc 1327; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 1328; UNALIGNED_GFX11-NEXT: v_mov_b32_e32 v0, 15 1329; UNALIGNED_GFX11-NEXT: s_waitcnt lgkmcnt(0) 1330; UNALIGNED_GFX11-NEXT: s_and_b32 s1, s0, 15 1331; UNALIGNED_GFX11-NEXT: s_lshl_b32 s0, s0, 2 1332; UNALIGNED_GFX11-NEXT: s_lshl_b32 s1, s1, 2 1333; UNALIGNED_GFX11-NEXT: s_addk_i32 s0, 0x4004 1334; UNALIGNED_GFX11-NEXT: s_addk_i32 s1, 0x4004 1335; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v0, s0 dlc 1336; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1337; UNALIGNED_GFX11-NEXT: scratch_load_b32 v0, off, s1 glc dlc 1338; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 1339; UNALIGNED_GFX11-NEXT: s_endpgm 1340; 1341; UNALIGNED_GFX12-LABEL: store_load_sindex_large_offset_kernel: 1342; UNALIGNED_GFX12: ; %bb.0: ; %bb 1343; UNALIGNED_GFX12-NEXT: s_load_b32 s0, s[4:5], 0x0 1344; UNALIGNED_GFX12-NEXT: scratch_load_b32 v0, off, off scope:SCOPE_SYS 1345; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 1346; UNALIGNED_GFX12-NEXT: v_mov_b32_e32 v0, 15 1347; UNALIGNED_GFX12-NEXT: s_wait_kmcnt 0x0 1348; UNALIGNED_GFX12-NEXT: s_and_b32 s1, s0, 15 1349; UNALIGNED_GFX12-NEXT: s_lshl_b32 s0, s0, 2 1350; UNALIGNED_GFX12-NEXT: s_lshl_b32 s1, s1, 2 1351; UNALIGNED_GFX12-NEXT: s_addk_co_i32 s0, 0x4000 1352; UNALIGNED_GFX12-NEXT: s_addk_co_i32 s1, 0x4000 1353; UNALIGNED_GFX12-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SYS 1354; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 1355; UNALIGNED_GFX12-NEXT: scratch_load_b32 v0, off, s1 scope:SCOPE_SYS 1356; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 1357; UNALIGNED_GFX12-NEXT: s_endpgm 1358bb: 1359 %padding = alloca [4096 x i32], align 4, addrspace(5) 1360 %i = alloca [32 x float], align 4, 
addrspace(5) 1361 %pad_gep = getelementptr inbounds [4096 x i32], ptr addrspace(5) %padding, i32 0, i32 undef 1362 %pad_load = load volatile i32, ptr addrspace(5) %pad_gep, align 4 1363 %i7 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %idx 1364 store volatile i32 15, ptr addrspace(5) %i7, align 4 1365 %i9 = and i32 %idx, 15 1366 %i10 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %i9 1367 %i12 = load volatile i32, ptr addrspace(5) %i10, align 4 1368 ret void 1369} 1370 1371define amdgpu_kernel void @store_load_vindex_large_offset_kernel() { 1372; GFX9-LABEL: store_load_vindex_large_offset_kernel: 1373; GFX9: ; %bb.0: ; %bb 1374; GFX9-NEXT: s_add_u32 flat_scratch_lo, s8, s13 1375; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s9, 0 1376; GFX9-NEXT: s_mov_b32 s0, 0 1377; GFX9-NEXT: scratch_load_dword v1, off, s0 offset:4 glc 1378; GFX9-NEXT: s_waitcnt vmcnt(0) 1379; GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0 1380; GFX9-NEXT: v_sub_u32_e32 v0, 0, v0 1381; GFX9-NEXT: v_add_u32_e32 v1, 0x4004, v1 1382; GFX9-NEXT: v_mov_b32_e32 v2, 15 1383; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1384; GFX9-NEXT: scratch_store_dword v1, v2, off 1385; GFX9-NEXT: s_waitcnt vmcnt(0) 1386; GFX9-NEXT: v_add_u32_e32 v0, 0x4004, v0 1387; GFX9-NEXT: scratch_load_dword v0, v0, off offset:124 glc 1388; GFX9-NEXT: s_waitcnt vmcnt(0) 1389; GFX9-NEXT: s_endpgm 1390; 1391; GFX10-LABEL: store_load_vindex_large_offset_kernel: 1392; GFX10: ; %bb.0: ; %bb 1393; GFX10-NEXT: s_add_u32 s8, s8, s13 1394; GFX10-NEXT: s_addc_u32 s9, s9, 0 1395; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8 1396; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9 1397; GFX10-NEXT: v_sub_nc_u32_e32 v1, 0, v0 1398; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1399; GFX10-NEXT: v_mov_b32_e32 v2, 15 1400; GFX10-NEXT: scratch_load_dword v3, off, off offset:4 glc dlc 1401; GFX10-NEXT: s_waitcnt vmcnt(0) 1402; GFX10-NEXT: v_lshlrev_b32_e32 v1, 2, v1 1403; GFX10-NEXT: v_add_nc_u32_e32 v0, 
0x4004, v0 1404; GFX10-NEXT: v_add_nc_u32_e32 v1, 0x4004, v1 1405; GFX10-NEXT: scratch_store_dword v0, v2, off 1406; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1407; GFX10-NEXT: scratch_load_dword v0, v1, off offset:124 glc dlc 1408; GFX10-NEXT: s_waitcnt vmcnt(0) 1409; GFX10-NEXT: s_endpgm 1410; 1411; GFX940-LABEL: store_load_vindex_large_offset_kernel: 1412; GFX940: ; %bb.0: ; %bb 1413; GFX940-NEXT: scratch_load_dword v1, off, off offset:4 sc0 sc1 1414; GFX940-NEXT: s_waitcnt vmcnt(0) 1415; GFX940-NEXT: v_and_b32_e32 v0, 0x3ff, v0 1416; GFX940-NEXT: v_lshlrev_b32_e32 v1, 2, v0 1417; GFX940-NEXT: v_sub_u32_e32 v0, 0, v0 1418; GFX940-NEXT: v_mov_b32_e32 v2, 15 1419; GFX940-NEXT: s_movk_i32 s0, 0x4004 1420; GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1421; GFX940-NEXT: scratch_store_dword v1, v2, s0 sc0 sc1 1422; GFX940-NEXT: s_waitcnt vmcnt(0) 1423; GFX940-NEXT: v_add_u32_e32 v0, 0x4004, v0 1424; GFX940-NEXT: scratch_load_dword v0, v0, off offset:124 sc0 sc1 1425; GFX940-NEXT: s_waitcnt vmcnt(0) 1426; GFX940-NEXT: s_endpgm 1427; 1428; GFX11-LABEL: store_load_vindex_large_offset_kernel: 1429; GFX11: ; %bb.0: ; %bb 1430; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 1431; GFX11-NEXT: s_movk_i32 s0, 0x4004 1432; GFX11-NEXT: scratch_load_b32 v3, off, off offset:4 glc dlc 1433; GFX11-NEXT: s_waitcnt vmcnt(0) 1434; GFX11-NEXT: v_mov_b32_e32 v2, 15 1435; GFX11-NEXT: v_sub_nc_u32_e32 v1, 0, v0 1436; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1437; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 1438; GFX11-NEXT: v_lshlrev_b32_e32 v1, 2, v1 1439; GFX11-NEXT: scratch_store_b32 v0, v2, s0 dlc 1440; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1441; GFX11-NEXT: v_add_nc_u32_e32 v1, 0x4004, v1 1442; GFX11-NEXT: scratch_load_b32 v0, v1, off offset:124 glc dlc 1443; GFX11-NEXT: s_waitcnt vmcnt(0) 1444; GFX11-NEXT: s_endpgm 1445; 1446; GFX12-LABEL: store_load_vindex_large_offset_kernel: 1447; GFX12: ; %bb.0: ; %bb 1448; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0 1449; GFX12-NEXT: scratch_load_b32 v3, off, off 
scope:SCOPE_SYS 1450; GFX12-NEXT: s_wait_loadcnt 0x0 1451; GFX12-NEXT: v_sub_nc_u32_e32 v1, 0, v0 1452; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1453; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) 1454; GFX12-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_lshlrev_b32 v1, 2, v1 1455; GFX12-NEXT: scratch_store_b32 v0, v2, off offset:16384 scope:SCOPE_SYS 1456; GFX12-NEXT: s_wait_storecnt 0x0 1457; GFX12-NEXT: scratch_load_b32 v0, v1, off offset:16508 scope:SCOPE_SYS 1458; GFX12-NEXT: s_wait_loadcnt 0x0 1459; GFX12-NEXT: s_endpgm 1460; 1461; UNALIGNED_GFX9-LABEL: store_load_vindex_large_offset_kernel: 1462; UNALIGNED_GFX9: ; %bb.0: ; %bb 1463; UNALIGNED_GFX9-NEXT: s_add_u32 flat_scratch_lo, s8, s13 1464; UNALIGNED_GFX9-NEXT: s_addc_u32 flat_scratch_hi, s9, 0 1465; UNALIGNED_GFX9-NEXT: s_mov_b32 s0, 0 1466; UNALIGNED_GFX9-NEXT: scratch_load_dword v1, off, s0 offset:4 glc 1467; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 1468; UNALIGNED_GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0 1469; UNALIGNED_GFX9-NEXT: v_sub_u32_e32 v0, 0, v0 1470; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v1, 0x4004, v1 1471; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v2, 15 1472; UNALIGNED_GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1473; UNALIGNED_GFX9-NEXT: scratch_store_dword v1, v2, off 1474; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 1475; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v0, 0x4004, v0 1476; UNALIGNED_GFX9-NEXT: scratch_load_dword v0, v0, off offset:124 glc 1477; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 1478; UNALIGNED_GFX9-NEXT: s_endpgm 1479; 1480; UNALIGNED_GFX10-LABEL: store_load_vindex_large_offset_kernel: 1481; UNALIGNED_GFX10: ; %bb.0: ; %bb 1482; UNALIGNED_GFX10-NEXT: s_add_u32 s8, s8, s13 1483; UNALIGNED_GFX10-NEXT: s_addc_u32 s9, s9, 0 1484; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8 1485; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9 1486; UNALIGNED_GFX10-NEXT: v_sub_nc_u32_e32 v1, 0, v0 1487; UNALIGNED_GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1488; UNALIGNED_GFX10-NEXT: 
v_mov_b32_e32 v2, 15 1489; UNALIGNED_GFX10-NEXT: scratch_load_dword v3, off, off offset:4 glc dlc 1490; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 1491; UNALIGNED_GFX10-NEXT: v_lshlrev_b32_e32 v1, 2, v1 1492; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v0, 0x4004, v0 1493; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v1, 0x4004, v1 1494; UNALIGNED_GFX10-NEXT: scratch_store_dword v0, v2, off 1495; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1496; UNALIGNED_GFX10-NEXT: scratch_load_dword v0, v1, off offset:124 glc dlc 1497; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 1498; UNALIGNED_GFX10-NEXT: s_endpgm 1499; 1500; UNALIGNED_GFX940-LABEL: store_load_vindex_large_offset_kernel: 1501; UNALIGNED_GFX940: ; %bb.0: ; %bb 1502; UNALIGNED_GFX940-NEXT: scratch_load_dword v1, off, off offset:4 sc0 sc1 1503; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 1504; UNALIGNED_GFX940-NEXT: v_and_b32_e32 v0, 0x3ff, v0 1505; UNALIGNED_GFX940-NEXT: v_lshlrev_b32_e32 v1, 2, v0 1506; UNALIGNED_GFX940-NEXT: v_sub_u32_e32 v0, 0, v0 1507; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v2, 15 1508; UNALIGNED_GFX940-NEXT: s_movk_i32 s0, 0x4004 1509; UNALIGNED_GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1510; UNALIGNED_GFX940-NEXT: scratch_store_dword v1, v2, s0 sc0 sc1 1511; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 1512; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v0, 0x4004, v0 1513; UNALIGNED_GFX940-NEXT: scratch_load_dword v0, v0, off offset:124 sc0 sc1 1514; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 1515; UNALIGNED_GFX940-NEXT: s_endpgm 1516; 1517; UNALIGNED_GFX11-LABEL: store_load_vindex_large_offset_kernel: 1518; UNALIGNED_GFX11: ; %bb.0: ; %bb 1519; UNALIGNED_GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 1520; UNALIGNED_GFX11-NEXT: s_movk_i32 s0, 0x4004 1521; UNALIGNED_GFX11-NEXT: scratch_load_b32 v3, off, off offset:4 glc dlc 1522; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 1523; UNALIGNED_GFX11-NEXT: v_mov_b32_e32 v2, 15 1524; UNALIGNED_GFX11-NEXT: v_sub_nc_u32_e32 v1, 0, v0 1525; UNALIGNED_GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, 
v0 1526; UNALIGNED_GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 1527; UNALIGNED_GFX11-NEXT: v_lshlrev_b32_e32 v1, 2, v1 1528; UNALIGNED_GFX11-NEXT: scratch_store_b32 v0, v2, s0 dlc 1529; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1530; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v1, 0x4004, v1 1531; UNALIGNED_GFX11-NEXT: scratch_load_b32 v0, v1, off offset:124 glc dlc 1532; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 1533; UNALIGNED_GFX11-NEXT: s_endpgm 1534; 1535; UNALIGNED_GFX12-LABEL: store_load_vindex_large_offset_kernel: 1536; UNALIGNED_GFX12: ; %bb.0: ; %bb 1537; UNALIGNED_GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0 1538; UNALIGNED_GFX12-NEXT: scratch_load_b32 v3, off, off scope:SCOPE_SYS 1539; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 1540; UNALIGNED_GFX12-NEXT: v_sub_nc_u32_e32 v1, 0, v0 1541; UNALIGNED_GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1542; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) 1543; UNALIGNED_GFX12-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_lshlrev_b32 v1, 2, v1 1544; UNALIGNED_GFX12-NEXT: scratch_store_b32 v0, v2, off offset:16384 scope:SCOPE_SYS 1545; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 1546; UNALIGNED_GFX12-NEXT: scratch_load_b32 v0, v1, off offset:16508 scope:SCOPE_SYS 1547; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 1548; UNALIGNED_GFX12-NEXT: s_endpgm 1549bb: 1550 %padding = alloca [4096 x i32], align 4, addrspace(5) 1551 %i = alloca [32 x float], align 4, addrspace(5) 1552 %pad_gep = getelementptr inbounds [4096 x i32], ptr addrspace(5) %padding, i32 0, i32 undef 1553 %pad_load = load volatile i32, ptr addrspace(5) %pad_gep, align 4 1554 %i2 = tail call i32 @llvm.amdgcn.workitem.id.x() 1555 %i3 = zext i32 %i2 to i64 1556 %i7 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %i2 1557 store volatile i32 15, ptr addrspace(5) %i7, align 4 1558 %i9 = sub nsw i32 31, %i2 1559 %i10 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %i9 1560 %i12 = load volatile i32, ptr addrspace(5) %i10, align 4 1561 
ret void 1562} 1563 1564define void @store_load_vindex_large_offset_foo(i32 %idx) { 1565; GFX9-LABEL: store_load_vindex_large_offset_foo: 1566; GFX9: ; %bb.0: ; %bb 1567; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1568; GFX9-NEXT: scratch_load_dword v1, off, s32 offset:4 glc 1569; GFX9-NEXT: s_waitcnt vmcnt(0) 1570; GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0 1571; GFX9-NEXT: v_and_b32_e32 v0, 15, v0 1572; GFX9-NEXT: v_add_u32_e32 v1, s32, v1 1573; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1574; GFX9-NEXT: v_add_u32_e32 v1, 0x4004, v1 1575; GFX9-NEXT: v_mov_b32_e32 v2, 15 1576; GFX9-NEXT: v_add_u32_e32 v0, s32, v0 1577; GFX9-NEXT: scratch_store_dword v1, v2, off 1578; GFX9-NEXT: s_waitcnt vmcnt(0) 1579; GFX9-NEXT: v_add_u32_e32 v0, 0x4004, v0 1580; GFX9-NEXT: scratch_load_dword v0, v0, off glc 1581; GFX9-NEXT: s_waitcnt vmcnt(0) 1582; GFX9-NEXT: s_setpc_b64 s[30:31] 1583; 1584; GFX10-LABEL: store_load_vindex_large_offset_foo: 1585; GFX10: ; %bb.0: ; %bb 1586; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1587; GFX10-NEXT: v_and_b32_e32 v1, 15, v0 1588; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1589; GFX10-NEXT: v_mov_b32_e32 v2, 15 1590; GFX10-NEXT: scratch_load_dword v3, off, s32 offset:4 glc dlc 1591; GFX10-NEXT: s_waitcnt vmcnt(0) 1592; GFX10-NEXT: v_lshlrev_b32_e32 v1, 2, v1 1593; GFX10-NEXT: v_add_nc_u32_e32 v0, s32, v0 1594; GFX10-NEXT: v_add_nc_u32_e32 v1, s32, v1 1595; GFX10-NEXT: v_add_nc_u32_e32 v0, 0x4004, v0 1596; GFX10-NEXT: v_add_nc_u32_e32 v1, 0x4004, v1 1597; GFX10-NEXT: scratch_store_dword v0, v2, off 1598; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1599; GFX10-NEXT: scratch_load_dword v0, v1, off glc dlc 1600; GFX10-NEXT: s_waitcnt vmcnt(0) 1601; GFX10-NEXT: s_setpc_b64 s[30:31] 1602; 1603; GFX940-LABEL: store_load_vindex_large_offset_foo: 1604; GFX940: ; %bb.0: ; %bb 1605; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1606; GFX940-NEXT: scratch_load_dword v1, off, s32 offset:4 sc0 sc1 1607; GFX940-NEXT: s_waitcnt vmcnt(0) 1608; GFX940-NEXT: 
v_lshlrev_b32_e32 v1, 2, v0 1609; GFX940-NEXT: v_add_u32_e32 v1, s32, v1 1610; GFX940-NEXT: v_add_u32_e32 v1, 0x4004, v1 1611; GFX940-NEXT: v_mov_b32_e32 v2, 15 1612; GFX940-NEXT: v_and_b32_e32 v0, 15, v0 1613; GFX940-NEXT: scratch_store_dword v1, v2, off sc0 sc1 1614; GFX940-NEXT: s_waitcnt vmcnt(0) 1615; GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1616; GFX940-NEXT: s_add_i32 s0, s32, 0x4004 1617; GFX940-NEXT: scratch_load_dword v0, v0, s0 sc0 sc1 1618; GFX940-NEXT: s_waitcnt vmcnt(0) 1619; GFX940-NEXT: s_setpc_b64 s[30:31] 1620; 1621; GFX11-LABEL: store_load_vindex_large_offset_foo: 1622; GFX11: ; %bb.0: ; %bb 1623; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1624; GFX11-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_lshlrev_b32 v1, 2, v0 1625; GFX11-NEXT: v_and_b32_e32 v0, 15, v0 1626; GFX11-NEXT: s_add_i32 s0, s32, 0x4004 1627; GFX11-NEXT: scratch_load_b32 v3, off, s32 offset:4 glc dlc 1628; GFX11-NEXT: s_waitcnt vmcnt(0) 1629; GFX11-NEXT: v_add_nc_u32_e32 v1, s32, v1 1630; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1631; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 1632; GFX11-NEXT: v_add_nc_u32_e32 v1, 0x4004, v1 1633; GFX11-NEXT: scratch_store_b32 v1, v2, off dlc 1634; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1635; GFX11-NEXT: scratch_load_b32 v0, v0, s0 glc dlc 1636; GFX11-NEXT: s_waitcnt vmcnt(0) 1637; GFX11-NEXT: s_setpc_b64 s[30:31] 1638; 1639; GFX12-LABEL: store_load_vindex_large_offset_foo: 1640; GFX12: ; %bb.0: ; %bb 1641; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1642; GFX12-NEXT: s_wait_expcnt 0x0 1643; GFX12-NEXT: s_wait_samplecnt 0x0 1644; GFX12-NEXT: s_wait_bvhcnt 0x0 1645; GFX12-NEXT: s_wait_kmcnt 0x0 1646; GFX12-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_and_b32 v1, 15, v0 1647; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1648; GFX12-NEXT: scratch_load_b32 v3, off, s32 scope:SCOPE_SYS 1649; GFX12-NEXT: s_wait_loadcnt 0x0 1650; GFX12-NEXT: v_lshlrev_b32_e32 v1, 2, v1 1651; GFX12-NEXT: s_wait_storecnt 0x0 1652; GFX12-NEXT: scratch_store_b32 v0, v2, s32 offset:16384 
scope:SCOPE_SYS 1653; GFX12-NEXT: s_wait_storecnt 0x0 1654; GFX12-NEXT: scratch_load_b32 v0, v1, s32 offset:16384 scope:SCOPE_SYS 1655; GFX12-NEXT: s_wait_loadcnt 0x0 1656; GFX12-NEXT: s_setpc_b64 s[30:31] 1657; 1658; UNALIGNED_GFX9-LABEL: store_load_vindex_large_offset_foo: 1659; UNALIGNED_GFX9: ; %bb.0: ; %bb 1660; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1661; UNALIGNED_GFX9-NEXT: scratch_load_dword v1, off, s32 offset:4 glc 1662; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 1663; UNALIGNED_GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0 1664; UNALIGNED_GFX9-NEXT: v_and_b32_e32 v0, 15, v0 1665; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v1, s32, v1 1666; UNALIGNED_GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1667; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v1, 0x4004, v1 1668; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v2, 15 1669; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v0, s32, v0 1670; UNALIGNED_GFX9-NEXT: scratch_store_dword v1, v2, off 1671; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 1672; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v0, 0x4004, v0 1673; UNALIGNED_GFX9-NEXT: scratch_load_dword v0, v0, off glc 1674; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 1675; UNALIGNED_GFX9-NEXT: s_setpc_b64 s[30:31] 1676; 1677; UNALIGNED_GFX10-LABEL: store_load_vindex_large_offset_foo: 1678; UNALIGNED_GFX10: ; %bb.0: ; %bb 1679; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1680; UNALIGNED_GFX10-NEXT: v_and_b32_e32 v1, 15, v0 1681; UNALIGNED_GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1682; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v2, 15 1683; UNALIGNED_GFX10-NEXT: scratch_load_dword v3, off, s32 offset:4 glc dlc 1684; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 1685; UNALIGNED_GFX10-NEXT: v_lshlrev_b32_e32 v1, 2, v1 1686; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v0, s32, v0 1687; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v1, s32, v1 1688; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v0, 0x4004, v0 1689; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v1, 0x4004, v1 1690; UNALIGNED_GFX10-NEXT: scratch_store_dword v0, v2, off 
1691; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1692; UNALIGNED_GFX10-NEXT: scratch_load_dword v0, v1, off glc dlc 1693; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 1694; UNALIGNED_GFX10-NEXT: s_setpc_b64 s[30:31] 1695; 1696; UNALIGNED_GFX940-LABEL: store_load_vindex_large_offset_foo: 1697; UNALIGNED_GFX940: ; %bb.0: ; %bb 1698; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1699; UNALIGNED_GFX940-NEXT: scratch_load_dword v1, off, s32 offset:4 sc0 sc1 1700; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 1701; UNALIGNED_GFX940-NEXT: v_lshlrev_b32_e32 v1, 2, v0 1702; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v1, s32, v1 1703; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v1, 0x4004, v1 1704; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v2, 15 1705; UNALIGNED_GFX940-NEXT: v_and_b32_e32 v0, 15, v0 1706; UNALIGNED_GFX940-NEXT: scratch_store_dword v1, v2, off sc0 sc1 1707; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 1708; UNALIGNED_GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1709; UNALIGNED_GFX940-NEXT: s_add_i32 s0, s32, 0x4004 1710; UNALIGNED_GFX940-NEXT: scratch_load_dword v0, v0, s0 sc0 sc1 1711; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 1712; UNALIGNED_GFX940-NEXT: s_setpc_b64 s[30:31] 1713; 1714; UNALIGNED_GFX11-LABEL: store_load_vindex_large_offset_foo: 1715; UNALIGNED_GFX11: ; %bb.0: ; %bb 1716; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1717; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_lshlrev_b32 v1, 2, v0 1718; UNALIGNED_GFX11-NEXT: v_and_b32_e32 v0, 15, v0 1719; UNALIGNED_GFX11-NEXT: s_add_i32 s0, s32, 0x4004 1720; UNALIGNED_GFX11-NEXT: scratch_load_b32 v3, off, s32 offset:4 glc dlc 1721; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 1722; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v1, s32, v1 1723; UNALIGNED_GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1724; UNALIGNED_GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 1725; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v1, 0x4004, v1 1726; UNALIGNED_GFX11-NEXT: scratch_store_b32 v1, v2, off dlc 1727; 
UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1728; UNALIGNED_GFX11-NEXT: scratch_load_b32 v0, v0, s0 glc dlc 1729; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 1730; UNALIGNED_GFX11-NEXT: s_setpc_b64 s[30:31] 1731; 1732; UNALIGNED_GFX12-LABEL: store_load_vindex_large_offset_foo: 1733; UNALIGNED_GFX12: ; %bb.0: ; %bb 1734; UNALIGNED_GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1735; UNALIGNED_GFX12-NEXT: s_wait_expcnt 0x0 1736; UNALIGNED_GFX12-NEXT: s_wait_samplecnt 0x0 1737; UNALIGNED_GFX12-NEXT: s_wait_bvhcnt 0x0 1738; UNALIGNED_GFX12-NEXT: s_wait_kmcnt 0x0 1739; UNALIGNED_GFX12-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_and_b32 v1, 15, v0 1740; UNALIGNED_GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1741; UNALIGNED_GFX12-NEXT: scratch_load_b32 v3, off, s32 scope:SCOPE_SYS 1742; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 1743; UNALIGNED_GFX12-NEXT: v_lshlrev_b32_e32 v1, 2, v1 1744; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 1745; UNALIGNED_GFX12-NEXT: scratch_store_b32 v0, v2, s32 offset:16384 scope:SCOPE_SYS 1746; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 1747; UNALIGNED_GFX12-NEXT: scratch_load_b32 v0, v1, s32 offset:16384 scope:SCOPE_SYS 1748; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 1749; UNALIGNED_GFX12-NEXT: s_setpc_b64 s[30:31] 1750bb: 1751 %padding = alloca [4096 x i32], align 4, addrspace(5) 1752 %i = alloca [32 x float], align 4, addrspace(5) 1753 %pad_gep = getelementptr inbounds [4096 x i32], ptr addrspace(5) %padding, i32 0, i32 undef 1754 %pad_load = load volatile i32, ptr addrspace(5) %pad_gep, align 4 1755 %i7 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %idx 1756 store volatile i32 15, ptr addrspace(5) %i7, align 4 1757 %i9 = and i32 %idx, 15 1758 %i10 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %i9 1759 %i12 = load volatile i32, ptr addrspace(5) %i10, align 4 1760 ret void 1761} 1762 1763define amdgpu_kernel void @store_load_large_imm_offset_kernel() { 1764; GFX9-LABEL: store_load_large_imm_offset_kernel: 1765; GFX9: 
; %bb.0: ; %bb 1766; GFX9-NEXT: s_add_u32 flat_scratch_lo, s8, s13 1767; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s9, 0 1768; GFX9-NEXT: v_mov_b32_e32 v0, 13 1769; GFX9-NEXT: s_mov_b32 s0, 0 1770; GFX9-NEXT: scratch_store_dword off, v0, s0 offset:4 1771; GFX9-NEXT: s_waitcnt vmcnt(0) 1772; GFX9-NEXT: s_movk_i32 s0, 0x3e80 1773; GFX9-NEXT: v_mov_b32_e32 v0, 15 1774; GFX9-NEXT: s_add_i32 s0, s0, 4 1775; GFX9-NEXT: scratch_store_dword off, v0, s0 1776; GFX9-NEXT: s_waitcnt vmcnt(0) 1777; GFX9-NEXT: scratch_load_dword v0, off, s0 glc 1778; GFX9-NEXT: s_waitcnt vmcnt(0) 1779; GFX9-NEXT: s_endpgm 1780; 1781; GFX10-LABEL: store_load_large_imm_offset_kernel: 1782; GFX10: ; %bb.0: ; %bb 1783; GFX10-NEXT: s_add_u32 s8, s8, s13 1784; GFX10-NEXT: s_addc_u32 s9, s9, 0 1785; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8 1786; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9 1787; GFX10-NEXT: v_mov_b32_e32 v0, 13 1788; GFX10-NEXT: v_mov_b32_e32 v1, 15 1789; GFX10-NEXT: s_movk_i32 s0, 0x3e80 1790; GFX10-NEXT: s_add_i32 s0, s0, 4 1791; GFX10-NEXT: scratch_store_dword off, v0, off offset:4 1792; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1793; GFX10-NEXT: scratch_store_dword off, v1, s0 1794; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1795; GFX10-NEXT: scratch_load_dword v0, off, s0 glc dlc 1796; GFX10-NEXT: s_waitcnt vmcnt(0) 1797; GFX10-NEXT: s_endpgm 1798; 1799; GFX940-LABEL: store_load_large_imm_offset_kernel: 1800; GFX940: ; %bb.0: ; %bb 1801; GFX940-NEXT: v_mov_b32_e32 v0, 13 1802; GFX940-NEXT: s_movk_i32 s0, 0x3e80 1803; GFX940-NEXT: scratch_store_dword off, v0, off offset:4 sc0 sc1 1804; GFX940-NEXT: s_waitcnt vmcnt(0) 1805; GFX940-NEXT: v_mov_b32_e32 v0, 15 1806; GFX940-NEXT: s_add_i32 s0, s0, 4 1807; GFX940-NEXT: scratch_store_dword off, v0, s0 sc0 sc1 1808; GFX940-NEXT: s_waitcnt vmcnt(0) 1809; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 1810; GFX940-NEXT: s_waitcnt vmcnt(0) 1811; GFX940-NEXT: s_endpgm 1812; 1813; GFX11-LABEL: 
store_load_large_imm_offset_kernel: 1814; GFX11: ; %bb.0: ; %bb 1815; GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15 1816; GFX11-NEXT: s_movk_i32 s0, 0x3e80 1817; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 1818; GFX11-NEXT: s_add_i32 s0, s0, 4 1819; GFX11-NEXT: scratch_store_b32 off, v0, off offset:4 dlc 1820; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1821; GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc 1822; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1823; GFX11-NEXT: scratch_load_b32 v0, off, s0 glc dlc 1824; GFX11-NEXT: s_waitcnt vmcnt(0) 1825; GFX11-NEXT: s_endpgm 1826; 1827; GFX12-LABEL: store_load_large_imm_offset_kernel: 1828; GFX12: ; %bb.0: ; %bb 1829; GFX12-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15 1830; GFX12-NEXT: scratch_store_b32 off, v0, off scope:SCOPE_SYS 1831; GFX12-NEXT: s_wait_storecnt 0x0 1832; GFX12-NEXT: scratch_store_b32 off, v1, off offset:16000 scope:SCOPE_SYS 1833; GFX12-NEXT: s_wait_storecnt 0x0 1834; GFX12-NEXT: scratch_load_b32 v0, off, off offset:16000 scope:SCOPE_SYS 1835; GFX12-NEXT: s_wait_loadcnt 0x0 1836; GFX12-NEXT: s_endpgm 1837; 1838; UNALIGNED_GFX9-LABEL: store_load_large_imm_offset_kernel: 1839; UNALIGNED_GFX9: ; %bb.0: ; %bb 1840; UNALIGNED_GFX9-NEXT: s_add_u32 flat_scratch_lo, s8, s13 1841; UNALIGNED_GFX9-NEXT: s_addc_u32 flat_scratch_hi, s9, 0 1842; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 13 1843; UNALIGNED_GFX9-NEXT: s_mov_b32 s0, 0 1844; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s0 offset:4 1845; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 1846; UNALIGNED_GFX9-NEXT: s_movk_i32 s0, 0x3e80 1847; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 15 1848; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s0, 4 1849; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s0 1850; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 1851; UNALIGNED_GFX9-NEXT: scratch_load_dword v0, off, s0 glc 1852; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 1853; UNALIGNED_GFX9-NEXT: s_endpgm 1854; 1855; UNALIGNED_GFX10-LABEL: store_load_large_imm_offset_kernel: 1856; 
UNALIGNED_GFX10: ; %bb.0: ; %bb 1857; UNALIGNED_GFX10-NEXT: s_add_u32 s8, s8, s13 1858; UNALIGNED_GFX10-NEXT: s_addc_u32 s9, s9, 0 1859; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8 1860; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9 1861; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v0, 13 1862; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 15 1863; UNALIGNED_GFX10-NEXT: s_movk_i32 s0, 0x3e80 1864; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s0, 4 1865; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v0, off offset:4 1866; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1867; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v1, s0 1868; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1869; UNALIGNED_GFX10-NEXT: scratch_load_dword v0, off, s0 glc dlc 1870; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 1871; UNALIGNED_GFX10-NEXT: s_endpgm 1872; 1873; UNALIGNED_GFX940-LABEL: store_load_large_imm_offset_kernel: 1874; UNALIGNED_GFX940: ; %bb.0: ; %bb 1875; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v0, 13 1876; UNALIGNED_GFX940-NEXT: s_movk_i32 s0, 0x3e80 1877; UNALIGNED_GFX940-NEXT: scratch_store_dword off, v0, off offset:4 sc0 sc1 1878; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 1879; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v0, 15 1880; UNALIGNED_GFX940-NEXT: s_add_i32 s0, s0, 4 1881; UNALIGNED_GFX940-NEXT: scratch_store_dword off, v0, s0 sc0 sc1 1882; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 1883; UNALIGNED_GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 1884; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 1885; UNALIGNED_GFX940-NEXT: s_endpgm 1886; 1887; UNALIGNED_GFX11-LABEL: store_load_large_imm_offset_kernel: 1888; UNALIGNED_GFX11: ; %bb.0: ; %bb 1889; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15 1890; UNALIGNED_GFX11-NEXT: s_movk_i32 s0, 0x3e80 1891; UNALIGNED_GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 1892; UNALIGNED_GFX11-NEXT: s_add_i32 s0, s0, 4 1893; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v0, off offset:4 dlc 1894; 
UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1895; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc 1896; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1897; UNALIGNED_GFX11-NEXT: scratch_load_b32 v0, off, s0 glc dlc 1898; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 1899; UNALIGNED_GFX11-NEXT: s_endpgm 1900; 1901; UNALIGNED_GFX12-LABEL: store_load_large_imm_offset_kernel: 1902; UNALIGNED_GFX12: ; %bb.0: ; %bb 1903; UNALIGNED_GFX12-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15 1904; UNALIGNED_GFX12-NEXT: scratch_store_b32 off, v0, off scope:SCOPE_SYS 1905; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 1906; UNALIGNED_GFX12-NEXT: scratch_store_b32 off, v1, off offset:16000 scope:SCOPE_SYS 1907; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 1908; UNALIGNED_GFX12-NEXT: scratch_load_b32 v0, off, off offset:16000 scope:SCOPE_SYS 1909; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 1910; UNALIGNED_GFX12-NEXT: s_endpgm 1911bb: 1912 %i = alloca [4096 x i32], align 4, addrspace(5) 1913 %i1 = getelementptr inbounds [4096 x i32], ptr addrspace(5) %i, i32 0, i32 undef 1914 store volatile i32 13, ptr addrspace(5) %i1, align 4 1915 %i7 = getelementptr inbounds [4096 x i32], ptr addrspace(5) %i, i32 0, i32 4000 1916 store volatile i32 15, ptr addrspace(5) %i7, align 4 1917 %i10 = getelementptr inbounds [4096 x i32], ptr addrspace(5) %i, i32 0, i32 4000 1918 %i12 = load volatile i32, ptr addrspace(5) %i10, align 4 1919 ret void 1920} 1921 1922define void @store_load_large_imm_offset_foo() { 1923; GFX9-LABEL: store_load_large_imm_offset_foo: 1924; GFX9: ; %bb.0: ; %bb 1925; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1926; GFX9-NEXT: s_movk_i32 s0, 0x3e80 1927; GFX9-NEXT: v_mov_b32_e32 v0, 13 1928; GFX9-NEXT: s_add_i32 s1, s32, s0 1929; GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4 1930; GFX9-NEXT: s_waitcnt vmcnt(0) 1931; GFX9-NEXT: v_mov_b32_e32 v0, 15 1932; GFX9-NEXT: s_add_i32 s0, s1, 4 1933; GFX9-NEXT: scratch_store_dword off, v0, s0 1934; GFX9-NEXT: s_waitcnt 
vmcnt(0) 1935; GFX9-NEXT: scratch_load_dword v0, off, s0 glc 1936; GFX9-NEXT: s_waitcnt vmcnt(0) 1937; GFX9-NEXT: s_setpc_b64 s[30:31] 1938; 1939; GFX10-LABEL: store_load_large_imm_offset_foo: 1940; GFX10: ; %bb.0: ; %bb 1941; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1942; GFX10-NEXT: v_mov_b32_e32 v0, 13 1943; GFX10-NEXT: s_movk_i32 s0, 0x3e80 1944; GFX10-NEXT: v_mov_b32_e32 v1, 15 1945; GFX10-NEXT: s_add_i32 s1, s32, s0 1946; GFX10-NEXT: s_add_i32 s0, s1, 4 1947; GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4 1948; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1949; GFX10-NEXT: scratch_store_dword off, v1, s0 1950; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1951; GFX10-NEXT: scratch_load_dword v0, off, s0 glc dlc 1952; GFX10-NEXT: s_waitcnt vmcnt(0) 1953; GFX10-NEXT: s_setpc_b64 s[30:31] 1954; 1955; GFX940-LABEL: store_load_large_imm_offset_foo: 1956; GFX940: ; %bb.0: ; %bb 1957; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1958; GFX940-NEXT: s_movk_i32 s0, 0x3e80 1959; GFX940-NEXT: v_mov_b32_e32 v0, 13 1960; GFX940-NEXT: s_add_i32 s1, s32, s0 1961; GFX940-NEXT: scratch_store_dword off, v0, s32 offset:4 sc0 sc1 1962; GFX940-NEXT: s_waitcnt vmcnt(0) 1963; GFX940-NEXT: v_mov_b32_e32 v0, 15 1964; GFX940-NEXT: s_add_i32 s0, s1, 4 1965; GFX940-NEXT: scratch_store_dword off, v0, s0 sc0 sc1 1966; GFX940-NEXT: s_waitcnt vmcnt(0) 1967; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 1968; GFX940-NEXT: s_waitcnt vmcnt(0) 1969; GFX940-NEXT: s_setpc_b64 s[30:31] 1970; 1971; GFX11-LABEL: store_load_large_imm_offset_foo: 1972; GFX11: ; %bb.0: ; %bb 1973; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1974; GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15 1975; GFX11-NEXT: s_movk_i32 s0, 0x3e80 1976; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) 1977; GFX11-NEXT: s_add_i32 s1, s32, s0 1978; GFX11-NEXT: s_add_i32 s0, s1, 4 1979; GFX11-NEXT: scratch_store_b32 off, v0, s32 offset:4 dlc 1980; GFX11-NEXT: 
s_waitcnt_vscnt null, 0x0 1981; GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc 1982; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1983; GFX11-NEXT: scratch_load_b32 v0, off, s0 glc dlc 1984; GFX11-NEXT: s_waitcnt vmcnt(0) 1985; GFX11-NEXT: s_setpc_b64 s[30:31] 1986; 1987; GFX12-LABEL: store_load_large_imm_offset_foo: 1988; GFX12: ; %bb.0: ; %bb 1989; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1990; GFX12-NEXT: s_wait_expcnt 0x0 1991; GFX12-NEXT: s_wait_samplecnt 0x0 1992; GFX12-NEXT: s_wait_bvhcnt 0x0 1993; GFX12-NEXT: s_wait_kmcnt 0x0 1994; GFX12-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15 1995; GFX12-NEXT: s_wait_storecnt 0x0 1996; GFX12-NEXT: scratch_store_b32 off, v0, s32 scope:SCOPE_SYS 1997; GFX12-NEXT: s_wait_storecnt 0x0 1998; GFX12-NEXT: scratch_store_b32 off, v1, s32 offset:16000 scope:SCOPE_SYS 1999; GFX12-NEXT: s_wait_storecnt 0x0 2000; GFX12-NEXT: scratch_load_b32 v0, off, s32 offset:16000 scope:SCOPE_SYS 2001; GFX12-NEXT: s_wait_loadcnt 0x0 2002; GFX12-NEXT: s_setpc_b64 s[30:31] 2003; 2004; UNALIGNED_GFX9-LABEL: store_load_large_imm_offset_foo: 2005; UNALIGNED_GFX9: ; %bb.0: ; %bb 2006; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2007; UNALIGNED_GFX9-NEXT: s_movk_i32 s0, 0x3e80 2008; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 13 2009; UNALIGNED_GFX9-NEXT: s_add_i32 s1, s32, s0 2010; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4 2011; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 2012; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 15 2013; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s1, 4 2014; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s0 2015; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 2016; UNALIGNED_GFX9-NEXT: scratch_load_dword v0, off, s0 glc 2017; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 2018; UNALIGNED_GFX9-NEXT: s_setpc_b64 s[30:31] 2019; 2020; UNALIGNED_GFX10-LABEL: store_load_large_imm_offset_foo: 2021; UNALIGNED_GFX10: ; %bb.0: ; %bb 2022; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2023; UNALIGNED_GFX10-NEXT: 
v_mov_b32_e32 v0, 13 2024; UNALIGNED_GFX10-NEXT: s_movk_i32 s0, 0x3e80 2025; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 15 2026; UNALIGNED_GFX10-NEXT: s_add_i32 s1, s32, s0 2027; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s1, 4 2028; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4 2029; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2030; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v1, s0 2031; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2032; UNALIGNED_GFX10-NEXT: scratch_load_dword v0, off, s0 glc dlc 2033; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 2034; UNALIGNED_GFX10-NEXT: s_setpc_b64 s[30:31] 2035; 2036; UNALIGNED_GFX940-LABEL: store_load_large_imm_offset_foo: 2037; UNALIGNED_GFX940: ; %bb.0: ; %bb 2038; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2039; UNALIGNED_GFX940-NEXT: s_movk_i32 s0, 0x3e80 2040; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v0, 13 2041; UNALIGNED_GFX940-NEXT: s_add_i32 s1, s32, s0 2042; UNALIGNED_GFX940-NEXT: scratch_store_dword off, v0, s32 offset:4 sc0 sc1 2043; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 2044; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v0, 15 2045; UNALIGNED_GFX940-NEXT: s_add_i32 s0, s1, 4 2046; UNALIGNED_GFX940-NEXT: scratch_store_dword off, v0, s0 sc0 sc1 2047; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 2048; UNALIGNED_GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 2049; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 2050; UNALIGNED_GFX940-NEXT: s_setpc_b64 s[30:31] 2051; 2052; UNALIGNED_GFX11-LABEL: store_load_large_imm_offset_foo: 2053; UNALIGNED_GFX11: ; %bb.0: ; %bb 2054; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2055; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15 2056; UNALIGNED_GFX11-NEXT: s_movk_i32 s0, 0x3e80 2057; UNALIGNED_GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) 2058; UNALIGNED_GFX11-NEXT: s_add_i32 s1, s32, s0 2059; UNALIGNED_GFX11-NEXT: s_add_i32 s0, s1, 4 2060; UNALIGNED_GFX11-NEXT: 
scratch_store_b32 off, v0, s32 offset:4 dlc 2061; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2062; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc 2063; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2064; UNALIGNED_GFX11-NEXT: scratch_load_b32 v0, off, s0 glc dlc 2065; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 2066; UNALIGNED_GFX11-NEXT: s_setpc_b64 s[30:31] 2067; 2068; UNALIGNED_GFX12-LABEL: store_load_large_imm_offset_foo: 2069; UNALIGNED_GFX12: ; %bb.0: ; %bb 2070; UNALIGNED_GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 2071; UNALIGNED_GFX12-NEXT: s_wait_expcnt 0x0 2072; UNALIGNED_GFX12-NEXT: s_wait_samplecnt 0x0 2073; UNALIGNED_GFX12-NEXT: s_wait_bvhcnt 0x0 2074; UNALIGNED_GFX12-NEXT: s_wait_kmcnt 0x0 2075; UNALIGNED_GFX12-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15 2076; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 2077; UNALIGNED_GFX12-NEXT: scratch_store_b32 off, v0, s32 scope:SCOPE_SYS 2078; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 2079; UNALIGNED_GFX12-NEXT: scratch_store_b32 off, v1, s32 offset:16000 scope:SCOPE_SYS 2080; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 2081; UNALIGNED_GFX12-NEXT: scratch_load_b32 v0, off, s32 offset:16000 scope:SCOPE_SYS 2082; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 2083; UNALIGNED_GFX12-NEXT: s_setpc_b64 s[30:31] 2084bb: 2085 %i = alloca [4096 x i32], align 4, addrspace(5) 2086 %i1 = getelementptr inbounds [4096 x i32], ptr addrspace(5) %i, i32 0, i32 undef 2087 store volatile i32 13, ptr addrspace(5) %i1, align 4 2088 %i7 = getelementptr inbounds [4096 x i32], ptr addrspace(5) %i, i32 0, i32 4000 2089 store volatile i32 15, ptr addrspace(5) %i7, align 4 2090 %i10 = getelementptr inbounds [4096 x i32], ptr addrspace(5) %i, i32 0, i32 4000 2091 %i12 = load volatile i32, ptr addrspace(5) %i10, align 4 2092 ret void 2093} 2094 2095define amdgpu_kernel void @store_load_vidx_sidx_offset(i32 %sidx) { 2096; GFX9-LABEL: store_load_vidx_sidx_offset: 2097; GFX9: ; %bb.0: ; %bb 2098; GFX9-NEXT: s_load_dword s0, s[4:5], 0x0 
2099; GFX9-NEXT: s_add_u32 flat_scratch_lo, s8, s13 2100; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s9, 0 2101; GFX9-NEXT: v_mov_b32_e32 v1, 15 2102; GFX9-NEXT: s_waitcnt lgkmcnt(0) 2103; GFX9-NEXT: v_add_lshl_u32 v0, s0, v0, 2 2104; GFX9-NEXT: scratch_store_dword v0, v1, off offset:1024 2105; GFX9-NEXT: s_waitcnt vmcnt(0) 2106; GFX9-NEXT: scratch_load_dword v0, v0, off offset:1024 glc 2107; GFX9-NEXT: s_waitcnt vmcnt(0) 2108; GFX9-NEXT: s_endpgm 2109; 2110; GFX10-LABEL: store_load_vidx_sidx_offset: 2111; GFX10: ; %bb.0: ; %bb 2112; GFX10-NEXT: s_add_u32 s8, s8, s13 2113; GFX10-NEXT: s_addc_u32 s9, s9, 0 2114; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8 2115; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9 2116; GFX10-NEXT: s_load_dword s0, s[4:5], 0x0 2117; GFX10-NEXT: v_mov_b32_e32 v1, 15 2118; GFX10-NEXT: s_waitcnt lgkmcnt(0) 2119; GFX10-NEXT: v_add_lshl_u32 v0, s0, v0, 2 2120; GFX10-NEXT: scratch_store_dword v0, v1, off offset:1024 2121; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2122; GFX10-NEXT: scratch_load_dword v0, v0, off offset:1024 glc dlc 2123; GFX10-NEXT: s_waitcnt vmcnt(0) 2124; GFX10-NEXT: s_endpgm 2125; 2126; GFX940-LABEL: store_load_vidx_sidx_offset: 2127; GFX940: ; %bb.0: ; %bb 2128; GFX940-NEXT: s_load_dword s0, s[4:5], 0x0 2129; GFX940-NEXT: v_and_b32_e32 v0, 0x3ff, v0 2130; GFX940-NEXT: v_mov_b32_e32 v1, 15 2131; GFX940-NEXT: s_waitcnt lgkmcnt(0) 2132; GFX940-NEXT: v_add_lshl_u32 v0, s0, v0, 2 2133; GFX940-NEXT: scratch_store_dword v0, v1, off offset:1024 sc0 sc1 2134; GFX940-NEXT: s_waitcnt vmcnt(0) 2135; GFX940-NEXT: scratch_load_dword v0, v0, off offset:1024 sc0 sc1 2136; GFX940-NEXT: s_waitcnt vmcnt(0) 2137; GFX940-NEXT: s_endpgm 2138; 2139; GFX11-LABEL: store_load_vidx_sidx_offset: 2140; GFX11: ; %bb.0: ; %bb 2141; GFX11-NEXT: s_load_b32 s0, s[4:5], 0x0 2142; GFX11-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0 2143; GFX11-NEXT: s_waitcnt lgkmcnt(0) 2144; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 2145; GFX11-NEXT: 
v_add_lshl_u32 v0, s0, v0, 2 2146; GFX11-NEXT: scratch_store_b32 v0, v1, off offset:1024 dlc 2147; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2148; GFX11-NEXT: scratch_load_b32 v0, v0, off offset:1024 glc dlc 2149; GFX11-NEXT: s_waitcnt vmcnt(0) 2150; GFX11-NEXT: s_endpgm 2151; 2152; GFX12-LABEL: store_load_vidx_sidx_offset: 2153; GFX12: ; %bb.0: ; %bb 2154; GFX12-NEXT: s_load_b32 s0, s[4:5], 0x0 2155; GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0 2156; GFX12-NEXT: s_wait_kmcnt 0x0 2157; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 2158; GFX12-NEXT: v_add_lshl_u32 v0, s0, v0, 2 2159; GFX12-NEXT: scratch_store_b32 v0, v1, off offset:1024 scope:SCOPE_SYS 2160; GFX12-NEXT: s_wait_storecnt 0x0 2161; GFX12-NEXT: scratch_load_b32 v0, v0, off offset:1024 scope:SCOPE_SYS 2162; GFX12-NEXT: s_wait_loadcnt 0x0 2163; GFX12-NEXT: s_endpgm 2164; 2165; UNALIGNED_GFX9-LABEL: store_load_vidx_sidx_offset: 2166; UNALIGNED_GFX9: ; %bb.0: ; %bb 2167; UNALIGNED_GFX9-NEXT: s_load_dword s0, s[4:5], 0x0 2168; UNALIGNED_GFX9-NEXT: s_add_u32 flat_scratch_lo, s8, s13 2169; UNALIGNED_GFX9-NEXT: s_addc_u32 flat_scratch_hi, s9, 0 2170; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v1, 15 2171; UNALIGNED_GFX9-NEXT: s_waitcnt lgkmcnt(0) 2172; UNALIGNED_GFX9-NEXT: v_add_lshl_u32 v0, s0, v0, 2 2173; UNALIGNED_GFX9-NEXT: scratch_store_dword v0, v1, off offset:1024 2174; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 2175; UNALIGNED_GFX9-NEXT: scratch_load_dword v0, v0, off offset:1024 glc 2176; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 2177; UNALIGNED_GFX9-NEXT: s_endpgm 2178; 2179; UNALIGNED_GFX10-LABEL: store_load_vidx_sidx_offset: 2180; UNALIGNED_GFX10: ; %bb.0: ; %bb 2181; UNALIGNED_GFX10-NEXT: s_add_u32 s8, s8, s13 2182; UNALIGNED_GFX10-NEXT: s_addc_u32 s9, s9, 0 2183; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8 2184; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9 2185; UNALIGNED_GFX10-NEXT: s_load_dword s0, s[4:5], 0x0 2186; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 
v1, 15 2187; UNALIGNED_GFX10-NEXT: s_waitcnt lgkmcnt(0) 2188; UNALIGNED_GFX10-NEXT: v_add_lshl_u32 v0, s0, v0, 2 2189; UNALIGNED_GFX10-NEXT: scratch_store_dword v0, v1, off offset:1024 2190; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2191; UNALIGNED_GFX10-NEXT: scratch_load_dword v0, v0, off offset:1024 glc dlc 2192; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 2193; UNALIGNED_GFX10-NEXT: s_endpgm 2194; 2195; UNALIGNED_GFX940-LABEL: store_load_vidx_sidx_offset: 2196; UNALIGNED_GFX940: ; %bb.0: ; %bb 2197; UNALIGNED_GFX940-NEXT: s_load_dword s0, s[4:5], 0x0 2198; UNALIGNED_GFX940-NEXT: v_and_b32_e32 v0, 0x3ff, v0 2199; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v1, 15 2200; UNALIGNED_GFX940-NEXT: s_waitcnt lgkmcnt(0) 2201; UNALIGNED_GFX940-NEXT: v_add_lshl_u32 v0, s0, v0, 2 2202; UNALIGNED_GFX940-NEXT: scratch_store_dword v0, v1, off offset:1024 sc0 sc1 2203; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 2204; UNALIGNED_GFX940-NEXT: scratch_load_dword v0, v0, off offset:1024 sc0 sc1 2205; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 2206; UNALIGNED_GFX940-NEXT: s_endpgm 2207; 2208; UNALIGNED_GFX11-LABEL: store_load_vidx_sidx_offset: 2209; UNALIGNED_GFX11: ; %bb.0: ; %bb 2210; UNALIGNED_GFX11-NEXT: s_load_b32 s0, s[4:5], 0x0 2211; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0 2212; UNALIGNED_GFX11-NEXT: s_waitcnt lgkmcnt(0) 2213; UNALIGNED_GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 2214; UNALIGNED_GFX11-NEXT: v_add_lshl_u32 v0, s0, v0, 2 2215; UNALIGNED_GFX11-NEXT: scratch_store_b32 v0, v1, off offset:1024 dlc 2216; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2217; UNALIGNED_GFX11-NEXT: scratch_load_b32 v0, v0, off offset:1024 glc dlc 2218; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 2219; UNALIGNED_GFX11-NEXT: s_endpgm 2220; 2221; UNALIGNED_GFX12-LABEL: store_load_vidx_sidx_offset: 2222; UNALIGNED_GFX12: ; %bb.0: ; %bb 2223; UNALIGNED_GFX12-NEXT: s_load_b32 s0, s[4:5], 0x0 2224; UNALIGNED_GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 
0x3ff, v0 2225; UNALIGNED_GFX12-NEXT: s_wait_kmcnt 0x0 2226; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 2227; UNALIGNED_GFX12-NEXT: v_add_lshl_u32 v0, s0, v0, 2 2228; UNALIGNED_GFX12-NEXT: scratch_store_b32 v0, v1, off offset:1024 scope:SCOPE_SYS 2229; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 2230; UNALIGNED_GFX12-NEXT: scratch_load_b32 v0, v0, off offset:1024 scope:SCOPE_SYS 2231; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 2232; UNALIGNED_GFX12-NEXT: s_endpgm 2233bb: 2234 %alloca = alloca [32 x i32], align 4, addrspace(5) 2235 %vidx = tail call i32 @llvm.amdgcn.workitem.id.x() 2236 %add1 = add nsw i32 %sidx, %vidx 2237 %add2 = add nsw i32 %add1, 256 2238 %gep = getelementptr inbounds [32 x i32], ptr addrspace(5) %alloca, i32 0, i32 %add2 2239 store volatile i32 15, ptr addrspace(5) %gep, align 4 2240 %load = load volatile i32, ptr addrspace(5) %gep, align 4 2241 ret void 2242} 2243 2244define void @store_load_i64_aligned(ptr addrspace(5) nocapture %arg) { 2245; GFX9-LABEL: store_load_i64_aligned: 2246; GFX9: ; %bb.0: ; %bb 2247; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2248; GFX9-NEXT: v_mov_b32_e32 v1, 15 2249; GFX9-NEXT: v_mov_b32_e32 v2, 0 2250; GFX9-NEXT: scratch_store_dwordx2 v0, v[1:2], off 2251; GFX9-NEXT: s_waitcnt vmcnt(0) 2252; GFX9-NEXT: scratch_load_dwordx2 v[0:1], v0, off glc 2253; GFX9-NEXT: s_waitcnt vmcnt(0) 2254; GFX9-NEXT: s_setpc_b64 s[30:31] 2255; 2256; GFX10-LABEL: store_load_i64_aligned: 2257; GFX10: ; %bb.0: ; %bb 2258; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2259; GFX10-NEXT: v_mov_b32_e32 v1, 15 2260; GFX10-NEXT: v_mov_b32_e32 v2, 0 2261; GFX10-NEXT: scratch_store_dwordx2 v0, v[1:2], off 2262; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2263; GFX10-NEXT: scratch_load_dwordx2 v[0:1], v0, off glc dlc 2264; GFX10-NEXT: s_waitcnt vmcnt(0) 2265; GFX10-NEXT: s_setpc_b64 s[30:31] 2266; 2267; GFX940-LABEL: store_load_i64_aligned: 2268; GFX940: ; %bb.0: ; %bb 2269; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2270; 
GFX940-NEXT: v_mov_b64_e32 v[2:3], 15 2271; GFX940-NEXT: scratch_store_dwordx2 v0, v[2:3], off sc0 sc1 2272; GFX940-NEXT: s_waitcnt vmcnt(0) 2273; GFX940-NEXT: scratch_load_dwordx2 v[0:1], v0, off sc0 sc1 2274; GFX940-NEXT: s_waitcnt vmcnt(0) 2275; GFX940-NEXT: s_setpc_b64 s[30:31] 2276; 2277; GFX11-LABEL: store_load_i64_aligned: 2278; GFX11: ; %bb.0: ; %bb 2279; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2280; GFX11-NEXT: v_mov_b32_e32 v1, 15 2281; GFX11-NEXT: v_mov_b32_e32 v2, 0 2282; GFX11-NEXT: scratch_store_b64 v0, v[1:2], off dlc 2283; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2284; GFX11-NEXT: scratch_load_b64 v[0:1], v0, off glc dlc 2285; GFX11-NEXT: s_waitcnt vmcnt(0) 2286; GFX11-NEXT: s_setpc_b64 s[30:31] 2287; 2288; GFX12-LABEL: store_load_i64_aligned: 2289; GFX12: ; %bb.0: ; %bb 2290; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 2291; GFX12-NEXT: s_wait_expcnt 0x0 2292; GFX12-NEXT: s_wait_samplecnt 0x0 2293; GFX12-NEXT: s_wait_bvhcnt 0x0 2294; GFX12-NEXT: s_wait_kmcnt 0x0 2295; GFX12-NEXT: v_mov_b32_e32 v1, 15 2296; GFX12-NEXT: v_mov_b32_e32 v2, 0 2297; GFX12-NEXT: s_wait_storecnt 0x0 2298; GFX12-NEXT: scratch_store_b64 v0, v[1:2], off scope:SCOPE_SYS 2299; GFX12-NEXT: s_wait_storecnt 0x0 2300; GFX12-NEXT: scratch_load_b64 v[0:1], v0, off scope:SCOPE_SYS 2301; GFX12-NEXT: s_wait_loadcnt 0x0 2302; GFX12-NEXT: s_setpc_b64 s[30:31] 2303; 2304; UNALIGNED_GFX9-LABEL: store_load_i64_aligned: 2305; UNALIGNED_GFX9: ; %bb.0: ; %bb 2306; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2307; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v1, 15 2308; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v2, 0 2309; UNALIGNED_GFX9-NEXT: scratch_store_dwordx2 v0, v[1:2], off 2310; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 2311; UNALIGNED_GFX9-NEXT: scratch_load_dwordx2 v[0:1], v0, off glc 2312; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 2313; UNALIGNED_GFX9-NEXT: s_setpc_b64 s[30:31] 2314; 2315; UNALIGNED_GFX10-LABEL: store_load_i64_aligned: 2316; UNALIGNED_GFX10: ; %bb.0: ; %bb 2317; 
UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2318; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 15 2319; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v2, 0 2320; UNALIGNED_GFX10-NEXT: scratch_store_dwordx2 v0, v[1:2], off 2321; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2322; UNALIGNED_GFX10-NEXT: scratch_load_dwordx2 v[0:1], v0, off glc dlc 2323; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 2324; UNALIGNED_GFX10-NEXT: s_setpc_b64 s[30:31] 2325; 2326; UNALIGNED_GFX940-LABEL: store_load_i64_aligned: 2327; UNALIGNED_GFX940: ; %bb.0: ; %bb 2328; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2329; UNALIGNED_GFX940-NEXT: v_mov_b64_e32 v[2:3], 15 2330; UNALIGNED_GFX940-NEXT: scratch_store_dwordx2 v0, v[2:3], off sc0 sc1 2331; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 2332; UNALIGNED_GFX940-NEXT: scratch_load_dwordx2 v[0:1], v0, off sc0 sc1 2333; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 2334; UNALIGNED_GFX940-NEXT: s_setpc_b64 s[30:31] 2335; 2336; UNALIGNED_GFX11-LABEL: store_load_i64_aligned: 2337; UNALIGNED_GFX11: ; %bb.0: ; %bb 2338; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2339; UNALIGNED_GFX11-NEXT: v_mov_b32_e32 v1, 15 2340; UNALIGNED_GFX11-NEXT: v_mov_b32_e32 v2, 0 2341; UNALIGNED_GFX11-NEXT: scratch_store_b64 v0, v[1:2], off dlc 2342; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2343; UNALIGNED_GFX11-NEXT: scratch_load_b64 v[0:1], v0, off glc dlc 2344; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 2345; UNALIGNED_GFX11-NEXT: s_setpc_b64 s[30:31] 2346; 2347; UNALIGNED_GFX12-LABEL: store_load_i64_aligned: 2348; UNALIGNED_GFX12: ; %bb.0: ; %bb 2349; UNALIGNED_GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 2350; UNALIGNED_GFX12-NEXT: s_wait_expcnt 0x0 2351; UNALIGNED_GFX12-NEXT: s_wait_samplecnt 0x0 2352; UNALIGNED_GFX12-NEXT: s_wait_bvhcnt 0x0 2353; UNALIGNED_GFX12-NEXT: s_wait_kmcnt 0x0 2354; UNALIGNED_GFX12-NEXT: v_mov_b32_e32 v1, 15 2355; UNALIGNED_GFX12-NEXT: v_mov_b32_e32 v2, 0 2356; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 2357; 
UNALIGNED_GFX12-NEXT: scratch_store_b64 v0, v[1:2], off scope:SCOPE_SYS 2358; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 2359; UNALIGNED_GFX12-NEXT: scratch_load_b64 v[0:1], v0, off scope:SCOPE_SYS 2360; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 2361; UNALIGNED_GFX12-NEXT: s_setpc_b64 s[30:31] 2362bb: 2363 store volatile i64 15, ptr addrspace(5) %arg, align 8 2364 %load = load volatile i64, ptr addrspace(5) %arg, align 8 2365 ret void 2366} 2367 2368define void @store_load_i64_unaligned(ptr addrspace(5) nocapture %arg) { 2369; GFX9-LABEL: store_load_i64_unaligned: 2370; GFX9: ; %bb.0: ; %bb 2371; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2372; GFX9-NEXT: v_mov_b32_e32 v1, 15 2373; GFX9-NEXT: v_mov_b32_e32 v2, 0 2374; GFX9-NEXT: scratch_store_dwordx2 v0, v[1:2], off 2375; GFX9-NEXT: s_waitcnt vmcnt(0) 2376; GFX9-NEXT: scratch_load_dwordx2 v[0:1], v0, off glc 2377; GFX9-NEXT: s_waitcnt vmcnt(0) 2378; GFX9-NEXT: s_setpc_b64 s[30:31] 2379; 2380; GFX10-LABEL: store_load_i64_unaligned: 2381; GFX10: ; %bb.0: ; %bb 2382; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2383; GFX10-NEXT: v_mov_b32_e32 v1, 15 2384; GFX10-NEXT: v_mov_b32_e32 v2, 0 2385; GFX10-NEXT: scratch_store_dwordx2 v0, v[1:2], off 2386; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2387; GFX10-NEXT: scratch_load_dwordx2 v[0:1], v0, off glc dlc 2388; GFX10-NEXT: s_waitcnt vmcnt(0) 2389; GFX10-NEXT: s_setpc_b64 s[30:31] 2390; 2391; GFX940-LABEL: store_load_i64_unaligned: 2392; GFX940: ; %bb.0: ; %bb 2393; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2394; GFX940-NEXT: v_mov_b64_e32 v[2:3], 15 2395; GFX940-NEXT: scratch_store_dwordx2 v0, v[2:3], off sc0 sc1 2396; GFX940-NEXT: s_waitcnt vmcnt(0) 2397; GFX940-NEXT: scratch_load_dwordx2 v[0:1], v0, off sc0 sc1 2398; GFX940-NEXT: s_waitcnt vmcnt(0) 2399; GFX940-NEXT: s_setpc_b64 s[30:31] 2400; 2401; GFX11-LABEL: store_load_i64_unaligned: 2402; GFX11: ; %bb.0: ; %bb 2403; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2404; GFX11-NEXT: 
v_mov_b32_e32 v1, 15 2405; GFX11-NEXT: v_mov_b32_e32 v2, 0 2406; GFX11-NEXT: scratch_store_b64 v0, v[1:2], off dlc 2407; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2408; GFX11-NEXT: scratch_load_b64 v[0:1], v0, off glc dlc 2409; GFX11-NEXT: s_waitcnt vmcnt(0) 2410; GFX11-NEXT: s_setpc_b64 s[30:31] 2411; 2412; GFX12-LABEL: store_load_i64_unaligned: 2413; GFX12: ; %bb.0: ; %bb 2414; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 2415; GFX12-NEXT: s_wait_expcnt 0x0 2416; GFX12-NEXT: s_wait_samplecnt 0x0 2417; GFX12-NEXT: s_wait_bvhcnt 0x0 2418; GFX12-NEXT: s_wait_kmcnt 0x0 2419; GFX12-NEXT: v_mov_b32_e32 v1, 15 2420; GFX12-NEXT: v_mov_b32_e32 v2, 0 2421; GFX12-NEXT: s_wait_storecnt 0x0 2422; GFX12-NEXT: scratch_store_b64 v0, v[1:2], off scope:SCOPE_SYS 2423; GFX12-NEXT: s_wait_storecnt 0x0 2424; GFX12-NEXT: scratch_load_b64 v[0:1], v0, off scope:SCOPE_SYS 2425; GFX12-NEXT: s_wait_loadcnt 0x0 2426; GFX12-NEXT: s_setpc_b64 s[30:31] 2427; 2428; UNALIGNED_GFX9-LABEL: store_load_i64_unaligned: 2429; UNALIGNED_GFX9: ; %bb.0: ; %bb 2430; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2431; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v4, 15 2432; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v1, 4, v0 2433; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v2, 2, v0 2434; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v3, 1, v0 2435; UNALIGNED_GFX9-NEXT: scratch_store_byte v0, v4, off 2436; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 2437; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v4, 0 2438; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v6, 6, v0 2439; UNALIGNED_GFX9-NEXT: scratch_store_byte v3, v4, off 2440; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 2441; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v5, 3, v0 2442; UNALIGNED_GFX9-NEXT: scratch_store_byte v2, v4, off 2443; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 2444; UNALIGNED_GFX9-NEXT: scratch_store_byte v5, v4, off 2445; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 2446; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v7, 5, v0 2447; UNALIGNED_GFX9-NEXT: scratch_store_byte v1, v4, off 2448; UNALIGNED_GFX9-NEXT: 
s_waitcnt vmcnt(0) 2449; UNALIGNED_GFX9-NEXT: scratch_store_byte v7, v4, off 2450; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 2451; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v8, 7, v0 2452; UNALIGNED_GFX9-NEXT: scratch_store_byte v6, v4, off 2453; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 2454; UNALIGNED_GFX9-NEXT: scratch_store_byte v8, v4, off 2455; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 2456; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v4, v0, off glc 2457; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 2458; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr7 2459; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr2 2460; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr6 2461; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr1 2462; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr3 2463; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr5 2464; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr8 2465; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr0 2466; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v4, v3, off glc 2467; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 2468; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v4, v2, off glc 2469; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 2470; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v4, v5, off glc 2471; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 2472; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v4, v1, off glc 2473; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 2474; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v4, v7, off glc 2475; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 2476; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v4, v6, off glc 2477; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 2478; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v4, v8, off glc 2479; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 2480; UNALIGNED_GFX9-NEXT: s_setpc_b64 s[30:31] 2481; 2482; UNALIGNED_GFX10-LABEL: store_load_i64_unaligned: 2483; UNALIGNED_GFX10: ; %bb.0: ; %bb 2484; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2485; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 15 2486; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v2, 0 2487; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v4, 
1, v0 2488; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v3, 4, v0 2489; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v5, 2, v0 2490; UNALIGNED_GFX10-NEXT: scratch_store_byte v0, v1, off 2491; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2492; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v1, 3, v0 2493; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v6, 5, v0 2494; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v7, 6, v0 2495; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v8, 7, v0 2496; UNALIGNED_GFX10-NEXT: scratch_store_byte v4, v2, off 2497; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2498; UNALIGNED_GFX10-NEXT: scratch_store_byte v5, v2, off 2499; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2500; UNALIGNED_GFX10-NEXT: scratch_store_byte v1, v2, off 2501; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2502; UNALIGNED_GFX10-NEXT: scratch_store_byte v3, v2, off 2503; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2504; UNALIGNED_GFX10-NEXT: scratch_store_byte v6, v2, off 2505; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2506; UNALIGNED_GFX10-NEXT: scratch_store_byte v7, v2, off 2507; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2508; UNALIGNED_GFX10-NEXT: scratch_store_byte v8, v2, off 2509; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2510; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v0, off glc dlc 2511; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 2512; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v4, off glc dlc 2513; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 2514; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v5, off glc dlc 2515; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 2516; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v1, off glc dlc 2517; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 2518; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v3, off glc dlc 2519; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 2520; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v6, off glc dlc 2521; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 2522; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v7, off glc dlc 2523; 
UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 2524; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v8, off glc dlc 2525; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 2526; UNALIGNED_GFX10-NEXT: s_setpc_b64 s[30:31] 2527; 2528; UNALIGNED_GFX940-LABEL: store_load_i64_unaligned: 2529; UNALIGNED_GFX940: ; %bb.0: ; %bb 2530; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2531; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v4, 15 2532; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v1, 4, v0 2533; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v2, 2, v0 2534; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v3, 1, v0 2535; UNALIGNED_GFX940-NEXT: scratch_store_byte v0, v4, off sc0 sc1 2536; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 2537; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v4, 0 2538; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v6, 6, v0 2539; UNALIGNED_GFX940-NEXT: scratch_store_byte v3, v4, off sc0 sc1 2540; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 2541; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v5, 3, v0 2542; UNALIGNED_GFX940-NEXT: scratch_store_byte v2, v4, off sc0 sc1 2543; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 2544; UNALIGNED_GFX940-NEXT: scratch_store_byte v5, v4, off sc0 sc1 2545; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 2546; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v7, 5, v0 2547; UNALIGNED_GFX940-NEXT: scratch_store_byte v1, v4, off sc0 sc1 2548; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 2549; UNALIGNED_GFX940-NEXT: scratch_store_byte v7, v4, off sc0 sc1 2550; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 2551; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v8, 7, v0 2552; UNALIGNED_GFX940-NEXT: scratch_store_byte v6, v4, off sc0 sc1 2553; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 2554; UNALIGNED_GFX940-NEXT: scratch_store_byte v8, v4, off sc0 sc1 2555; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 2556; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v4, v0, off sc0 sc1 2557; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 2558; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr7 2559; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr2 2560; 
UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr6 2561; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr1 2562; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr3 2563; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr5 2564; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr8 2565; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr0 2566; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v4, v3, off sc0 sc1 2567; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 2568; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v4, v2, off sc0 sc1 2569; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 2570; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v4, v5, off sc0 sc1 2571; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 2572; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v4, v1, off sc0 sc1 2573; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 2574; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v4, v7, off sc0 sc1 2575; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 2576; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v4, v6, off sc0 sc1 2577; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 2578; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v4, v8, off sc0 sc1 2579; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 2580; UNALIGNED_GFX940-NEXT: s_setpc_b64 s[30:31] 2581; 2582; UNALIGNED_GFX11-LABEL: store_load_i64_unaligned: 2583; UNALIGNED_GFX11: ; %bb.0: ; %bb 2584; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2585; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_mov_b32 v2, 0 2586; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v4, 1, v0 2587; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v3, 4, v0 2588; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v5, 2, v0 2589; UNALIGNED_GFX11-NEXT: scratch_store_b8 v0, v1, off dlc 2590; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2591; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v0 2592; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v6, 5, v0 2593; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v7, 6, v0 2594; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v8, 7, v0 2595; UNALIGNED_GFX11-NEXT: scratch_store_b8 v4, v2, off dlc 2596; UNALIGNED_GFX11-NEXT: 
s_waitcnt_vscnt null, 0x0 2597; UNALIGNED_GFX11-NEXT: scratch_store_b8 v5, v2, off dlc 2598; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2599; UNALIGNED_GFX11-NEXT: scratch_store_b8 v1, v2, off dlc 2600; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2601; UNALIGNED_GFX11-NEXT: scratch_store_b8 v3, v2, off dlc 2602; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2603; UNALIGNED_GFX11-NEXT: scratch_store_b8 v6, v2, off dlc 2604; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2605; UNALIGNED_GFX11-NEXT: scratch_store_b8 v7, v2, off dlc 2606; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2607; UNALIGNED_GFX11-NEXT: scratch_store_b8 v8, v2, off dlc 2608; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2609; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v0, off glc dlc 2610; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 2611; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v4, off glc dlc 2612; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 2613; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v5, off glc dlc 2614; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 2615; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v1, off glc dlc 2616; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 2617; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v3, off glc dlc 2618; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 2619; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v6, off glc dlc 2620; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 2621; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v7, off glc dlc 2622; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 2623; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v8, off glc dlc 2624; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 2625; UNALIGNED_GFX11-NEXT: s_setpc_b64 s[30:31] 2626; 2627; UNALIGNED_GFX12-LABEL: store_load_i64_unaligned: 2628; UNALIGNED_GFX12: ; %bb.0: ; %bb 2629; UNALIGNED_GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 2630; UNALIGNED_GFX12-NEXT: s_wait_expcnt 0x0 2631; UNALIGNED_GFX12-NEXT: s_wait_samplecnt 0x0 2632; UNALIGNED_GFX12-NEXT: s_wait_bvhcnt 0x0 2633; UNALIGNED_GFX12-NEXT: s_wait_kmcnt 0x0 2634; 
UNALIGNED_GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_mov_b32 v2, 0 2635; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 2636; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v1, off scope:SCOPE_SYS 2637; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 2638; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:1 scope:SCOPE_SYS 2639; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 2640; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:2 scope:SCOPE_SYS 2641; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 2642; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:3 scope:SCOPE_SYS 2643; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 2644; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:4 scope:SCOPE_SYS 2645; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 2646; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:5 scope:SCOPE_SYS 2647; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 2648; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:6 scope:SCOPE_SYS 2649; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 2650; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:7 scope:SCOPE_SYS 2651; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 2652; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off scope:SCOPE_SYS 2653; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 2654; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:1 scope:SCOPE_SYS 2655; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 2656; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:2 scope:SCOPE_SYS 2657; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 2658; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:3 scope:SCOPE_SYS 2659; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 2660; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:4 scope:SCOPE_SYS 2661; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 2662; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:5 scope:SCOPE_SYS 2663; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 2664; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:6 scope:SCOPE_SYS 2665; 
UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 2666; UNALIGNED_GFX12-NEXT: scratch_load_u8 v0, v0, off offset:7 scope:SCOPE_SYS 2667; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 2668; UNALIGNED_GFX12-NEXT: s_setpc_b64 s[30:31] 2669bb: 2670 store volatile i64 15, ptr addrspace(5) %arg, align 1 2671 %load = load volatile i64, ptr addrspace(5) %arg, align 1 2672 ret void 2673} 2674 2675define void @store_load_v3i32_unaligned(ptr addrspace(5) nocapture %arg) { 2676; GFX9-LABEL: store_load_v3i32_unaligned: 2677; GFX9: ; %bb.0: ; %bb 2678; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2679; GFX9-NEXT: s_mov_b32 s2, 3 2680; GFX9-NEXT: s_mov_b32 s1, 2 2681; GFX9-NEXT: s_mov_b32 s0, 1 2682; GFX9-NEXT: v_mov_b32_e32 v3, s2 2683; GFX9-NEXT: v_mov_b32_e32 v2, s1 2684; GFX9-NEXT: v_mov_b32_e32 v1, s0 2685; GFX9-NEXT: scratch_store_dwordx3 v0, v[1:3], off 2686; GFX9-NEXT: s_waitcnt vmcnt(0) 2687; GFX9-NEXT: scratch_load_dwordx3 v[0:2], v0, off glc 2688; GFX9-NEXT: s_waitcnt vmcnt(0) 2689; GFX9-NEXT: s_setpc_b64 s[30:31] 2690; 2691; GFX10-LABEL: store_load_v3i32_unaligned: 2692; GFX10: ; %bb.0: ; %bb 2693; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2694; GFX10-NEXT: s_mov_b32 s2, 3 2695; GFX10-NEXT: s_mov_b32 s1, 2 2696; GFX10-NEXT: s_mov_b32 s0, 1 2697; GFX10-NEXT: v_mov_b32_e32 v3, s2 2698; GFX10-NEXT: v_mov_b32_e32 v2, s1 2699; GFX10-NEXT: v_mov_b32_e32 v1, s0 2700; GFX10-NEXT: scratch_store_dwordx3 v0, v[1:3], off 2701; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2702; GFX10-NEXT: scratch_load_dwordx3 v[0:2], v0, off glc dlc 2703; GFX10-NEXT: s_waitcnt vmcnt(0) 2704; GFX10-NEXT: s_setpc_b64 s[30:31] 2705; 2706; GFX940-LABEL: store_load_v3i32_unaligned: 2707; GFX940: ; %bb.0: ; %bb 2708; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2709; GFX940-NEXT: s_mov_b32 s2, 3 2710; GFX940-NEXT: s_mov_b32 s1, 2 2711; GFX940-NEXT: s_mov_b32 s0, 1 2712; GFX940-NEXT: v_mov_b32_e32 v4, s2 2713; GFX940-NEXT: v_mov_b32_e32 v3, s1 2714; GFX940-NEXT: v_mov_b32_e32 v2, s0 2715; 
GFX940-NEXT: scratch_store_dwordx3 v0, v[2:4], off sc0 sc1 2716; GFX940-NEXT: s_waitcnt vmcnt(0) 2717; GFX940-NEXT: scratch_load_dwordx3 v[0:2], v0, off sc0 sc1 2718; GFX940-NEXT: s_waitcnt vmcnt(0) 2719; GFX940-NEXT: s_setpc_b64 s[30:31] 2720; 2721; GFX11-LABEL: store_load_v3i32_unaligned: 2722; GFX11: ; %bb.0: ; %bb 2723; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2724; GFX11-NEXT: s_mov_b32 s2, 3 2725; GFX11-NEXT: s_mov_b32 s1, 2 2726; GFX11-NEXT: s_mov_b32 s0, 1 2727; GFX11-NEXT: v_dual_mov_b32 v3, s2 :: v_dual_mov_b32 v2, s1 2728; GFX11-NEXT: v_mov_b32_e32 v1, s0 2729; GFX11-NEXT: scratch_store_b96 v0, v[1:3], off dlc 2730; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2731; GFX11-NEXT: scratch_load_b96 v[0:2], v0, off glc dlc 2732; GFX11-NEXT: s_waitcnt vmcnt(0) 2733; GFX11-NEXT: s_setpc_b64 s[30:31] 2734; 2735; GFX12-LABEL: store_load_v3i32_unaligned: 2736; GFX12: ; %bb.0: ; %bb 2737; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 2738; GFX12-NEXT: s_wait_expcnt 0x0 2739; GFX12-NEXT: s_wait_samplecnt 0x0 2740; GFX12-NEXT: s_wait_bvhcnt 0x0 2741; GFX12-NEXT: s_wait_kmcnt 0x0 2742; GFX12-NEXT: s_mov_b32 s2, 3 2743; GFX12-NEXT: s_mov_b32 s1, 2 2744; GFX12-NEXT: s_mov_b32 s0, 1 2745; GFX12-NEXT: s_wait_alu 0xfffe 2746; GFX12-NEXT: v_dual_mov_b32 v3, s2 :: v_dual_mov_b32 v2, s1 2747; GFX12-NEXT: v_mov_b32_e32 v1, s0 2748; GFX12-NEXT: s_wait_storecnt 0x0 2749; GFX12-NEXT: scratch_store_b96 v0, v[1:3], off scope:SCOPE_SYS 2750; GFX12-NEXT: s_wait_storecnt 0x0 2751; GFX12-NEXT: scratch_load_b96 v[0:2], v0, off scope:SCOPE_SYS 2752; GFX12-NEXT: s_wait_loadcnt 0x0 2753; GFX12-NEXT: s_setpc_b64 s[30:31] 2754; 2755; UNALIGNED_GFX9-LABEL: store_load_v3i32_unaligned: 2756; UNALIGNED_GFX9: ; %bb.0: ; %bb 2757; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2758; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v3, 1 2759; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v1, 2 2760; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v2, 2, v0 2761; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v4, 1, v0 2762; 
UNALIGNED_GFX9-NEXT: scratch_store_byte v0, v3, off 2763; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 2764; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v3, 0 2765; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v6, 4, v0 2766; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v7, 6, v0 2767; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v9, 8, v0 2768; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v10, 10, v0 2769; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v12, 3 2770; UNALIGNED_GFX9-NEXT: scratch_store_byte v4, v3, off 2771; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 2772; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v5, 3, v0 2773; UNALIGNED_GFX9-NEXT: scratch_store_byte v2, v3, off 2774; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 2775; UNALIGNED_GFX9-NEXT: scratch_store_byte v5, v3, off 2776; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 2777; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v8, 5, v0 2778; UNALIGNED_GFX9-NEXT: scratch_store_byte v6, v1, off 2779; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 2780; UNALIGNED_GFX9-NEXT: scratch_store_byte v8, v3, off 2781; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 2782; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v1, 7, v0 2783; UNALIGNED_GFX9-NEXT: scratch_store_byte v7, v3, off 2784; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 2785; UNALIGNED_GFX9-NEXT: scratch_store_byte v1, v3, off 2786; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 2787; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v11, 9, v0 2788; UNALIGNED_GFX9-NEXT: scratch_store_byte v9, v12, off 2789; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 2790; UNALIGNED_GFX9-NEXT: scratch_store_byte v11, v3, off 2791; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 2792; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v12, 11, v0 2793; UNALIGNED_GFX9-NEXT: scratch_store_byte v10, v3, off 2794; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 2795; UNALIGNED_GFX9-NEXT: scratch_store_byte v12, v3, off 2796; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 2797; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v0, off glc 2798; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 2799; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr12 2800; UNALIGNED_GFX9-NEXT: ; kill: 
killed $vgpr4 2801; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr11 2802; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr7 2803; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr6 2804; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr10 2805; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr5 2806; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr9 2807; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr1 2808; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr8 2809; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr2 2810; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr0 2811; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v4, off glc 2812; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 2813; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v2, off glc 2814; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 2815; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v5, off glc 2816; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 2817; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v6, off glc 2818; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 2819; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v8, off glc 2820; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 2821; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v7, off glc 2822; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 2823; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v1, off glc 2824; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 2825; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v9, off glc 2826; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 2827; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v11, off glc 2828; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 2829; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v10, off glc 2830; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 2831; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v12, off glc 2832; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 2833; UNALIGNED_GFX9-NEXT: s_setpc_b64 s[30:31] 2834; 2835; UNALIGNED_GFX10-LABEL: store_load_v3i32_unaligned: 2836; UNALIGNED_GFX10: ; %bb.0: ; %bb 2837; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2838; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 1 2839; UNALIGNED_GFX10-NEXT: 
v_mov_b32_e32 v3, 0 2840; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v5, 1, v0 2841; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v2, 2 2842; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v4, 2, v0 2843; UNALIGNED_GFX10-NEXT: scratch_store_byte v0, v1, off 2844; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2845; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v1, 3, v0 2846; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v6, 4, v0 2847; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v7, 5, v0 2848; UNALIGNED_GFX10-NEXT: scratch_store_byte v5, v3, off 2849; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2850; UNALIGNED_GFX10-NEXT: scratch_store_byte v4, v3, off 2851; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2852; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v8, 6, v0 2853; UNALIGNED_GFX10-NEXT: scratch_store_byte v1, v3, off 2854; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2855; UNALIGNED_GFX10-NEXT: scratch_store_byte v6, v2, off 2856; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2857; UNALIGNED_GFX10-NEXT: scratch_store_byte v7, v3, off 2858; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2859; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v2, 7, v0 2860; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v9, 8, v0 2861; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v10, 3 2862; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v11, 9, v0 2863; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v12, 10, v0 2864; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v13, 11, v0 2865; UNALIGNED_GFX10-NEXT: scratch_store_byte v8, v3, off 2866; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2867; UNALIGNED_GFX10-NEXT: scratch_store_byte v2, v3, off 2868; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2869; UNALIGNED_GFX10-NEXT: scratch_store_byte v9, v10, off 2870; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2871; UNALIGNED_GFX10-NEXT: scratch_store_byte v11, v3, off 2872; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2873; UNALIGNED_GFX10-NEXT: scratch_store_byte v12, v3, off 2874; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2875; 
UNALIGNED_GFX10-NEXT: scratch_store_byte v13, v3, off 2876; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2877; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v0, off glc dlc 2878; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 2879; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v5, off glc dlc 2880; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 2881; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v4, off glc dlc 2882; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 2883; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v1, off glc dlc 2884; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 2885; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v6, off glc dlc 2886; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 2887; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v7, off glc dlc 2888; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 2889; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v8, off glc dlc 2890; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 2891; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v2, off glc dlc 2892; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 2893; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v9, off glc dlc 2894; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 2895; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v11, off glc dlc 2896; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 2897; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v12, off glc dlc 2898; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 2899; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v13, off glc dlc 2900; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 2901; UNALIGNED_GFX10-NEXT: s_setpc_b64 s[30:31] 2902; 2903; UNALIGNED_GFX940-LABEL: store_load_v3i32_unaligned: 2904; UNALIGNED_GFX940: ; %bb.0: ; %bb 2905; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2906; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v3, 1 2907; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v1, 2 2908; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v2, 2, v0 2909; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v4, 1, v0 2910; UNALIGNED_GFX940-NEXT: scratch_store_byte v0, v3, off sc0 sc1 2911; 
UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 2912; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v3, 0 2913; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v6, 4, v0 2914; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v7, 6, v0 2915; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v9, 8, v0 2916; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v10, 10, v0 2917; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v12, 3 2918; UNALIGNED_GFX940-NEXT: scratch_store_byte v4, v3, off sc0 sc1 2919; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 2920; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v5, 3, v0 2921; UNALIGNED_GFX940-NEXT: scratch_store_byte v2, v3, off sc0 sc1 2922; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 2923; UNALIGNED_GFX940-NEXT: scratch_store_byte v5, v3, off sc0 sc1 2924; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 2925; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v8, 5, v0 2926; UNALIGNED_GFX940-NEXT: scratch_store_byte v6, v1, off sc0 sc1 2927; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 2928; UNALIGNED_GFX940-NEXT: scratch_store_byte v8, v3, off sc0 sc1 2929; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 2930; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v1, 7, v0 2931; UNALIGNED_GFX940-NEXT: scratch_store_byte v7, v3, off sc0 sc1 2932; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 2933; UNALIGNED_GFX940-NEXT: scratch_store_byte v1, v3, off sc0 sc1 2934; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 2935; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v11, 9, v0 2936; UNALIGNED_GFX940-NEXT: scratch_store_byte v9, v12, off sc0 sc1 2937; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 2938; UNALIGNED_GFX940-NEXT: scratch_store_byte v11, v3, off sc0 sc1 2939; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 2940; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v12, 11, v0 2941; UNALIGNED_GFX940-NEXT: scratch_store_byte v10, v3, off sc0 sc1 2942; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 2943; UNALIGNED_GFX940-NEXT: scratch_store_byte v12, v3, off sc0 sc1 2944; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 2945; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v0, off sc0 sc1 2946; UNALIGNED_GFX940-NEXT: 
s_waitcnt vmcnt(0) 2947; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr12 2948; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr4 2949; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr11 2950; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr7 2951; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr6 2952; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr10 2953; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr5 2954; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr9 2955; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr1 2956; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr8 2957; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr2 2958; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr0 2959; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v4, off sc0 sc1 2960; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 2961; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v2, off sc0 sc1 2962; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 2963; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v5, off sc0 sc1 2964; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 2965; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v6, off sc0 sc1 2966; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 2967; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v8, off sc0 sc1 2968; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 2969; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v7, off sc0 sc1 2970; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 2971; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v1, off sc0 sc1 2972; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 2973; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v9, off sc0 sc1 2974; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 2975; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v11, off sc0 sc1 2976; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 2977; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v10, off sc0 sc1 2978; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 2979; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v12, off sc0 sc1 2980; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 2981; UNALIGNED_GFX940-NEXT: s_setpc_b64 s[30:31] 2982; 2983; UNALIGNED_GFX11-LABEL: 
store_load_v3i32_unaligned: 2984; UNALIGNED_GFX11: ; %bb.0: ; %bb 2985; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2986; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 2 2987; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_add_nc_u32 v4, 2, v0 2988; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v10, 3 :: v_dual_add_nc_u32 v5, 1, v0 2989; UNALIGNED_GFX11-NEXT: scratch_store_b8 v0, v1, off dlc 2990; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2991; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v0 2992; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v6, 4, v0 2993; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v7, 5, v0 2994; UNALIGNED_GFX11-NEXT: scratch_store_b8 v5, v3, off dlc 2995; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2996; UNALIGNED_GFX11-NEXT: scratch_store_b8 v4, v3, off dlc 2997; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2998; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v8, 6, v0 2999; UNALIGNED_GFX11-NEXT: scratch_store_b8 v1, v3, off dlc 3000; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3001; UNALIGNED_GFX11-NEXT: scratch_store_b8 v6, v2, off dlc 3002; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3003; UNALIGNED_GFX11-NEXT: scratch_store_b8 v7, v3, off dlc 3004; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3005; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v2, 7, v0 3006; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v9, 8, v0 3007; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v11, 9, v0 3008; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v12, 10, v0 3009; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v13, 11, v0 3010; UNALIGNED_GFX11-NEXT: scratch_store_b8 v8, v3, off dlc 3011; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3012; UNALIGNED_GFX11-NEXT: scratch_store_b8 v2, v3, off dlc 3013; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3014; UNALIGNED_GFX11-NEXT: scratch_store_b8 v9, v10, off dlc 3015; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3016; UNALIGNED_GFX11-NEXT: scratch_store_b8 v11, v3, off dlc 3017; UNALIGNED_GFX11-NEXT: 
s_waitcnt_vscnt null, 0x0 3018; UNALIGNED_GFX11-NEXT: scratch_store_b8 v12, v3, off dlc 3019; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3020; UNALIGNED_GFX11-NEXT: scratch_store_b8 v13, v3, off dlc 3021; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3022; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v0, off glc dlc 3023; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 3024; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v5, off glc dlc 3025; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 3026; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v4, off glc dlc 3027; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 3028; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v1, off glc dlc 3029; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 3030; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v6, off glc dlc 3031; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 3032; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v7, off glc dlc 3033; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 3034; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v8, off glc dlc 3035; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 3036; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v2, off glc dlc 3037; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 3038; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v9, off glc dlc 3039; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 3040; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v11, off glc dlc 3041; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 3042; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v12, off glc dlc 3043; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 3044; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v13, off glc dlc 3045; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 3046; UNALIGNED_GFX11-NEXT: s_setpc_b64 s[30:31] 3047; 3048; UNALIGNED_GFX12-LABEL: store_load_v3i32_unaligned: 3049; UNALIGNED_GFX12: ; %bb.0: ; %bb 3050; UNALIGNED_GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 3051; UNALIGNED_GFX12-NEXT: s_wait_expcnt 0x0 3052; UNALIGNED_GFX12-NEXT: s_wait_samplecnt 0x0 3053; UNALIGNED_GFX12-NEXT: s_wait_bvhcnt 0x0 3054; UNALIGNED_GFX12-NEXT: s_wait_kmcnt 0x0 3055; 
UNALIGNED_GFX12-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 0 3056; UNALIGNED_GFX12-NEXT: v_mov_b32_e32 v3, 2 3057; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 3058; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v1, off scope:SCOPE_SYS 3059; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 3060; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:1 scope:SCOPE_SYS 3061; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 3062; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:2 scope:SCOPE_SYS 3063; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 3064; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:3 scope:SCOPE_SYS 3065; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 3066; UNALIGNED_GFX12-NEXT: v_mov_b32_e32 v1, 3 3067; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v3, off offset:4 scope:SCOPE_SYS 3068; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 3069; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:5 scope:SCOPE_SYS 3070; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 3071; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:6 scope:SCOPE_SYS 3072; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 3073; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:7 scope:SCOPE_SYS 3074; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 3075; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v1, off offset:8 scope:SCOPE_SYS 3076; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 3077; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:9 scope:SCOPE_SYS 3078; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 3079; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:10 scope:SCOPE_SYS 3080; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 3081; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:11 scope:SCOPE_SYS 3082; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 3083; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off scope:SCOPE_SYS 3084; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 3085; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:1 scope:SCOPE_SYS 3086; UNALIGNED_GFX12-NEXT: 
s_wait_loadcnt 0x0 3087; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:2 scope:SCOPE_SYS 3088; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 3089; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:3 scope:SCOPE_SYS 3090; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 3091; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:4 scope:SCOPE_SYS 3092; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 3093; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:5 scope:SCOPE_SYS 3094; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 3095; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:6 scope:SCOPE_SYS 3096; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 3097; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:7 scope:SCOPE_SYS 3098; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 3099; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:8 scope:SCOPE_SYS 3100; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 3101; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:9 scope:SCOPE_SYS 3102; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 3103; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:10 scope:SCOPE_SYS 3104; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 3105; UNALIGNED_GFX12-NEXT: scratch_load_u8 v0, v0, off offset:11 scope:SCOPE_SYS 3106; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 3107; UNALIGNED_GFX12-NEXT: s_setpc_b64 s[30:31] 3108bb: 3109 store volatile <3 x i32> <i32 1, i32 2, i32 3>, ptr addrspace(5) %arg, align 1 3110 %load = load volatile <3 x i32>, ptr addrspace(5) %arg, align 1 3111 ret void 3112} 3113 3114define void @store_load_v4i32_unaligned(ptr addrspace(5) nocapture %arg) { 3115; GFX9-LABEL: store_load_v4i32_unaligned: 3116; GFX9: ; %bb.0: ; %bb 3117; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3118; GFX9-NEXT: s_mov_b32 s3, 4 3119; GFX9-NEXT: s_mov_b32 s2, 3 3120; GFX9-NEXT: s_mov_b32 s1, 2 3121; GFX9-NEXT: s_mov_b32 s0, 1 3122; GFX9-NEXT: v_mov_b32_e32 v4, s3 3123; GFX9-NEXT: v_mov_b32_e32 v3, s2 3124; GFX9-NEXT: v_mov_b32_e32 v2, s1 3125; 
GFX9-NEXT: v_mov_b32_e32 v1, s0 3126; GFX9-NEXT: scratch_store_dwordx4 v0, v[1:4], off 3127; GFX9-NEXT: s_waitcnt vmcnt(0) 3128; GFX9-NEXT: scratch_load_dwordx4 v[0:3], v0, off glc 3129; GFX9-NEXT: s_waitcnt vmcnt(0) 3130; GFX9-NEXT: s_setpc_b64 s[30:31] 3131; 3132; GFX10-LABEL: store_load_v4i32_unaligned: 3133; GFX10: ; %bb.0: ; %bb 3134; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3135; GFX10-NEXT: s_mov_b32 s3, 4 3136; GFX10-NEXT: s_mov_b32 s2, 3 3137; GFX10-NEXT: s_mov_b32 s1, 2 3138; GFX10-NEXT: s_mov_b32 s0, 1 3139; GFX10-NEXT: v_mov_b32_e32 v4, s3 3140; GFX10-NEXT: v_mov_b32_e32 v3, s2 3141; GFX10-NEXT: v_mov_b32_e32 v2, s1 3142; GFX10-NEXT: v_mov_b32_e32 v1, s0 3143; GFX10-NEXT: scratch_store_dwordx4 v0, v[1:4], off 3144; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3145; GFX10-NEXT: scratch_load_dwordx4 v[0:3], v0, off glc dlc 3146; GFX10-NEXT: s_waitcnt vmcnt(0) 3147; GFX10-NEXT: s_setpc_b64 s[30:31] 3148; 3149; GFX940-LABEL: store_load_v4i32_unaligned: 3150; GFX940: ; %bb.0: ; %bb 3151; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3152; GFX940-NEXT: s_mov_b32 s3, 4 3153; GFX940-NEXT: s_mov_b32 s2, 3 3154; GFX940-NEXT: s_mov_b32 s1, 2 3155; GFX940-NEXT: s_mov_b32 s0, 1 3156; GFX940-NEXT: v_mov_b64_e32 v[4:5], s[2:3] 3157; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[0:1] 3158; GFX940-NEXT: scratch_store_dwordx4 v0, v[2:5], off sc0 sc1 3159; GFX940-NEXT: s_waitcnt vmcnt(0) 3160; GFX940-NEXT: scratch_load_dwordx4 v[0:3], v0, off sc0 sc1 3161; GFX940-NEXT: s_waitcnt vmcnt(0) 3162; GFX940-NEXT: s_setpc_b64 s[30:31] 3163; 3164; GFX11-LABEL: store_load_v4i32_unaligned: 3165; GFX11: ; %bb.0: ; %bb 3166; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3167; GFX11-NEXT: s_mov_b32 s3, 4 3168; GFX11-NEXT: s_mov_b32 s2, 3 3169; GFX11-NEXT: s_mov_b32 s1, 2 3170; GFX11-NEXT: s_mov_b32 s0, 1 3171; GFX11-NEXT: v_dual_mov_b32 v4, s3 :: v_dual_mov_b32 v3, s2 3172; GFX11-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0 3173; GFX11-NEXT: scratch_store_b128 v0, 
v[1:4], off dlc 3174; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3175; GFX11-NEXT: scratch_load_b128 v[0:3], v0, off glc dlc 3176; GFX11-NEXT: s_waitcnt vmcnt(0) 3177; GFX11-NEXT: s_setpc_b64 s[30:31] 3178; 3179; GFX12-LABEL: store_load_v4i32_unaligned: 3180; GFX12: ; %bb.0: ; %bb 3181; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 3182; GFX12-NEXT: s_wait_expcnt 0x0 3183; GFX12-NEXT: s_wait_samplecnt 0x0 3184; GFX12-NEXT: s_wait_bvhcnt 0x0 3185; GFX12-NEXT: s_wait_kmcnt 0x0 3186; GFX12-NEXT: s_mov_b32 s3, 4 3187; GFX12-NEXT: s_mov_b32 s2, 3 3188; GFX12-NEXT: s_mov_b32 s1, 2 3189; GFX12-NEXT: s_mov_b32 s0, 1 3190; GFX12-NEXT: s_wait_alu 0xfffe 3191; GFX12-NEXT: v_dual_mov_b32 v4, s3 :: v_dual_mov_b32 v3, s2 3192; GFX12-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0 3193; GFX12-NEXT: s_wait_storecnt 0x0 3194; GFX12-NEXT: scratch_store_b128 v0, v[1:4], off scope:SCOPE_SYS 3195; GFX12-NEXT: s_wait_storecnt 0x0 3196; GFX12-NEXT: scratch_load_b128 v[0:3], v0, off scope:SCOPE_SYS 3197; GFX12-NEXT: s_wait_loadcnt 0x0 3198; GFX12-NEXT: s_setpc_b64 s[30:31] 3199; 3200; UNALIGNED_GFX9-LABEL: store_load_v4i32_unaligned: 3201; UNALIGNED_GFX9: ; %bb.0: ; %bb 3202; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3203; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v3, 1 3204; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v1, 2 3205; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v2, 2, v0 3206; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v4, 1, v0 3207; UNALIGNED_GFX9-NEXT: scratch_store_byte v0, v3, off 3208; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 3209; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v3, 0 3210; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v6, 4 3211; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v7, 4, v0 3212; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v8, 6, v0 3213; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v10, 8, v0 3214; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v11, 10, v0 3215; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v13, 3 3216; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v14, 12, v0 3217; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v15, 14, v0 3218; 
UNALIGNED_GFX9-NEXT: scratch_store_byte v4, v3, off 3219; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 3220; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v5, 3, v0 3221; UNALIGNED_GFX9-NEXT: scratch_store_byte v2, v3, off 3222; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 3223; UNALIGNED_GFX9-NEXT: scratch_store_byte v5, v3, off 3224; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 3225; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v9, 5, v0 3226; UNALIGNED_GFX9-NEXT: scratch_store_byte v7, v1, off 3227; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 3228; UNALIGNED_GFX9-NEXT: scratch_store_byte v9, v3, off 3229; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 3230; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v1, 7, v0 3231; UNALIGNED_GFX9-NEXT: scratch_store_byte v8, v3, off 3232; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 3233; UNALIGNED_GFX9-NEXT: scratch_store_byte v1, v3, off 3234; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 3235; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v12, 9, v0 3236; UNALIGNED_GFX9-NEXT: scratch_store_byte v10, v13, off 3237; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 3238; UNALIGNED_GFX9-NEXT: scratch_store_byte v12, v3, off 3239; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 3240; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v13, 11, v0 3241; UNALIGNED_GFX9-NEXT: scratch_store_byte v11, v3, off 3242; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 3243; UNALIGNED_GFX9-NEXT: scratch_store_byte v13, v3, off 3244; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 3245; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v16, 13, v0 3246; UNALIGNED_GFX9-NEXT: scratch_store_byte v14, v6, off 3247; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 3248; UNALIGNED_GFX9-NEXT: scratch_store_byte v16, v3, off 3249; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 3250; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v6, 15, v0 3251; UNALIGNED_GFX9-NEXT: scratch_store_byte v15, v3, off 3252; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 3253; UNALIGNED_GFX9-NEXT: scratch_store_byte v6, v3, off 3254; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 3255; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v0, off glc 3256; 
UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 3257; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v4, off glc 3258; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 3259; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v2, off glc 3260; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 3261; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v5, off glc 3262; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 3263; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v7, off glc 3264; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 3265; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v9, off glc 3266; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 3267; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v8, off glc 3268; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 3269; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v1, off glc 3270; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 3271; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v10, off glc 3272; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 3273; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v12, off glc 3274; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 3275; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v11, off glc 3276; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 3277; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v13, off glc 3278; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 3279; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v14, off glc 3280; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 3281; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v16, off glc 3282; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 3283; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v15, off glc 3284; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 3285; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr2 3286; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr1 3287; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr9 3288; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr16 3289; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr11 3290; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr4 3291; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr15 3292; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr10 3293; UNALIGNED_GFX9-NEXT: ; kill: killed 
$vgpr7 3294; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr13 3295; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr5 3296; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr14 3297; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr12 3298; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr8 3299; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr0 3300; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v0, v6, off glc 3301; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 3302; UNALIGNED_GFX9-NEXT: s_setpc_b64 s[30:31] 3303; 3304; UNALIGNED_GFX10-LABEL: store_load_v4i32_unaligned: 3305; UNALIGNED_GFX10: ; %bb.0: ; %bb 3306; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3307; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 1 3308; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v2, 2 3309; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v3, 0 3310; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v4, 1, v0 3311; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v6, 4, v0 3312; UNALIGNED_GFX10-NEXT: scratch_store_byte v0, v1, off 3313; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3314; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v1, 3, v0 3315; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v5, 2, v0 3316; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v7, 5, v0 3317; UNALIGNED_GFX10-NEXT: scratch_store_byte v4, v3, off 3318; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3319; UNALIGNED_GFX10-NEXT: scratch_store_byte v5, v3, off 3320; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3321; UNALIGNED_GFX10-NEXT: scratch_store_byte v1, v3, off 3322; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3323; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v9, 6, v0 3324; UNALIGNED_GFX10-NEXT: scratch_store_byte v6, v2, off 3325; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3326; UNALIGNED_GFX10-NEXT: scratch_store_byte v7, v3, off 3327; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3328; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v2, 7, v0 3329; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v10, 8, v0 3330; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v11, 3 3331; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v12, 9, v0 
3332; UNALIGNED_GFX10-NEXT: scratch_store_byte v9, v3, off 3333; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3334; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v13, 10, v0 3335; UNALIGNED_GFX10-NEXT: scratch_store_byte v2, v3, off 3336; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3337; UNALIGNED_GFX10-NEXT: scratch_store_byte v10, v11, off 3338; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3339; UNALIGNED_GFX10-NEXT: scratch_store_byte v12, v3, off 3340; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3341; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v11, 11, v0 3342; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v8, 4 3343; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v14, 12, v0 3344; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v15, 13, v0 3345; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v16, 14, v0 3346; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v17, 15, v0 3347; UNALIGNED_GFX10-NEXT: scratch_store_byte v13, v3, off 3348; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3349; UNALIGNED_GFX10-NEXT: scratch_store_byte v11, v3, off 3350; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3351; UNALIGNED_GFX10-NEXT: scratch_store_byte v14, v8, off 3352; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3353; UNALIGNED_GFX10-NEXT: scratch_store_byte v15, v3, off 3354; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3355; UNALIGNED_GFX10-NEXT: scratch_store_byte v16, v3, off 3356; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3357; UNALIGNED_GFX10-NEXT: scratch_store_byte v17, v3, off 3358; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3359; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v0, off glc dlc 3360; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 3361; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v4, off glc dlc 3362; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 3363; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v5, off glc dlc 3364; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 3365; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v1, off glc dlc 3366; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 3367; 
UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v6, off glc dlc 3368; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 3369; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v7, off glc dlc 3370; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 3371; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v9, off glc dlc 3372; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 3373; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v2, off glc dlc 3374; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 3375; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v10, off glc dlc 3376; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 3377; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v12, off glc dlc 3378; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 3379; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v13, off glc dlc 3380; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 3381; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v11, off glc dlc 3382; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 3383; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v14, off glc dlc 3384; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 3385; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v15, off glc dlc 3386; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 3387; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v16, off glc dlc 3388; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 3389; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v17, off glc dlc 3390; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 3391; UNALIGNED_GFX10-NEXT: s_setpc_b64 s[30:31] 3392; 3393; UNALIGNED_GFX940-LABEL: store_load_v4i32_unaligned: 3394; UNALIGNED_GFX940: ; %bb.0: ; %bb 3395; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3396; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v3, 1 3397; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v1, 2 3398; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v2, 2, v0 3399; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v4, 1, v0 3400; UNALIGNED_GFX940-NEXT: scratch_store_byte v0, v3, off sc0 sc1 3401; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 3402; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v3, 0 3403; UNALIGNED_GFX940-NEXT: 
v_mov_b32_e32 v6, 4 3404; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v7, 4, v0 3405; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v8, 6, v0 3406; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v10, 8, v0 3407; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v11, 10, v0 3408; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v13, 3 3409; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v14, 12, v0 3410; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v15, 14, v0 3411; UNALIGNED_GFX940-NEXT: scratch_store_byte v4, v3, off sc0 sc1 3412; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 3413; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v5, 3, v0 3414; UNALIGNED_GFX940-NEXT: scratch_store_byte v2, v3, off sc0 sc1 3415; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 3416; UNALIGNED_GFX940-NEXT: scratch_store_byte v5, v3, off sc0 sc1 3417; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 3418; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v9, 5, v0 3419; UNALIGNED_GFX940-NEXT: scratch_store_byte v7, v1, off sc0 sc1 3420; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 3421; UNALIGNED_GFX940-NEXT: scratch_store_byte v9, v3, off sc0 sc1 3422; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 3423; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v1, 7, v0 3424; UNALIGNED_GFX940-NEXT: scratch_store_byte v8, v3, off sc0 sc1 3425; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 3426; UNALIGNED_GFX940-NEXT: scratch_store_byte v1, v3, off sc0 sc1 3427; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 3428; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v12, 9, v0 3429; UNALIGNED_GFX940-NEXT: scratch_store_byte v10, v13, off sc0 sc1 3430; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 3431; UNALIGNED_GFX940-NEXT: scratch_store_byte v12, v3, off sc0 sc1 3432; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 3433; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v13, 11, v0 3434; UNALIGNED_GFX940-NEXT: scratch_store_byte v11, v3, off sc0 sc1 3435; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 3436; UNALIGNED_GFX940-NEXT: scratch_store_byte v13, v3, off sc0 sc1 3437; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 3438; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v16, 13, v0 
3439; UNALIGNED_GFX940-NEXT: scratch_store_byte v14, v6, off sc0 sc1 3440; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 3441; UNALIGNED_GFX940-NEXT: scratch_store_byte v16, v3, off sc0 sc1 3442; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 3443; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v6, 15, v0 3444; UNALIGNED_GFX940-NEXT: scratch_store_byte v15, v3, off sc0 sc1 3445; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 3446; UNALIGNED_GFX940-NEXT: scratch_store_byte v6, v3, off sc0 sc1 3447; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 3448; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v0, off sc0 sc1 3449; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 3450; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v4, off sc0 sc1 3451; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 3452; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v2, off sc0 sc1 3453; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 3454; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v5, off sc0 sc1 3455; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 3456; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v7, off sc0 sc1 3457; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 3458; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v9, off sc0 sc1 3459; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 3460; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v8, off sc0 sc1 3461; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 3462; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v1, off sc0 sc1 3463; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 3464; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v10, off sc0 sc1 3465; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 3466; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v12, off sc0 sc1 3467; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 3468; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v11, off sc0 sc1 3469; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 3470; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v13, off sc0 sc1 3471; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 3472; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v14, off sc0 sc1 3473; 
UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 3474; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v16, off sc0 sc1 3475; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 3476; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v15, off sc0 sc1 3477; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 3478; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr2 3479; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr1 3480; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr9 3481; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr16 3482; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr11 3483; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr4 3484; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr15 3485; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr10 3486; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr7 3487; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr13 3488; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr5 3489; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr14 3490; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr12 3491; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr8 3492; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr0 3493; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v0, v6, off sc0 sc1 3494; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 3495; UNALIGNED_GFX940-NEXT: s_setpc_b64 s[30:31] 3496; 3497; UNALIGNED_GFX11-LABEL: store_load_v4i32_unaligned: 3498; UNALIGNED_GFX11: ; %bb.0: ; %bb 3499; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3500; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 2 3501; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_add_nc_u32 v4, 1, v0 3502; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v11, 3 :: v_dual_add_nc_u32 v6, 4, v0 3503; UNALIGNED_GFX11-NEXT: scratch_store_b8 v0, v1, off dlc 3504; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3505; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v0 3506; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v8, 4 :: v_dual_add_nc_u32 v5, 2, v0 3507; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v7, 5, v0 3508; UNALIGNED_GFX11-NEXT: scratch_store_b8 v4, v3, off dlc 3509; 
UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3510; UNALIGNED_GFX11-NEXT: scratch_store_b8 v5, v3, off dlc 3511; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3512; UNALIGNED_GFX11-NEXT: scratch_store_b8 v1, v3, off dlc 3513; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3514; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v9, 6, v0 3515; UNALIGNED_GFX11-NEXT: scratch_store_b8 v6, v2, off dlc 3516; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3517; UNALIGNED_GFX11-NEXT: scratch_store_b8 v7, v3, off dlc 3518; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3519; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v2, 7, v0 3520; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v10, 8, v0 3521; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v12, 9, v0 3522; UNALIGNED_GFX11-NEXT: scratch_store_b8 v9, v3, off dlc 3523; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3524; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v13, 10, v0 3525; UNALIGNED_GFX11-NEXT: scratch_store_b8 v2, v3, off dlc 3526; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3527; UNALIGNED_GFX11-NEXT: scratch_store_b8 v10, v11, off dlc 3528; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3529; UNALIGNED_GFX11-NEXT: scratch_store_b8 v12, v3, off dlc 3530; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3531; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v11, 11, v0 3532; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v14, 12, v0 3533; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v15, 13, v0 3534; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v16, 14, v0 3535; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v17, 15, v0 3536; UNALIGNED_GFX11-NEXT: scratch_store_b8 v13, v3, off dlc 3537; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3538; UNALIGNED_GFX11-NEXT: scratch_store_b8 v11, v3, off dlc 3539; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3540; UNALIGNED_GFX11-NEXT: scratch_store_b8 v14, v8, off dlc 3541; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3542; UNALIGNED_GFX11-NEXT: scratch_store_b8 v15, v3, off dlc 3543; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 
3544; UNALIGNED_GFX11-NEXT: scratch_store_b8 v16, v3, off dlc 3545; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3546; UNALIGNED_GFX11-NEXT: scratch_store_b8 v17, v3, off dlc 3547; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3548; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v0, off glc dlc 3549; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 3550; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v4, off glc dlc 3551; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 3552; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v5, off glc dlc 3553; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 3554; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v1, off glc dlc 3555; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 3556; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v6, off glc dlc 3557; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 3558; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v7, off glc dlc 3559; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 3560; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v9, off glc dlc 3561; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 3562; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v2, off glc dlc 3563; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 3564; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v10, off glc dlc 3565; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 3566; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v12, off glc dlc 3567; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 3568; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v13, off glc dlc 3569; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 3570; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v11, off glc dlc 3571; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 3572; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v14, off glc dlc 3573; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 3574; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v15, off glc dlc 3575; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 3576; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v16, off glc dlc 3577; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 3578; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v17, off glc dlc 3579; UNALIGNED_GFX11-NEXT: 
s_waitcnt vmcnt(0) 3580; UNALIGNED_GFX11-NEXT: s_setpc_b64 s[30:31] 3581; 3582; UNALIGNED_GFX12-LABEL: store_load_v4i32_unaligned: 3583; UNALIGNED_GFX12: ; %bb.0: ; %bb 3584; UNALIGNED_GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 3585; UNALIGNED_GFX12-NEXT: s_wait_expcnt 0x0 3586; UNALIGNED_GFX12-NEXT: s_wait_samplecnt 0x0 3587; UNALIGNED_GFX12-NEXT: s_wait_bvhcnt 0x0 3588; UNALIGNED_GFX12-NEXT: s_wait_kmcnt 0x0 3589; UNALIGNED_GFX12-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 0 3590; UNALIGNED_GFX12-NEXT: v_mov_b32_e32 v3, 2 3591; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 3592; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v1, off scope:SCOPE_SYS 3593; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 3594; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:1 scope:SCOPE_SYS 3595; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 3596; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:2 scope:SCOPE_SYS 3597; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 3598; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:3 scope:SCOPE_SYS 3599; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 3600; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v3, off offset:4 scope:SCOPE_SYS 3601; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 3602; UNALIGNED_GFX12-NEXT: v_mov_b32_e32 v1, 3 3603; UNALIGNED_GFX12-NEXT: v_mov_b32_e32 v3, 4 3604; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:5 scope:SCOPE_SYS 3605; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 3606; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:6 scope:SCOPE_SYS 3607; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 3608; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:7 scope:SCOPE_SYS 3609; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 3610; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v1, off offset:8 scope:SCOPE_SYS 3611; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 3612; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:9 scope:SCOPE_SYS 3613; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 3614; UNALIGNED_GFX12-NEXT: 
scratch_store_b8 v0, v2, off offset:10 scope:SCOPE_SYS 3615; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 3616; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:11 scope:SCOPE_SYS 3617; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 3618; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v3, off offset:12 scope:SCOPE_SYS 3619; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 3620; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:13 scope:SCOPE_SYS 3621; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 3622; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:14 scope:SCOPE_SYS 3623; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 3624; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:15 scope:SCOPE_SYS 3625; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 3626; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off scope:SCOPE_SYS 3627; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 3628; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:1 scope:SCOPE_SYS 3629; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 3630; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:2 scope:SCOPE_SYS 3631; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 3632; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:3 scope:SCOPE_SYS 3633; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 3634; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:4 scope:SCOPE_SYS 3635; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 3636; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:5 scope:SCOPE_SYS 3637; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 3638; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:6 scope:SCOPE_SYS 3639; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 3640; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:7 scope:SCOPE_SYS 3641; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 3642; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:8 scope:SCOPE_SYS 3643; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 3644; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:9 scope:SCOPE_SYS 3645; 
UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 3646; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:10 scope:SCOPE_SYS 3647; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 3648; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:11 scope:SCOPE_SYS 3649; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 3650; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:12 scope:SCOPE_SYS 3651; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 3652; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:13 scope:SCOPE_SYS 3653; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 3654; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:14 scope:SCOPE_SYS 3655; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 3656; UNALIGNED_GFX12-NEXT: scratch_load_u8 v0, v0, off offset:15 scope:SCOPE_SYS 3657; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 3658; UNALIGNED_GFX12-NEXT: s_setpc_b64 s[30:31] 3659bb: 3660 store volatile <4 x i32> <i32 1, i32 2, i32 3, i32 4>, ptr addrspace(5) %arg, align 1 3661 %load = load volatile <4 x i32>, ptr addrspace(5) %arg, align 1 3662 ret void 3663} 3664 3665define amdgpu_gs void @sgpr_base_large_offset(ptr addrspace(1) %out, ptr addrspace(5) inreg %sgpr_base) { 3666; GFX9-LABEL: sgpr_base_large_offset: 3667; GFX9: ; %bb.0: ; %entry 3668; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5 3669; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 3670; GFX9-NEXT: s_add_u32 s0, s2, 0xffe8 3671; GFX9-NEXT: scratch_load_dword v2, off, s0 3672; GFX9-NEXT: s_waitcnt vmcnt(0) 3673; GFX9-NEXT: global_store_dword v[0:1], v2, off 3674; GFX9-NEXT: s_endpgm 3675; 3676; GFX10-LABEL: sgpr_base_large_offset: 3677; GFX10: ; %bb.0: ; %entry 3678; GFX10-NEXT: s_add_u32 s0, s0, s5 3679; GFX10-NEXT: s_addc_u32 s1, s1, 0 3680; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 3681; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 3682; GFX10-NEXT: s_add_u32 s0, s2, 0xffe8 3683; GFX10-NEXT: scratch_load_dword v2, off, s0 3684; GFX10-NEXT: s_waitcnt vmcnt(0) 3685; GFX10-NEXT: global_store_dword v[0:1], 
v2, off 3686; GFX10-NEXT: s_endpgm 3687; 3688; GFX940-LABEL: sgpr_base_large_offset: 3689; GFX940: ; %bb.0: ; %entry 3690; GFX940-NEXT: s_add_u32 s0, s0, 0xffe8 3691; GFX940-NEXT: scratch_load_dword v2, off, s0 3692; GFX940-NEXT: s_waitcnt vmcnt(0) 3693; GFX940-NEXT: global_store_dword v[0:1], v2, off sc0 sc1 3694; GFX940-NEXT: s_endpgm 3695; 3696; GFX11-LABEL: sgpr_base_large_offset: 3697; GFX11: ; %bb.0: ; %entry 3698; GFX11-NEXT: s_add_u32 s0, s0, 0xffe8 3699; GFX11-NEXT: scratch_load_b32 v2, off, s0 3700; GFX11-NEXT: s_waitcnt vmcnt(0) 3701; GFX11-NEXT: global_store_b32 v[0:1], v2, off 3702; GFX11-NEXT: s_endpgm 3703; 3704; GFX12-LABEL: sgpr_base_large_offset: 3705; GFX12: ; %bb.0: ; %entry 3706; GFX12-NEXT: scratch_load_b32 v2, off, s0 offset:65512 3707; GFX12-NEXT: s_wait_loadcnt 0x0 3708; GFX12-NEXT: global_store_b32 v[0:1], v2, off 3709; GFX12-NEXT: s_endpgm 3710; 3711; UNALIGNED_GFX9-LABEL: sgpr_base_large_offset: 3712; UNALIGNED_GFX9: ; %bb.0: ; %entry 3713; UNALIGNED_GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5 3714; UNALIGNED_GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 3715; UNALIGNED_GFX9-NEXT: s_add_u32 s0, s2, 0xffe8 3716; UNALIGNED_GFX9-NEXT: scratch_load_dword v2, off, s0 3717; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 3718; UNALIGNED_GFX9-NEXT: global_store_dword v[0:1], v2, off 3719; UNALIGNED_GFX9-NEXT: s_endpgm 3720; 3721; UNALIGNED_GFX10-LABEL: sgpr_base_large_offset: 3722; UNALIGNED_GFX10: ; %bb.0: ; %entry 3723; UNALIGNED_GFX10-NEXT: s_add_u32 s0, s0, s5 3724; UNALIGNED_GFX10-NEXT: s_addc_u32 s1, s1, 0 3725; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 3726; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 3727; UNALIGNED_GFX10-NEXT: s_add_u32 s0, s2, 0xffe8 3728; UNALIGNED_GFX10-NEXT: scratch_load_dword v2, off, s0 3729; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 3730; UNALIGNED_GFX10-NEXT: global_store_dword v[0:1], v2, off 3731; UNALIGNED_GFX10-NEXT: s_endpgm 3732; 3733; UNALIGNED_GFX940-LABEL: 
sgpr_base_large_offset: 3734; UNALIGNED_GFX940: ; %bb.0: ; %entry 3735; UNALIGNED_GFX940-NEXT: s_add_u32 s0, s0, 0xffe8 3736; UNALIGNED_GFX940-NEXT: scratch_load_dword v2, off, s0 3737; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 3738; UNALIGNED_GFX940-NEXT: global_store_dword v[0:1], v2, off sc0 sc1 3739; UNALIGNED_GFX940-NEXT: s_endpgm 3740; 3741; UNALIGNED_GFX11-LABEL: sgpr_base_large_offset: 3742; UNALIGNED_GFX11: ; %bb.0: ; %entry 3743; UNALIGNED_GFX11-NEXT: s_add_u32 s0, s0, 0xffe8 3744; UNALIGNED_GFX11-NEXT: scratch_load_b32 v2, off, s0 3745; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 3746; UNALIGNED_GFX11-NEXT: global_store_b32 v[0:1], v2, off 3747; UNALIGNED_GFX11-NEXT: s_endpgm 3748; 3749; UNALIGNED_GFX12-LABEL: sgpr_base_large_offset: 3750; UNALIGNED_GFX12: ; %bb.0: ; %entry 3751; UNALIGNED_GFX12-NEXT: scratch_load_b32 v2, off, s0 offset:65512 3752; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 3753; UNALIGNED_GFX12-NEXT: global_store_b32 v[0:1], v2, off 3754; UNALIGNED_GFX12-NEXT: s_endpgm 3755entry: 3756 %large_offset = getelementptr i8, ptr addrspace(5) %sgpr_base, i32 65512 3757 %load = load i32, ptr addrspace(5) %large_offset, align 4 3758 store i32 %load, ptr addrspace(1) %out 3759 ret void 3760} 3761 3762define amdgpu_gs void @sgpr_base_large_offset_split(ptr addrspace(1) %out, ptr addrspace(5) inreg %sgpr_base) { 3763; GFX9-LABEL: sgpr_base_large_offset_split: 3764; GFX9: ; %bb.0: ; %entry 3765; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5 3766; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 3767; GFX9-NEXT: s_and_b32 s0, s2, -4 3768; GFX9-NEXT: s_add_u32 s0, s0, 0x100ffe8 3769; GFX9-NEXT: scratch_load_dword v2, off, s0 glc 3770; GFX9-NEXT: s_waitcnt vmcnt(0) 3771; GFX9-NEXT: global_store_dword v[0:1], v2, off 3772; GFX9-NEXT: s_endpgm 3773; 3774; GFX10-LABEL: sgpr_base_large_offset_split: 3775; GFX10: ; %bb.0: ; %entry 3776; GFX10-NEXT: s_add_u32 s0, s0, s5 3777; GFX10-NEXT: s_addc_u32 s1, s1, 0 3778; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 
3779; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 3780; GFX10-NEXT: s_and_b32 s0, s2, -4 3781; GFX10-NEXT: s_add_u32 s0, s0, 0x100ffe8 3782; GFX10-NEXT: scratch_load_dword v2, off, s0 glc dlc 3783; GFX10-NEXT: s_waitcnt vmcnt(0) 3784; GFX10-NEXT: global_store_dword v[0:1], v2, off 3785; GFX10-NEXT: s_endpgm 3786; 3787; GFX940-LABEL: sgpr_base_large_offset_split: 3788; GFX940: ; %bb.0: ; %entry 3789; GFX940-NEXT: s_and_b32 s0, s0, -4 3790; GFX940-NEXT: s_add_u32 s0, s0, 0x100ffe8 3791; GFX940-NEXT: scratch_load_dword v2, off, s0 sc0 sc1 3792; GFX940-NEXT: s_waitcnt vmcnt(0) 3793; GFX940-NEXT: global_store_dword v[0:1], v2, off sc0 sc1 3794; GFX940-NEXT: s_endpgm 3795; 3796; GFX11-LABEL: sgpr_base_large_offset_split: 3797; GFX11: ; %bb.0: ; %entry 3798; GFX11-NEXT: s_and_b32 s0, s0, -4 3799; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 3800; GFX11-NEXT: s_add_u32 s0, s0, 0x100ffe8 3801; GFX11-NEXT: scratch_load_b32 v2, off, s0 glc dlc 3802; GFX11-NEXT: s_waitcnt vmcnt(0) 3803; GFX11-NEXT: global_store_b32 v[0:1], v2, off 3804; GFX11-NEXT: s_endpgm 3805; 3806; GFX12-LABEL: sgpr_base_large_offset_split: 3807; GFX12: ; %bb.0: ; %entry 3808; GFX12-NEXT: s_and_b32 s0, s0, -4 3809; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 3810; GFX12-NEXT: s_add_co_u32 s0, s0, 0x100ffe8 3811; GFX12-NEXT: scratch_load_b32 v2, off, s0 scope:SCOPE_SYS 3812; GFX12-NEXT: s_wait_loadcnt 0x0 3813; GFX12-NEXT: global_store_b32 v[0:1], v2, off 3814; GFX12-NEXT: s_endpgm 3815; 3816; UNALIGNED_GFX9-LABEL: sgpr_base_large_offset_split: 3817; UNALIGNED_GFX9: ; %bb.0: ; %entry 3818; UNALIGNED_GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5 3819; UNALIGNED_GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 3820; UNALIGNED_GFX9-NEXT: s_and_b32 s0, s2, -4 3821; UNALIGNED_GFX9-NEXT: s_add_u32 s0, s0, 0x100ffe8 3822; UNALIGNED_GFX9-NEXT: scratch_load_dword v2, off, s0 glc 3823; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 3824; UNALIGNED_GFX9-NEXT: global_store_dword v[0:1], v2, off 3825; 
UNALIGNED_GFX9-NEXT: s_endpgm 3826; 3827; UNALIGNED_GFX10-LABEL: sgpr_base_large_offset_split: 3828; UNALIGNED_GFX10: ; %bb.0: ; %entry 3829; UNALIGNED_GFX10-NEXT: s_add_u32 s0, s0, s5 3830; UNALIGNED_GFX10-NEXT: s_addc_u32 s1, s1, 0 3831; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 3832; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 3833; UNALIGNED_GFX10-NEXT: s_and_b32 s0, s2, -4 3834; UNALIGNED_GFX10-NEXT: s_add_u32 s0, s0, 0x100ffe8 3835; UNALIGNED_GFX10-NEXT: scratch_load_dword v2, off, s0 glc dlc 3836; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 3837; UNALIGNED_GFX10-NEXT: global_store_dword v[0:1], v2, off 3838; UNALIGNED_GFX10-NEXT: s_endpgm 3839; 3840; UNALIGNED_GFX940-LABEL: sgpr_base_large_offset_split: 3841; UNALIGNED_GFX940: ; %bb.0: ; %entry 3842; UNALIGNED_GFX940-NEXT: s_and_b32 s0, s0, -4 3843; UNALIGNED_GFX940-NEXT: s_add_u32 s0, s0, 0x100ffe8 3844; UNALIGNED_GFX940-NEXT: scratch_load_dword v2, off, s0 sc0 sc1 3845; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 3846; UNALIGNED_GFX940-NEXT: global_store_dword v[0:1], v2, off sc0 sc1 3847; UNALIGNED_GFX940-NEXT: s_endpgm 3848; 3849; UNALIGNED_GFX11-LABEL: sgpr_base_large_offset_split: 3850; UNALIGNED_GFX11: ; %bb.0: ; %entry 3851; UNALIGNED_GFX11-NEXT: s_and_b32 s0, s0, -4 3852; UNALIGNED_GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 3853; UNALIGNED_GFX11-NEXT: s_add_u32 s0, s0, 0x100ffe8 3854; UNALIGNED_GFX11-NEXT: scratch_load_b32 v2, off, s0 glc dlc 3855; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 3856; UNALIGNED_GFX11-NEXT: global_store_b32 v[0:1], v2, off 3857; UNALIGNED_GFX11-NEXT: s_endpgm 3858; 3859; UNALIGNED_GFX12-LABEL: sgpr_base_large_offset_split: 3860; UNALIGNED_GFX12: ; %bb.0: ; %entry 3861; UNALIGNED_GFX12-NEXT: s_and_b32 s0, s0, -4 3862; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 3863; UNALIGNED_GFX12-NEXT: s_add_co_u32 s0, s0, 0x100ffe8 3864; UNALIGNED_GFX12-NEXT: scratch_load_b32 v2, off, s0 scope:SCOPE_SYS 3865; UNALIGNED_GFX12-NEXT: 
s_wait_loadcnt 0x0 3866; UNALIGNED_GFX12-NEXT: global_store_b32 v[0:1], v2, off 3867; UNALIGNED_GFX12-NEXT: s_endpgm 3868entry: 3869 ;%alignedBase = alloca [33554432 x i8], align 4, addrspace(5) 3870 %sgpr_base_i32 = ptrtoint ptr addrspace(5) %sgpr_base to i32 3871 %sgpr_base_i32_align4 = and i32 %sgpr_base_i32, 4294967292 3872 %sgpr_base_align4 = inttoptr i32 %sgpr_base_i32_align4 to ptr addrspace(5) 3873 %split_offset = getelementptr inbounds [33554432 x i8], ptr addrspace(5) %sgpr_base_align4, i32 0, i32 16842728 3874 %load = load volatile i32, ptr addrspace(5) %split_offset, align 4 3875 store i32 %load, ptr addrspace(1) %out 3876 ret void 3877} 3878 3879define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset(ptr addrspace(5) inreg %sgpr_base, i32 inreg %sidx, i32 %vidx) { 3880; GFX9-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset: 3881; GFX9: ; %bb.0: ; %bb 3882; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5 3883; GFX9-NEXT: v_add_u32_e32 v0, s3, v0 3884; GFX9-NEXT: v_mov_b32_e32 v1, 0xffe8 3885; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 3886; GFX9-NEXT: v_add3_u32 v0, s2, v0, v1 3887; GFX9-NEXT: v_mov_b32_e32 v1, 15 3888; GFX9-NEXT: scratch_store_dword v0, v1, off 3889; GFX9-NEXT: s_waitcnt vmcnt(0) 3890; GFX9-NEXT: s_endpgm 3891; 3892; GFX10-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset: 3893; GFX10: ; %bb.0: ; %bb 3894; GFX10-NEXT: s_add_u32 s0, s0, s5 3895; GFX10-NEXT: s_addc_u32 s1, s1, 0 3896; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 3897; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 3898; GFX10-NEXT: v_add_nc_u32_e32 v0, s3, v0 3899; GFX10-NEXT: v_mov_b32_e32 v1, 15 3900; GFX10-NEXT: v_add3_u32 v0, s2, v0, 0xffe8 3901; GFX10-NEXT: scratch_store_dword v0, v1, off 3902; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3903; GFX10-NEXT: s_endpgm 3904; 3905; GFX940-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset: 3906; GFX940: ; %bb.0: ; %bb 3907; GFX940-NEXT: v_add_u32_e32 v0, s1, v0 
3908; GFX940-NEXT: v_mov_b32_e32 v1, 0xffe8 3909; GFX940-NEXT: v_add3_u32 v0, s0, v0, v1 3910; GFX940-NEXT: v_mov_b32_e32 v1, 15 3911; GFX940-NEXT: scratch_store_dword v0, v1, off sc0 sc1 3912; GFX940-NEXT: s_waitcnt vmcnt(0) 3913; GFX940-NEXT: s_endpgm 3914; 3915; GFX11-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset: 3916; GFX11: ; %bb.0: ; %bb 3917; GFX11-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0 3918; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 3919; GFX11-NEXT: v_add3_u32 v0, s0, v0, 0xffe8 3920; GFX11-NEXT: scratch_store_b32 v0, v1, off dlc 3921; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3922; GFX11-NEXT: s_endpgm 3923; 3924; GFX12-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset: 3925; GFX12: ; %bb.0: ; %bb 3926; GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0 3927; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 3928; GFX12-NEXT: v_add_nc_u32_e32 v0, s0, v0 3929; GFX12-NEXT: scratch_store_b32 v0, v1, off offset:65512 scope:SCOPE_SYS 3930; GFX12-NEXT: s_wait_storecnt 0x0 3931; GFX12-NEXT: s_endpgm 3932; 3933; UNALIGNED_GFX9-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset: 3934; UNALIGNED_GFX9: ; %bb.0: ; %bb 3935; UNALIGNED_GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5 3936; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v0, s3, v0 3937; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v1, 0xffe8 3938; UNALIGNED_GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 3939; UNALIGNED_GFX9-NEXT: v_add3_u32 v0, s2, v0, v1 3940; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v1, 15 3941; UNALIGNED_GFX9-NEXT: scratch_store_dword v0, v1, off 3942; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 3943; UNALIGNED_GFX9-NEXT: s_endpgm 3944; 3945; UNALIGNED_GFX10-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset: 3946; UNALIGNED_GFX10: ; %bb.0: ; %bb 3947; UNALIGNED_GFX10-NEXT: s_add_u32 s0, s0, s5 3948; UNALIGNED_GFX10-NEXT: s_addc_u32 s1, s1, 0 3949; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 3950; UNALIGNED_GFX10-NEXT: s_setreg_b32 
hwreg(HW_REG_FLAT_SCR_HI), s1 3951; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v0, s3, v0 3952; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 15 3953; UNALIGNED_GFX10-NEXT: v_add3_u32 v0, s2, v0, 0xffe8 3954; UNALIGNED_GFX10-NEXT: scratch_store_dword v0, v1, off 3955; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3956; UNALIGNED_GFX10-NEXT: s_endpgm 3957; 3958; UNALIGNED_GFX940-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset: 3959; UNALIGNED_GFX940: ; %bb.0: ; %bb 3960; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v0, s1, v0 3961; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v1, 0xffe8 3962; UNALIGNED_GFX940-NEXT: v_add3_u32 v0, s0, v0, v1 3963; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v1, 15 3964; UNALIGNED_GFX940-NEXT: scratch_store_dword v0, v1, off sc0 sc1 3965; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 3966; UNALIGNED_GFX940-NEXT: s_endpgm 3967; 3968; UNALIGNED_GFX11-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset: 3969; UNALIGNED_GFX11: ; %bb.0: ; %bb 3970; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0 3971; UNALIGNED_GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 3972; UNALIGNED_GFX11-NEXT: v_add3_u32 v0, s0, v0, 0xffe8 3973; UNALIGNED_GFX11-NEXT: scratch_store_b32 v0, v1, off dlc 3974; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3975; UNALIGNED_GFX11-NEXT: s_endpgm 3976; 3977; UNALIGNED_GFX12-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset: 3978; UNALIGNED_GFX12: ; %bb.0: ; %bb 3979; UNALIGNED_GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0 3980; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 3981; UNALIGNED_GFX12-NEXT: v_add_nc_u32_e32 v0, s0, v0 3982; UNALIGNED_GFX12-NEXT: scratch_store_b32 v0, v1, off offset:65512 scope:SCOPE_SYS 3983; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 3984; UNALIGNED_GFX12-NEXT: s_endpgm 3985bb: 3986 %add1 = add nsw i32 %sidx, %vidx 3987 %add2 = add nsw i32 %add1, 65512 3988 %gep = getelementptr inbounds [33554432 x i8], ptr addrspace(5) %sgpr_base, i32 0, i32 %add2 
3989 store volatile i32 15, ptr addrspace(5) %gep, align 4 3990 ret void 3991} 3992 3993define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset(ptr addrspace(5) inreg %sgpr_base, i32 inreg %sidx, i32 %vidx) { 3994; GFX9-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset: 3995; GFX9: ; %bb.0: ; %bb 3996; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5 3997; GFX9-NEXT: v_add_u32_e32 v0, s3, v0 3998; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 3999; GFX9-NEXT: v_add3_u32 v0, s2, v0, -16 4000; GFX9-NEXT: v_mov_b32_e32 v1, 15 4001; GFX9-NEXT: scratch_store_dword v0, v1, off 4002; GFX9-NEXT: s_waitcnt vmcnt(0) 4003; GFX9-NEXT: s_endpgm 4004; 4005; GFX10-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset: 4006; GFX10: ; %bb.0: ; %bb 4007; GFX10-NEXT: s_add_u32 s0, s0, s5 4008; GFX10-NEXT: s_addc_u32 s1, s1, 0 4009; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 4010; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 4011; GFX10-NEXT: v_add_nc_u32_e32 v0, s3, v0 4012; GFX10-NEXT: v_mov_b32_e32 v1, 15 4013; GFX10-NEXT: v_add_nc_u32_e32 v0, s2, v0 4014; GFX10-NEXT: scratch_store_dword v0, v1, off offset:-16 4015; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 4016; GFX10-NEXT: s_endpgm 4017; 4018; GFX940-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset: 4019; GFX940: ; %bb.0: ; %bb 4020; GFX940-NEXT: v_add_u32_e32 v0, s1, v0 4021; GFX940-NEXT: v_add3_u32 v0, s0, v0, -16 4022; GFX940-NEXT: v_mov_b32_e32 v1, 15 4023; GFX940-NEXT: scratch_store_dword v0, v1, off sc0 sc1 4024; GFX940-NEXT: s_waitcnt vmcnt(0) 4025; GFX940-NEXT: s_endpgm 4026; 4027; GFX11-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset: 4028; GFX11: ; %bb.0: ; %bb 4029; GFX11-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0 4030; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 4031; GFX11-NEXT: v_add_nc_u32_e32 v0, s0, v0 4032; GFX11-NEXT: scratch_store_b32 v0, v1, off offset:-16 dlc 4033; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 4034; 
GFX11-NEXT: s_endpgm 4035; 4036; GFX12-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset: 4037; GFX12: ; %bb.0: ; %bb 4038; GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0 4039; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 4040; GFX12-NEXT: v_add_nc_u32_e32 v0, s0, v0 4041; GFX12-NEXT: scratch_store_b32 v0, v1, off offset:-16 scope:SCOPE_SYS 4042; GFX12-NEXT: s_wait_storecnt 0x0 4043; GFX12-NEXT: s_endpgm 4044; 4045; UNALIGNED_GFX9-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset: 4046; UNALIGNED_GFX9: ; %bb.0: ; %bb 4047; UNALIGNED_GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5 4048; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v0, s3, v0 4049; UNALIGNED_GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 4050; UNALIGNED_GFX9-NEXT: v_add3_u32 v0, s2, v0, -16 4051; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v1, 15 4052; UNALIGNED_GFX9-NEXT: scratch_store_dword v0, v1, off 4053; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 4054; UNALIGNED_GFX9-NEXT: s_endpgm 4055; 4056; UNALIGNED_GFX10-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset: 4057; UNALIGNED_GFX10: ; %bb.0: ; %bb 4058; UNALIGNED_GFX10-NEXT: s_add_u32 s0, s0, s5 4059; UNALIGNED_GFX10-NEXT: s_addc_u32 s1, s1, 0 4060; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 4061; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 4062; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v0, s3, v0 4063; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 15 4064; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v0, s2, v0 4065; UNALIGNED_GFX10-NEXT: scratch_store_dword v0, v1, off offset:-16 4066; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 4067; UNALIGNED_GFX10-NEXT: s_endpgm 4068; 4069; UNALIGNED_GFX940-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset: 4070; UNALIGNED_GFX940: ; %bb.0: ; %bb 4071; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v0, s1, v0 4072; UNALIGNED_GFX940-NEXT: v_add3_u32 v0, s0, v0, -16 4073; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v1, 15 4074; UNALIGNED_GFX940-NEXT: 
scratch_store_dword v0, v1, off sc0 sc1 4075; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 4076; UNALIGNED_GFX940-NEXT: s_endpgm 4077; 4078; UNALIGNED_GFX11-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset: 4079; UNALIGNED_GFX11: ; %bb.0: ; %bb 4080; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0 4081; UNALIGNED_GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 4082; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v0, s0, v0 4083; UNALIGNED_GFX11-NEXT: scratch_store_b32 v0, v1, off offset:-16 dlc 4084; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 4085; UNALIGNED_GFX11-NEXT: s_endpgm 4086; 4087; UNALIGNED_GFX12-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset: 4088; UNALIGNED_GFX12: ; %bb.0: ; %bb 4089; UNALIGNED_GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0 4090; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 4091; UNALIGNED_GFX12-NEXT: v_add_nc_u32_e32 v0, s0, v0 4092; UNALIGNED_GFX12-NEXT: scratch_store_b32 v0, v1, off offset:-16 scope:SCOPE_SYS 4093; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 4094; UNALIGNED_GFX12-NEXT: s_endpgm 4095bb: 4096 %add1 = add nsw i32 %sidx, %vidx 4097 %add2 = add nsw i32 %add1, -16 4098 %gep = getelementptr inbounds [16 x i8], ptr addrspace(5) %sgpr_base, i32 0, i32 %add2 4099 store volatile i32 15, ptr addrspace(5) %gep, align 4 4100 ret void 4101} 4102 4103define amdgpu_gs void @sgpr_base_negative_offset(ptr addrspace(1) %out, ptr addrspace(5) inreg %scevgep) { 4104; GFX9-LABEL: sgpr_base_negative_offset: 4105; GFX9: ; %bb.0: ; %entry 4106; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5 4107; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 4108; GFX9-NEXT: s_add_u32 s0, s2, 0xffffffe8 4109; GFX9-NEXT: scratch_load_dword v2, off, s0 4110; GFX9-NEXT: s_waitcnt vmcnt(0) 4111; GFX9-NEXT: global_store_dword v[0:1], v2, off 4112; GFX9-NEXT: s_endpgm 4113; 4114; GFX10-LABEL: sgpr_base_negative_offset: 4115; GFX10: ; %bb.0: ; %entry 4116; GFX10-NEXT: s_add_u32 s0, s0, s5 4117; 
GFX10-NEXT: s_addc_u32 s1, s1, 0 4118; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 4119; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 4120; GFX10-NEXT: scratch_load_dword v2, off, s2 offset:-24 4121; GFX10-NEXT: s_waitcnt vmcnt(0) 4122; GFX10-NEXT: global_store_dword v[0:1], v2, off 4123; GFX10-NEXT: s_endpgm 4124; 4125; GFX940-LABEL: sgpr_base_negative_offset: 4126; GFX940: ; %bb.0: ; %entry 4127; GFX940-NEXT: s_add_u32 s0, s0, 0xffffffe8 4128; GFX940-NEXT: scratch_load_dword v2, off, s0 4129; GFX940-NEXT: s_waitcnt vmcnt(0) 4130; GFX940-NEXT: global_store_dword v[0:1], v2, off sc0 sc1 4131; GFX940-NEXT: s_endpgm 4132; 4133; GFX11-LABEL: sgpr_base_negative_offset: 4134; GFX11: ; %bb.0: ; %entry 4135; GFX11-NEXT: scratch_load_b32 v2, off, s0 offset:-24 4136; GFX11-NEXT: s_waitcnt vmcnt(0) 4137; GFX11-NEXT: global_store_b32 v[0:1], v2, off 4138; GFX11-NEXT: s_endpgm 4139; 4140; GFX12-LABEL: sgpr_base_negative_offset: 4141; GFX12: ; %bb.0: ; %entry 4142; GFX12-NEXT: scratch_load_b32 v2, off, s0 offset:-24 4143; GFX12-NEXT: s_wait_loadcnt 0x0 4144; GFX12-NEXT: global_store_b32 v[0:1], v2, off 4145; GFX12-NEXT: s_endpgm 4146; 4147; UNALIGNED_GFX9-LABEL: sgpr_base_negative_offset: 4148; UNALIGNED_GFX9: ; %bb.0: ; %entry 4149; UNALIGNED_GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5 4150; UNALIGNED_GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 4151; UNALIGNED_GFX9-NEXT: s_add_u32 s0, s2, 0xffffffe8 4152; UNALIGNED_GFX9-NEXT: scratch_load_dword v2, off, s0 4153; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) 4154; UNALIGNED_GFX9-NEXT: global_store_dword v[0:1], v2, off 4155; UNALIGNED_GFX9-NEXT: s_endpgm 4156; 4157; UNALIGNED_GFX10-LABEL: sgpr_base_negative_offset: 4158; UNALIGNED_GFX10: ; %bb.0: ; %entry 4159; UNALIGNED_GFX10-NEXT: s_add_u32 s0, s0, s5 4160; UNALIGNED_GFX10-NEXT: s_addc_u32 s1, s1, 0 4161; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 4162; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 4163; 
UNALIGNED_GFX10-NEXT: scratch_load_dword v2, off, s2 offset:-24 4164; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) 4165; UNALIGNED_GFX10-NEXT: global_store_dword v[0:1], v2, off 4166; UNALIGNED_GFX10-NEXT: s_endpgm 4167; 4168; UNALIGNED_GFX940-LABEL: sgpr_base_negative_offset: 4169; UNALIGNED_GFX940: ; %bb.0: ; %entry 4170; UNALIGNED_GFX940-NEXT: s_add_u32 s0, s0, 0xffffffe8 4171; UNALIGNED_GFX940-NEXT: scratch_load_dword v2, off, s0 4172; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) 4173; UNALIGNED_GFX940-NEXT: global_store_dword v[0:1], v2, off sc0 sc1 4174; UNALIGNED_GFX940-NEXT: s_endpgm 4175; 4176; UNALIGNED_GFX11-LABEL: sgpr_base_negative_offset: 4177; UNALIGNED_GFX11: ; %bb.0: ; %entry 4178; UNALIGNED_GFX11-NEXT: scratch_load_b32 v2, off, s0 offset:-24 4179; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) 4180; UNALIGNED_GFX11-NEXT: global_store_b32 v[0:1], v2, off 4181; UNALIGNED_GFX11-NEXT: s_endpgm 4182; 4183; UNALIGNED_GFX12-LABEL: sgpr_base_negative_offset: 4184; UNALIGNED_GFX12: ; %bb.0: ; %entry 4185; UNALIGNED_GFX12-NEXT: scratch_load_b32 v2, off, s0 offset:-24 4186; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 4187; UNALIGNED_GFX12-NEXT: global_store_b32 v[0:1], v2, off 4188; UNALIGNED_GFX12-NEXT: s_endpgm 4189entry: 4190 %scevgep28 = getelementptr i8, ptr addrspace(5) %scevgep, i32 -24 4191 %0 = load i32, ptr addrspace(5) %scevgep28, align 4 4192 store i32 %0, ptr addrspace(1) %out 4193 ret void 4194} 4195 4196declare i32 @llvm.amdgcn.workitem.id.x() 4197