1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx940 < %s | FileCheck %s -check-prefixes=GFX940-SDAG 3; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx940 < %s | FileCheck %s -check-prefixes=GFX940-GISEL 4; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefixes=GFX11-SDAG 5; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefixes=GFX11-GISEL 6; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s -check-prefixes=GFX12-SDAG 7; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s -check-prefixes=GFX12-GISEL 8 9; Test flat scratch SVS addressing mode with various combinations of alignment 10; of soffset, voffset and inst_offset. 11 12declare i32 @llvm.amdgcn.workitem.id.x() 13 14define amdgpu_kernel void @soff1_voff1(i32 %soff) { 15; GFX940-SDAG-LABEL: soff1_voff1: 16; GFX940-SDAG: ; %bb.0: ; %bb 17; GFX940-SDAG-NEXT: s_load_dword s0, s[4:5], 0x24 18; GFX940-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 19; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1 20; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0) 21; GFX940-SDAG-NEXT: v_add_u32_e32 v0, s0, v0 22; GFX940-SDAG-NEXT: v_add_u32_e32 v2, 1, v0 23; GFX940-SDAG-NEXT: v_add_u32_e32 v3, 2, v0 24; GFX940-SDAG-NEXT: scratch_store_byte v2, v1, off sc0 sc1 25; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 26; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 2 27; GFX940-SDAG-NEXT: scratch_store_byte v3, v1, off sc0 sc1 28; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 29; GFX940-SDAG-NEXT: v_add_u32_e32 v0, 4, v0 30; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4 31; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, off sc0 sc1 32; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 33; GFX940-SDAG-NEXT: s_endpgm 34; 35; GFX940-GISEL-LABEL: soff1_voff1: 36; GFX940-GISEL: ; %bb.0: ; %bb 37; GFX940-GISEL-NEXT: s_load_dword s0, s[4:5], 0x24 38; GFX940-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 39; GFX940-GISEL-NEXT: 
v_mov_b32_e32 v1, 1 40; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) 41; GFX940-GISEL-NEXT: s_add_u32 s0, 0, s0 42; GFX940-GISEL-NEXT: v_add_u32_e32 v0, s0, v0 43; GFX940-GISEL-NEXT: v_add_u32_e32 v2, 1, v0 44; GFX940-GISEL-NEXT: v_add_u32_e32 v3, 2, v0 45; GFX940-GISEL-NEXT: scratch_store_byte v2, v1, off sc0 sc1 46; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 47; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 2 48; GFX940-GISEL-NEXT: scratch_store_byte v3, v1, off sc0 sc1 49; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 50; GFX940-GISEL-NEXT: v_add_u32_e32 v0, 4, v0 51; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 52; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off sc0 sc1 53; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 54; GFX940-GISEL-NEXT: s_endpgm 55; 56; GFX11-SDAG-LABEL: soff1_voff1: 57; GFX11-SDAG: ; %bb.0: ; %bb 58; GFX11-SDAG-NEXT: s_load_b32 s0, s[4:5], 0x24 59; GFX11-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 60; GFX11-SDAG-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4 61; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 62; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 63; GFX11-SDAG-NEXT: v_add3_u32 v0, 0, s0, v0 64; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v4, 1, v0 65; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v5, 2, v0 66; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v0, 4, v0 67; GFX11-SDAG-NEXT: scratch_store_b8 v4, v1, off dlc 68; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 69; GFX11-SDAG-NEXT: scratch_store_b8 v5, v2, off dlc 70; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 71; GFX11-SDAG-NEXT: scratch_store_b8 v0, v3, off dlc 72; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 73; GFX11-SDAG-NEXT: s_endpgm 74; 75; GFX11-GISEL-LABEL: soff1_voff1: 76; GFX11-GISEL: ; %bb.0: ; %bb 77; GFX11-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24 78; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 79; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 4 80; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 81; GFX11-GISEL-NEXT: s_add_u32 s0, 0, s0 82; GFX11-GISEL-NEXT: s_delay_alu 
instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1) 83; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0 84; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 85; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_add_nc_u32 v5, 2, v0 86; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v4, 1, v0 87; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 4, v0 88; GFX11-GISEL-NEXT: scratch_store_b8 v4, v1, off dlc 89; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 90; GFX11-GISEL-NEXT: scratch_store_b8 v5, v2, off dlc 91; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 92; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off dlc 93; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 94; GFX11-GISEL-NEXT: s_endpgm 95; 96; GFX12-SDAG-LABEL: soff1_voff1: 97; GFX12-SDAG: ; %bb.0: ; %bb 98; GFX12-SDAG-NEXT: s_load_b32 s0, s[4:5], 0x24 99; GFX12-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 100; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4 101; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 102; GFX12-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS 103; GFX12-SDAG-NEXT: s_wait_storecnt 0x0 104; GFX12-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS 105; GFX12-SDAG-NEXT: s_wait_storecnt 0x0 106; GFX12-SDAG-NEXT: scratch_store_b8 v0, v3, s0 offset:4 scope:SCOPE_SYS 107; GFX12-SDAG-NEXT: s_wait_storecnt 0x0 108; GFX12-SDAG-NEXT: s_endpgm 109; 110; GFX12-GISEL-LABEL: soff1_voff1: 111; GFX12-GISEL: ; %bb.0: ; %bb 112; GFX12-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24 113; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 114; GFX12-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4 115; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 116; GFX12-GISEL-NEXT: s_add_co_u32 s0, 0, s0 117; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1) 118; GFX12-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0 119; GFX12-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 scope:SCOPE_SYS 120; GFX12-GISEL-NEXT: s_wait_storecnt 0x0 121; GFX12-GISEL-NEXT: scratch_store_b8 v0, v2, off 
offset:2 scope:SCOPE_SYS 122; GFX12-GISEL-NEXT: s_wait_storecnt 0x0 123; GFX12-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 scope:SCOPE_SYS 124; GFX12-GISEL-NEXT: s_wait_storecnt 0x0 125; GFX12-GISEL-NEXT: s_endpgm 126bb: 127 %soff1 = mul i32 %soff, 1 128 %a = alloca i8, i32 64, align 4, addrspace(5) 129 %as = getelementptr i8, ptr addrspace(5) %a, i32 %soff1 130 %voff = call i32 @llvm.amdgcn.workitem.id.x() 131 %voff1 = mul i32 %voff, 1 132 %asv = getelementptr i8, ptr addrspace(5) %as, i32 %voff1 133 %p1 = getelementptr i8, ptr addrspace(5) %asv, i32 1 134 store volatile i8 1, ptr addrspace(5) %p1 135 %p2 = getelementptr i8, ptr addrspace(5) %asv, i32 2 136 store volatile i8 2, ptr addrspace(5) %p2 137 %p4 = getelementptr i8, ptr addrspace(5) %asv, i32 4 138 store volatile i8 4, ptr addrspace(5) %p4 139 ret void 140} 141 142define amdgpu_kernel void @soff1_voff2(i32 %soff) { 143; GFX940-SDAG-LABEL: soff1_voff2: 144; GFX940-SDAG: ; %bb.0: ; %bb 145; GFX940-SDAG-NEXT: s_load_dword s0, s[4:5], 0x24 146; GFX940-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 147; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1 148; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0) 149; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, s0 150; GFX940-SDAG-NEXT: v_lshl_add_u32 v0, v0, 1, v2 151; GFX940-SDAG-NEXT: v_add_u32_e32 v2, 1, v0 152; GFX940-SDAG-NEXT: v_add_u32_e32 v3, 2, v0 153; GFX940-SDAG-NEXT: scratch_store_byte v2, v1, off sc0 sc1 154; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 155; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 2 156; GFX940-SDAG-NEXT: scratch_store_byte v3, v1, off sc0 sc1 157; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 158; GFX940-SDAG-NEXT: v_add_u32_e32 v0, 4, v0 159; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4 160; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, off sc0 sc1 161; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 162; GFX940-SDAG-NEXT: s_endpgm 163; 164; GFX940-GISEL-LABEL: soff1_voff2: 165; GFX940-GISEL: ; %bb.0: ; %bb 166; GFX940-GISEL-NEXT: s_load_dword s0, s[4:5], 0x24 167; GFX940-GISEL-NEXT: v_and_b32_e32 v0, 
0x3ff, v0 168; GFX940-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 169; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 1 170; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) 171; GFX940-GISEL-NEXT: s_add_u32 s0, 0, s0 172; GFX940-GISEL-NEXT: v_add_u32_e32 v0, s0, v0 173; GFX940-GISEL-NEXT: v_add_u32_e32 v2, 1, v0 174; GFX940-GISEL-NEXT: scratch_store_byte v2, v1, off sc0 sc1 175; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 176; GFX940-GISEL-NEXT: v_add_u32_e32 v1, 2, v0 177; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 2 178; GFX940-GISEL-NEXT: scratch_store_byte v1, v2, off sc0 sc1 179; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 180; GFX940-GISEL-NEXT: v_add_u32_e32 v0, 4, v0 181; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 182; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off sc0 sc1 183; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 184; GFX940-GISEL-NEXT: s_endpgm 185; 186; GFX11-SDAG-LABEL: soff1_voff2: 187; GFX11-SDAG: ; %bb.0: ; %bb 188; GFX11-SDAG-NEXT: s_load_b32 s0, s[4:5], 0x24 189; GFX11-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 190; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 191; GFX11-SDAG-NEXT: v_dual_mov_b32 v3, 4 :: v_dual_lshlrev_b32 v0, 1, v0 192; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 193; GFX11-SDAG-NEXT: v_add3_u32 v0, 0, s0, v0 194; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 195; GFX11-SDAG-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_add_nc_u32 v5, 2, v0 196; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v4, 1, v0 197; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v0, 4, v0 198; GFX11-SDAG-NEXT: scratch_store_b8 v4, v1, off dlc 199; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 200; GFX11-SDAG-NEXT: scratch_store_b8 v5, v2, off dlc 201; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 202; GFX11-SDAG-NEXT: scratch_store_b8 v0, v3, off dlc 203; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 204; GFX11-SDAG-NEXT: s_endpgm 205; 206; GFX11-GISEL-LABEL: soff1_voff2: 207; GFX11-GISEL: ; %bb.0: ; %bb 208; GFX11-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24 209; GFX11-GISEL-NEXT: 
v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 210; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 211; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, 4 :: v_dual_lshlrev_b32 v0, 1, v0 212; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 213; GFX11-GISEL-NEXT: s_add_u32 s0, 0, s0 214; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) 215; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0 216; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 217; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_add_nc_u32 v5, 2, v0 218; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v4, 1, v0 219; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 4, v0 220; GFX11-GISEL-NEXT: scratch_store_b8 v4, v1, off dlc 221; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 222; GFX11-GISEL-NEXT: scratch_store_b8 v5, v2, off dlc 223; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 224; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off dlc 225; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 226; GFX11-GISEL-NEXT: s_endpgm 227; 228; GFX12-SDAG-LABEL: soff1_voff2: 229; GFX12-SDAG: ; %bb.0: ; %bb 230; GFX12-SDAG-NEXT: s_load_b32 s0, s[4:5], 0x24 231; GFX12-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 232; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4 233; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) 234; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0 235; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 236; GFX12-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS 237; GFX12-SDAG-NEXT: s_wait_storecnt 0x0 238; GFX12-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS 239; GFX12-SDAG-NEXT: s_wait_storecnt 0x0 240; GFX12-SDAG-NEXT: scratch_store_b8 v0, v3, s0 offset:4 scope:SCOPE_SYS 241; GFX12-SDAG-NEXT: s_wait_storecnt 0x0 242; GFX12-SDAG-NEXT: s_endpgm 243; 244; GFX12-GISEL-LABEL: soff1_voff2: 245; GFX12-GISEL: ; %bb.0: ; %bb 246; GFX12-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24 247; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 248; GFX12-GISEL-NEXT: 
v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4 249; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) 250; GFX12-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 251; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 252; GFX12-GISEL-NEXT: s_add_co_u32 s0, 0, s0 253; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) 254; GFX12-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0 255; GFX12-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 scope:SCOPE_SYS 256; GFX12-GISEL-NEXT: s_wait_storecnt 0x0 257; GFX12-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:2 scope:SCOPE_SYS 258; GFX12-GISEL-NEXT: s_wait_storecnt 0x0 259; GFX12-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 scope:SCOPE_SYS 260; GFX12-GISEL-NEXT: s_wait_storecnt 0x0 261; GFX12-GISEL-NEXT: s_endpgm 262bb: 263 %soff1 = mul i32 %soff, 1 264 %a = alloca i8, i32 64, align 4, addrspace(5) 265 %as = getelementptr i8, ptr addrspace(5) %a, i32 %soff1 266 %voff = call i32 @llvm.amdgcn.workitem.id.x() 267 %voff2 = mul i32 %voff, 2 268 %asv = getelementptr i8, ptr addrspace(5) %as, i32 %voff2 269 %p1 = getelementptr i8, ptr addrspace(5) %asv, i32 1 270 store volatile i8 1, ptr addrspace(5) %p1 271 %p2 = getelementptr i8, ptr addrspace(5) %asv, i32 2 272 store volatile i8 2, ptr addrspace(5) %p2 273 %p4 = getelementptr i8, ptr addrspace(5) %asv, i32 4 274 store volatile i8 4, ptr addrspace(5) %p4 275 ret void 276} 277 278define amdgpu_kernel void @soff1_voff4(i32 %soff) { 279; GFX940-SDAG-LABEL: soff1_voff4: 280; GFX940-SDAG: ; %bb.0: ; %bb 281; GFX940-SDAG-NEXT: s_load_dword s0, s[4:5], 0x24 282; GFX940-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 283; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1 284; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0) 285; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, s0 286; GFX940-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, v2 287; GFX940-SDAG-NEXT: v_add_u32_e32 v2, 1, v0 288; GFX940-SDAG-NEXT: v_add_u32_e32 v3, 2, v0 289; GFX940-SDAG-NEXT: scratch_store_byte v2, v1, off sc0 sc1 290; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 291; 
GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 2 292; GFX940-SDAG-NEXT: scratch_store_byte v3, v1, off sc0 sc1 293; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 294; GFX940-SDAG-NEXT: v_add_u32_e32 v0, 4, v0 295; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4 296; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, off sc0 sc1 297; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 298; GFX940-SDAG-NEXT: s_endpgm 299; 300; GFX940-GISEL-LABEL: soff1_voff4: 301; GFX940-GISEL: ; %bb.0: ; %bb 302; GFX940-GISEL-NEXT: s_load_dword s0, s[4:5], 0x24 303; GFX940-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 304; GFX940-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 305; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 1 306; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) 307; GFX940-GISEL-NEXT: s_add_u32 s0, 0, s0 308; GFX940-GISEL-NEXT: v_add_u32_e32 v0, s0, v0 309; GFX940-GISEL-NEXT: v_add_u32_e32 v2, 1, v0 310; GFX940-GISEL-NEXT: scratch_store_byte v2, v1, off sc0 sc1 311; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 312; GFX940-GISEL-NEXT: v_add_u32_e32 v1, 2, v0 313; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 2 314; GFX940-GISEL-NEXT: scratch_store_byte v1, v2, off sc0 sc1 315; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 316; GFX940-GISEL-NEXT: v_add_u32_e32 v0, 4, v0 317; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 318; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off sc0 sc1 319; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 320; GFX940-GISEL-NEXT: s_endpgm 321; 322; GFX11-SDAG-LABEL: soff1_voff4: 323; GFX11-SDAG: ; %bb.0: ; %bb 324; GFX11-SDAG-NEXT: s_load_b32 s0, s[4:5], 0x24 325; GFX11-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 326; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 327; GFX11-SDAG-NEXT: v_dual_mov_b32 v3, 4 :: v_dual_lshlrev_b32 v0, 2, v0 328; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 329; GFX11-SDAG-NEXT: v_add3_u32 v0, 0, s0, v0 330; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 331; GFX11-SDAG-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_add_nc_u32 v5, 2, v0 332; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v4, 1, 
v0 333; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v0, 4, v0 334; GFX11-SDAG-NEXT: scratch_store_b8 v4, v1, off dlc 335; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 336; GFX11-SDAG-NEXT: scratch_store_b8 v5, v2, off dlc 337; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 338; GFX11-SDAG-NEXT: scratch_store_b8 v0, v3, off dlc 339; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 340; GFX11-SDAG-NEXT: s_endpgm 341; 342; GFX11-GISEL-LABEL: soff1_voff4: 343; GFX11-GISEL: ; %bb.0: ; %bb 344; GFX11-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24 345; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 346; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 347; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, 4 :: v_dual_lshlrev_b32 v0, 2, v0 348; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 349; GFX11-GISEL-NEXT: s_add_u32 s0, 0, s0 350; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) 351; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0 352; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 353; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_add_nc_u32 v5, 2, v0 354; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v4, 1, v0 355; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 4, v0 356; GFX11-GISEL-NEXT: scratch_store_b8 v4, v1, off dlc 357; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 358; GFX11-GISEL-NEXT: scratch_store_b8 v5, v2, off dlc 359; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 360; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off dlc 361; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 362; GFX11-GISEL-NEXT: s_endpgm 363; 364; GFX12-SDAG-LABEL: soff1_voff4: 365; GFX12-SDAG: ; %bb.0: ; %bb 366; GFX12-SDAG-NEXT: s_load_b32 s0, s[4:5], 0x24 367; GFX12-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 368; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4 369; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) 370; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 371; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 372; GFX12-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 
scope:SCOPE_SYS 373; GFX12-SDAG-NEXT: s_wait_storecnt 0x0 374; GFX12-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS 375; GFX12-SDAG-NEXT: s_wait_storecnt 0x0 376; GFX12-SDAG-NEXT: scratch_store_b8 v0, v3, s0 offset:4 scope:SCOPE_SYS 377; GFX12-SDAG-NEXT: s_wait_storecnt 0x0 378; GFX12-SDAG-NEXT: s_endpgm 379; 380; GFX12-GISEL-LABEL: soff1_voff4: 381; GFX12-GISEL: ; %bb.0: ; %bb 382; GFX12-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24 383; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 384; GFX12-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4 385; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) 386; GFX12-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 387; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 388; GFX12-GISEL-NEXT: s_add_co_u32 s0, 0, s0 389; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) 390; GFX12-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0 391; GFX12-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 scope:SCOPE_SYS 392; GFX12-GISEL-NEXT: s_wait_storecnt 0x0 393; GFX12-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:2 scope:SCOPE_SYS 394; GFX12-GISEL-NEXT: s_wait_storecnt 0x0 395; GFX12-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 scope:SCOPE_SYS 396; GFX12-GISEL-NEXT: s_wait_storecnt 0x0 397; GFX12-GISEL-NEXT: s_endpgm 398bb: 399 %soff1 = mul i32 %soff, 1 400 %a = alloca i8, i32 64, align 4, addrspace(5) 401 %as = getelementptr i8, ptr addrspace(5) %a, i32 %soff1 402 %voff = call i32 @llvm.amdgcn.workitem.id.x() 403 %voff4 = mul i32 %voff, 4 404 %asv = getelementptr i8, ptr addrspace(5) %as, i32 %voff4 405 %p1 = getelementptr i8, ptr addrspace(5) %asv, i32 1 406 store volatile i8 1, ptr addrspace(5) %p1 407 %p2 = getelementptr i8, ptr addrspace(5) %asv, i32 2 408 store volatile i8 2, ptr addrspace(5) %p2 409 %p4 = getelementptr i8, ptr addrspace(5) %asv, i32 4 410 store volatile i8 4, ptr addrspace(5) %p4 411 ret void 412} 413 414define amdgpu_kernel void @soff2_voff1(i32 %soff) { 415; 
GFX940-SDAG-LABEL: soff2_voff1: 416; GFX940-SDAG: ; %bb.0: ; %bb 417; GFX940-SDAG-NEXT: s_load_dword s0, s[4:5], 0x24 418; GFX940-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 419; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1 420; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0) 421; GFX940-SDAG-NEXT: s_lshl_b32 s0, s0, 1 422; GFX940-SDAG-NEXT: v_add_u32_e32 v0, s0, v0 423; GFX940-SDAG-NEXT: v_add_u32_e32 v2, 1, v0 424; GFX940-SDAG-NEXT: v_add_u32_e32 v3, 2, v0 425; GFX940-SDAG-NEXT: scratch_store_byte v2, v1, off sc0 sc1 426; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 427; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 2 428; GFX940-SDAG-NEXT: scratch_store_byte v3, v1, off sc0 sc1 429; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 430; GFX940-SDAG-NEXT: v_add_u32_e32 v0, 4, v0 431; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4 432; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, off sc0 sc1 433; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 434; GFX940-SDAG-NEXT: s_endpgm 435; 436; GFX940-GISEL-LABEL: soff2_voff1: 437; GFX940-GISEL: ; %bb.0: ; %bb 438; GFX940-GISEL-NEXT: s_load_dword s0, s[4:5], 0x24 439; GFX940-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 440; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 1 441; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) 442; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 1 443; GFX940-GISEL-NEXT: s_add_u32 s0, 0, s0 444; GFX940-GISEL-NEXT: v_add_u32_e32 v0, s0, v0 445; GFX940-GISEL-NEXT: v_add_u32_e32 v2, 1, v0 446; GFX940-GISEL-NEXT: v_add_u32_e32 v3, 2, v0 447; GFX940-GISEL-NEXT: scratch_store_byte v2, v1, off sc0 sc1 448; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 449; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 2 450; GFX940-GISEL-NEXT: scratch_store_byte v3, v1, off sc0 sc1 451; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 452; GFX940-GISEL-NEXT: v_add_u32_e32 v0, 4, v0 453; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 454; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off sc0 sc1 455; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 456; GFX940-GISEL-NEXT: s_endpgm 457; 458; GFX11-SDAG-LABEL: soff2_voff1: 459; GFX11-SDAG: ; %bb.0: ; %bb 460; 
GFX11-SDAG-NEXT: s_load_b32 s0, s[4:5], 0x24 461; GFX11-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 462; GFX11-SDAG-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4 463; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 464; GFX11-SDAG-NEXT: s_lshl_b32 s0, s0, 1 465; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1) 466; GFX11-SDAG-NEXT: v_add3_u32 v0, 0, s0, v0 467; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 468; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v4, 1, v0 469; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v5, 2, v0 470; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v0, 4, v0 471; GFX11-SDAG-NEXT: scratch_store_b8 v4, v1, off dlc 472; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 473; GFX11-SDAG-NEXT: scratch_store_b8 v5, v2, off dlc 474; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 475; GFX11-SDAG-NEXT: scratch_store_b8 v0, v3, off dlc 476; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 477; GFX11-SDAG-NEXT: s_endpgm 478; 479; GFX11-GISEL-LABEL: soff2_voff1: 480; GFX11-GISEL: ; %bb.0: ; %bb 481; GFX11-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24 482; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 483; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 4 484; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 485; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 1 486; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) 487; GFX11-GISEL-NEXT: s_add_u32 s0, 0, s0 488; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0 489; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 490; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_add_nc_u32 v5, 2, v0 491; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v4, 1, v0 492; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 4, v0 493; GFX11-GISEL-NEXT: scratch_store_b8 v4, v1, off dlc 494; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 495; GFX11-GISEL-NEXT: scratch_store_b8 v5, v2, off dlc 496; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 497; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off dlc 498; GFX11-GISEL-NEXT: s_waitcnt_vscnt 
null, 0x0 499; GFX11-GISEL-NEXT: s_endpgm 500; 501; GFX12-SDAG-LABEL: soff2_voff1: 502; GFX12-SDAG: ; %bb.0: ; %bb 503; GFX12-SDAG-NEXT: s_load_b32 s0, s[4:5], 0x24 504; GFX12-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 2 505; GFX12-SDAG-NEXT: v_dual_mov_b32 v3, 4 :: v_dual_and_b32 v0, 0x3ff, v0 506; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 507; GFX12-SDAG-NEXT: s_lshl_b32 s0, s0, 1 508; GFX12-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS 509; GFX12-SDAG-NEXT: s_wait_storecnt 0x0 510; GFX12-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS 511; GFX12-SDAG-NEXT: s_wait_storecnt 0x0 512; GFX12-SDAG-NEXT: scratch_store_b8 v0, v3, s0 offset:4 scope:SCOPE_SYS 513; GFX12-SDAG-NEXT: s_wait_storecnt 0x0 514; GFX12-SDAG-NEXT: s_endpgm 515; 516; GFX12-GISEL-LABEL: soff2_voff1: 517; GFX12-GISEL: ; %bb.0: ; %bb 518; GFX12-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24 519; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 520; GFX12-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4 521; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 522; GFX12-GISEL-NEXT: s_lshl_b32 s0, s0, 1 523; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) 524; GFX12-GISEL-NEXT: s_add_co_u32 s0, 0, s0 525; GFX12-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0 526; GFX12-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 scope:SCOPE_SYS 527; GFX12-GISEL-NEXT: s_wait_storecnt 0x0 528; GFX12-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:2 scope:SCOPE_SYS 529; GFX12-GISEL-NEXT: s_wait_storecnt 0x0 530; GFX12-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 scope:SCOPE_SYS 531; GFX12-GISEL-NEXT: s_wait_storecnt 0x0 532; GFX12-GISEL-NEXT: s_endpgm 533bb: 534 %soff2 = mul i32 %soff, 2 535 %a = alloca i8, i32 64, align 4, addrspace(5) 536 %as = getelementptr i8, ptr addrspace(5) %a, i32 %soff2 537 %voff = call i32 @llvm.amdgcn.workitem.id.x() 538 %voff1 = mul i32 %voff, 1 539 %asv = getelementptr i8, ptr addrspace(5) %as, i32 
%voff1 540 %p1 = getelementptr i8, ptr addrspace(5) %asv, i32 1 541 store volatile i8 1, ptr addrspace(5) %p1 542 %p2 = getelementptr i8, ptr addrspace(5) %asv, i32 2 543 store volatile i8 2, ptr addrspace(5) %p2 544 %p4 = getelementptr i8, ptr addrspace(5) %asv, i32 4 545 store volatile i8 4, ptr addrspace(5) %p4 546 ret void 547} 548 549define amdgpu_kernel void @soff2_voff2(i32 %soff) { 550; GFX940-SDAG-LABEL: soff2_voff2: 551; GFX940-SDAG: ; %bb.0: ; %bb 552; GFX940-SDAG-NEXT: s_load_dword s0, s[4:5], 0x24 553; GFX940-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 554; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1 555; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0) 556; GFX940-SDAG-NEXT: s_lshl_b32 s0, s0, 1 557; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, s0 558; GFX940-SDAG-NEXT: v_lshl_add_u32 v0, v0, 1, v2 559; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, off offset:1 sc0 sc1 560; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 561; GFX940-SDAG-NEXT: v_add_u32_e32 v1, 2, v0 562; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, 2 563; GFX940-SDAG-NEXT: scratch_store_byte v1, v2, off sc0 sc1 564; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 565; GFX940-SDAG-NEXT: v_add_u32_e32 v0, 4, v0 566; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4 567; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, off sc0 sc1 568; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 569; GFX940-SDAG-NEXT: s_endpgm 570; 571; GFX940-GISEL-LABEL: soff2_voff2: 572; GFX940-GISEL: ; %bb.0: ; %bb 573; GFX940-GISEL-NEXT: s_load_dword s0, s[4:5], 0x24 574; GFX940-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 575; GFX940-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 576; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 1 577; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) 578; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 1 579; GFX940-GISEL-NEXT: s_add_u32 s0, 0, s0 580; GFX940-GISEL-NEXT: v_add_u32_e32 v0, s0, v0 581; GFX940-GISEL-NEXT: v_add_u32_e32 v2, 1, v0 582; GFX940-GISEL-NEXT: scratch_store_byte v2, v1, off sc0 sc1 583; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 584; GFX940-GISEL-NEXT: v_add_u32_e32 v1, 2, v0 585; 
GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 2 586; GFX940-GISEL-NEXT: scratch_store_byte v1, v2, off sc0 sc1 587; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 588; GFX940-GISEL-NEXT: v_add_u32_e32 v0, 4, v0 589; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 590; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off sc0 sc1 591; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 592; GFX940-GISEL-NEXT: s_endpgm 593; 594; GFX11-SDAG-LABEL: soff2_voff2: 595; GFX11-SDAG: ; %bb.0: ; %bb 596; GFX11-SDAG-NEXT: s_load_b32 s0, s[4:5], 0x24 597; GFX11-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 598; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 599; GFX11-SDAG-NEXT: v_dual_mov_b32 v3, 4 :: v_dual_lshlrev_b32 v0, 1, v0 600; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 601; GFX11-SDAG-NEXT: s_lshl_b32 s0, s0, 1 602; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) 603; GFX11-SDAG-NEXT: v_add3_u32 v0, 0, s0, v0 604; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 605; GFX11-SDAG-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_add_nc_u32 v5, 4, v0 606; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v4, 2, v0 607; GFX11-SDAG-NEXT: scratch_store_b8 v0, v1, off offset:1 dlc 608; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 609; GFX11-SDAG-NEXT: scratch_store_b8 v4, v2, off dlc 610; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 611; GFX11-SDAG-NEXT: scratch_store_b8 v5, v3, off dlc 612; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 613; GFX11-SDAG-NEXT: s_endpgm 614; 615; GFX11-GISEL-LABEL: soff2_voff2: 616; GFX11-GISEL: ; %bb.0: ; %bb 617; GFX11-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24 618; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 619; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) 620; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, 4 :: v_dual_lshlrev_b32 v0, 1, v0 621; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 622; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 1 623; GFX11-GISEL-NEXT: s_add_u32 s0, 0, s0 624; GFX11-GISEL-NEXT: s_delay_alu 
instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) 625; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0 626; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 627; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_add_nc_u32 v5, 2, v0 628; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v4, 1, v0 629; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 4, v0 630; GFX11-GISEL-NEXT: scratch_store_b8 v4, v1, off dlc 631; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 632; GFX11-GISEL-NEXT: scratch_store_b8 v5, v2, off dlc 633; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 634; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off dlc 635; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 636; GFX11-GISEL-NEXT: s_endpgm 637; 638; GFX12-SDAG-LABEL: soff2_voff2: 639; GFX12-SDAG: ; %bb.0: ; %bb 640; GFX12-SDAG-NEXT: s_load_b32 s0, s[4:5], 0x24 641; GFX12-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 642; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4 643; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) 644; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0 645; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 646; GFX12-SDAG-NEXT: s_lshl_b32 s0, s0, 1 647; GFX12-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS 648; GFX12-SDAG-NEXT: s_wait_storecnt 0x0 649; GFX12-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS 650; GFX12-SDAG-NEXT: s_wait_storecnt 0x0 651; GFX12-SDAG-NEXT: scratch_store_b8 v0, v3, s0 offset:4 scope:SCOPE_SYS 652; GFX12-SDAG-NEXT: s_wait_storecnt 0x0 653; GFX12-SDAG-NEXT: s_endpgm 654; 655; GFX12-GISEL-LABEL: soff2_voff2: 656; GFX12-GISEL: ; %bb.0: ; %bb 657; GFX12-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24 658; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 659; GFX12-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4 660; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) 661; GFX12-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 662; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 663; GFX12-GISEL-NEXT: s_lshl_b32 
s0, s0, 1
; GFX12-GISEL-NEXT: s_add_co_u32 s0, 0, s0
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX12-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0
; GFX12-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 scope:SCOPE_SYS
; GFX12-GISEL-NEXT: s_wait_storecnt 0x0
; GFX12-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:2 scope:SCOPE_SYS
; GFX12-GISEL-NEXT: s_wait_storecnt 0x0
; GFX12-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 scope:SCOPE_SYS
; GFX12-GISEL-NEXT: s_wait_storecnt 0x0
; GFX12-GISEL-NEXT: s_endpgm
bb:
  %soff2 = mul i32 %soff, 2
  %a = alloca i8, i32 64, align 4, addrspace(5)
  %as = getelementptr i8, ptr addrspace(5) %a, i32 %soff2
  %voff = call i32 @llvm.amdgcn.workitem.id.x()
  %voff2 = mul i32 %voff, 2
  %asv = getelementptr i8, ptr addrspace(5) %as, i32 %voff2
  %p1 = getelementptr i8, ptr addrspace(5) %asv, i32 1
  store volatile i8 1, ptr addrspace(5) %p1
  %p2 = getelementptr i8, ptr addrspace(5) %asv, i32 2
  store volatile i8 2, ptr addrspace(5) %p2
  %p4 = getelementptr i8, ptr addrspace(5) %asv, i32 4
  store volatile i8 4, ptr addrspace(5) %p4
  ret void
}

; Case soff2_voff4: the kernel-argument offset is multiplied by 2 and the
; workitem-id offset by 4 before both are added to the base of a 64-byte,
; 4-byte-aligned addrspace(5) alloca. Three volatile byte stores then hit
; constant offsets 1, 2 and 4 from that address.
; The check lines in this function are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define amdgpu_kernel void @soff2_voff4(i32 %soff) {
; GFX940-SDAG-LABEL: soff2_voff4:
; GFX940-SDAG: ; %bb.0: ; %bb
; GFX940-SDAG-NEXT: s_load_dword s0, s[4:5], 0x24
; GFX940-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1
; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-SDAG-NEXT: s_lshl_b32 s0, s0, 1
; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, s0
; GFX940-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, v2
; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, off offset:1 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: v_add_u32_e32 v1, 2, v0
; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, 2
; GFX940-SDAG-NEXT: scratch_store_byte v1, v2, off sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: v_add_u32_e32 v0, 4, v0
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4
; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, off sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: s_endpgm
;
; GFX940-GISEL-LABEL: soff2_voff4:
; GFX940-GISEL: ; %bb.0: ; %bb
; GFX940-GISEL-NEXT: s_load_dword s0, s[4:5], 0x24
; GFX940-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX940-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 1
; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 1
; GFX940-GISEL-NEXT: s_add_u32 s0, 0, s0
; GFX940-GISEL-NEXT: v_add_u32_e32 v0, s0, v0
; GFX940-GISEL-NEXT: v_add_u32_e32 v2, 1, v0
; GFX940-GISEL-NEXT: scratch_store_byte v2, v1, off sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: v_add_u32_e32 v1, 2, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 2
; GFX940-GISEL-NEXT: scratch_store_byte v1, v2, off sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: v_add_u32_e32 v0, 4, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: s_endpgm
;
; GFX11-SDAG-LABEL: soff2_voff4:
; GFX11-SDAG: ; %bb.0: ; %bb
; GFX11-SDAG-NEXT: s_load_b32 s0, s[4:5], 0x24
; GFX11-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT: v_dual_mov_b32 v3, 4 :: v_dual_lshlrev_b32 v0, 2, v0
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT: s_lshl_b32 s0, s0, 1
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX11-SDAG-NEXT: v_add3_u32 v0, 0, s0, v0
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_add_nc_u32 v5, 4, v0
; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v4, 2, v0
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v1, off offset:1 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: scratch_store_b8 v4, v2, off dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: scratch_store_b8 v5, v3, off dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-LABEL: soff2_voff4:
; GFX11-GISEL: ; %bb.0: ; %bb
; GFX11-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24
; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, 4 :: v_dual_lshlrev_b32 v0, 2, v0
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 1
; GFX11-GISEL-NEXT: s_add_u32 s0, 0, s0
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_add_nc_u32 v5, 2, v0
; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v4, 1, v0
; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 4, v0
; GFX11-GISEL-NEXT: scratch_store_b8 v4, v1, off dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v5, v2, off dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_endpgm
;
; GFX12-SDAG-LABEL: soff2_voff4:
; GFX12-SDAG: ; %bb.0: ; %bb
; GFX12-SDAG-NEXT: s_load_b32 s0, s[4:5], 0x24
; GFX12-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT: s_lshl_b32 s0, s0, 1
; GFX12-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS
; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
; GFX12-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS
; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
; GFX12-SDAG-NEXT: scratch_store_b8 v0, v3, s0 offset:4 scope:SCOPE_SYS
; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
; GFX12-SDAG-NEXT: s_endpgm
;
; GFX12-GISEL-LABEL: soff2_voff4:
; GFX12-GISEL: ; %bb.0: ; %bb
; GFX12-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24
; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX12-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
; GFX12-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: s_lshl_b32 s0, s0, 1
; GFX12-GISEL-NEXT: s_add_co_u32 s0, 0, s0
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX12-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0
; GFX12-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 scope:SCOPE_SYS
; GFX12-GISEL-NEXT: s_wait_storecnt 0x0
; GFX12-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:2 scope:SCOPE_SYS
; GFX12-GISEL-NEXT: s_wait_storecnt 0x0
; GFX12-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 scope:SCOPE_SYS
; GFX12-GISEL-NEXT: s_wait_storecnt 0x0
; GFX12-GISEL-NEXT: s_endpgm
bb:
  ; Kernel-argument part of the address, scaled by 2.
  %soff2 = mul i32 %soff, 2
  %a = alloca i8, i32 64, align 4, addrspace(5)
  %as = getelementptr i8, ptr addrspace(5) %a, i32 %soff2
  ; Workitem-id part of the address, scaled by 4.
  %voff = call i32 @llvm.amdgcn.workitem.id.x()
  %voff4 = mul i32 %voff, 4
  %asv = getelementptr i8, ptr addrspace(5) %as, i32 %voff4
  ; Volatile byte stores at constant offsets 1, 2 and 4.
  %p1 = getelementptr i8, ptr addrspace(5) %asv, i32 1
  store volatile i8 1, ptr addrspace(5) %p1
  %p2 = getelementptr i8, ptr addrspace(5) %asv, i32 2
  store volatile i8 2, ptr addrspace(5) %p2
  %p4 = getelementptr i8, ptr addrspace(5) %asv, i32 4
  store volatile i8 4, ptr addrspace(5) %p4
  ret void
}

; Case soff4_voff1: the kernel-argument offset is multiplied by 4 and the
; workitem-id offset by 1 (i.e. left unscaled) before both are added to the
; base of a 64-byte, 4-byte-aligned addrspace(5) alloca. Three volatile byte
; stores then hit constant offsets 1, 2 and 4 from that address.
; The check lines in this function are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define amdgpu_kernel void @soff4_voff1(i32 %soff) {
; GFX940-SDAG-LABEL: soff4_voff1:
; GFX940-SDAG: ; %bb.0: ; %bb
; GFX940-SDAG-NEXT: s_load_dword s0, s[4:5], 0x24
; GFX940-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1
; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-SDAG-NEXT: s_lshl_b32 s0, s0, 2
; GFX940-SDAG-NEXT: v_add_u32_e32 v0, s0, v0
; GFX940-SDAG-NEXT: v_add_u32_e32 v2, 1, v0
; GFX940-SDAG-NEXT: v_add_u32_e32 v3, 2, v0
; GFX940-SDAG-NEXT: scratch_store_byte v2, v1, off sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 2
; GFX940-SDAG-NEXT: scratch_store_byte v3, v1, off sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: v_add_u32_e32 v0, 4, v0
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4
; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, off sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: s_endpgm
;
; GFX940-GISEL-LABEL: soff4_voff1:
; GFX940-GISEL: ; %bb.0: ; %bb
; GFX940-GISEL-NEXT: s_load_dword s0, s[4:5], 0x24
; GFX940-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 1
; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 2
; GFX940-GISEL-NEXT: s_add_u32 s0, 0, s0
; GFX940-GISEL-NEXT: v_add_u32_e32 v0, s0, v0
; GFX940-GISEL-NEXT: v_add_u32_e32 v2, 1, v0
; GFX940-GISEL-NEXT: v_add_u32_e32 v3, 2, v0
; GFX940-GISEL-NEXT: scratch_store_byte v2, v1, off sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 2
; GFX940-GISEL-NEXT: scratch_store_byte v3, v1, off sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: v_add_u32_e32 v0, 4, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: s_endpgm
;
; GFX11-SDAG-LABEL: soff4_voff1:
; GFX11-SDAG: ; %bb.0: ; %bb
; GFX11-SDAG-NEXT: s_load_b32 s0, s[4:5], 0x24
; GFX11-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX11-SDAG-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT: s_lshl_b32 s0, s0, 2
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
; GFX11-SDAG-NEXT: v_add3_u32 v0, 0, s0, v0
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v4, 1, v0
; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v5, 2, v0
; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v0, 4, v0
; GFX11-SDAG-NEXT: scratch_store_b8 v4, v1, off dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: scratch_store_b8 v5, v2, off dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v3, off dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-LABEL: soff4_voff1:
; GFX11-GISEL: ; %bb.0: ; %bb
; GFX11-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24
; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 4
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 2
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-GISEL-NEXT: s_add_u32 s0, 0, s0
; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_add_nc_u32 v5, 2, v0
; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v4, 1, v0
; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 4, v0
; GFX11-GISEL-NEXT: scratch_store_b8 v4, v1, off dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v5, v2, off dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_endpgm
;
; GFX12-SDAG-LABEL: soff4_voff1:
; GFX12-SDAG: ; %bb.0: ; %bb
; GFX12-SDAG-NEXT: s_load_b32 s0, s[4:5], 0x24
; GFX12-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 2
; GFX12-SDAG-NEXT: v_dual_mov_b32 v3, 4 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT: s_lshl_b32 s0, s0, 2
; GFX12-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS
; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
; GFX12-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS
; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
; GFX12-SDAG-NEXT: scratch_store_b8 v0, v3, s0 offset:4 scope:SCOPE_SYS
; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
; GFX12-SDAG-NEXT: s_endpgm
;
; GFX12-GISEL-LABEL: soff4_voff1:
; GFX12-GISEL: ; %bb.0: ; %bb
; GFX12-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24
; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX12-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: s_lshl_b32 s0, s0, 2
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX12-GISEL-NEXT: s_add_co_u32 s0, 0, s0
; GFX12-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0
; GFX12-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 scope:SCOPE_SYS
; GFX12-GISEL-NEXT: s_wait_storecnt 0x0
; GFX12-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:2 scope:SCOPE_SYS
; GFX12-GISEL-NEXT: s_wait_storecnt 0x0
; GFX12-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 scope:SCOPE_SYS
; GFX12-GISEL-NEXT: s_wait_storecnt 0x0
; GFX12-GISEL-NEXT: s_endpgm
bb:
  ; Kernel-argument part of the address, scaled by 4.
  %soff4 = mul i32 %soff, 4
  %a = alloca i8, i32 64, align 4, addrspace(5)
  %as = getelementptr i8, ptr addrspace(5) %a, i32 %soff4
  ; Workitem-id part of the address; the multiply by 1 keeps the same IR shape
  ; as the sibling cases while leaving the value unscaled.
  %voff = call i32 @llvm.amdgcn.workitem.id.x()
  %voff1 = mul i32 %voff, 1
  %asv = getelementptr i8, ptr addrspace(5) %as, i32 %voff1
  ; Volatile byte stores at constant offsets 1, 2 and 4.
  %p1 = getelementptr i8, ptr addrspace(5) %asv, i32 1
  store volatile i8 1, ptr addrspace(5) %p1
  %p2 = getelementptr i8, ptr addrspace(5) %asv, i32 2
  store volatile i8 2, ptr addrspace(5) %p2
  %p4 = getelementptr i8, ptr addrspace(5) %asv, i32 4
  store volatile i8 4, ptr addrspace(5) %p4
  ret void
}

; Case soff4_voff2: the kernel-argument offset is multiplied by 4 and the
; workitem-id offset by 2 before both are added to the base of a 64-byte,
; 4-byte-aligned addrspace(5) alloca. Three volatile byte stores then hit
; constant offsets 1, 2 and 4 from that address.
; The check lines in this function are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define amdgpu_kernel void @soff4_voff2(i32 %soff) {
; GFX940-SDAG-LABEL: soff4_voff2:
; GFX940-SDAG: ; %bb.0: ; %bb
; GFX940-SDAG-NEXT: s_load_dword s0, s[4:5], 0x24
; GFX940-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1
; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-SDAG-NEXT: s_lshl_b32 s0, s0, 2
; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, s0
; GFX940-SDAG-NEXT: v_lshl_add_u32 v0, v0, 1, v2
; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, off offset:1 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: v_add_u32_e32 v1, 2, v0
; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, 2
; GFX940-SDAG-NEXT: scratch_store_byte v1, v2, off sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: v_add_u32_e32 v0, 4, v0
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4
; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, off sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: s_endpgm
;
; GFX940-GISEL-LABEL: soff4_voff2:
; GFX940-GISEL: ; %bb.0: ; %bb
; GFX940-GISEL-NEXT: s_load_dword s0, s[4:5], 0x24
; GFX940-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX940-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 1
; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 2
; GFX940-GISEL-NEXT: s_add_u32 s0, 0, s0
; GFX940-GISEL-NEXT: v_add_u32_e32 v0, s0, v0
; GFX940-GISEL-NEXT: v_add_u32_e32 v2, 1, v0
; GFX940-GISEL-NEXT: scratch_store_byte v2, v1, off sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: v_add_u32_e32 v1, 2, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 2
; GFX940-GISEL-NEXT: scratch_store_byte v1, v2, off sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: v_add_u32_e32 v0, 4, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: s_endpgm
;
; GFX11-SDAG-LABEL: soff4_voff2:
; GFX11-SDAG: ; %bb.0: ; %bb
; GFX11-SDAG-NEXT: s_load_b32 s0, s[4:5], 0x24
; GFX11-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT: v_dual_mov_b32 v3, 4 :: v_dual_lshlrev_b32 v0, 1, v0
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT: s_lshl_b32 s0, s0, 2
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX11-SDAG-NEXT: v_add3_u32 v0, 0, s0, v0
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_add_nc_u32 v5, 4, v0
; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v4, 2, v0
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v1, off offset:1 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: scratch_store_b8 v4, v2, off dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: scratch_store_b8 v5, v3, off dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-LABEL: soff4_voff2:
; GFX11-GISEL: ; %bb.0: ; %bb
; GFX11-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24
; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, 4 :: v_dual_lshlrev_b32 v0, 1, v0
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 2
; GFX11-GISEL-NEXT: s_add_u32 s0, 0, s0
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_add_nc_u32 v5, 2, v0
; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v4, 1, v0
; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 4, v0
; GFX11-GISEL-NEXT: scratch_store_b8 v4, v1, off dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v5, v2, off dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_endpgm
;
; GFX12-SDAG-LABEL: soff4_voff2:
; GFX12-SDAG: ; %bb.0: ; %bb
; GFX12-SDAG-NEXT: s_load_b32 s0, s[4:5], 0x24
; GFX12-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT: s_lshl_b32 s0, s0, 2
; GFX12-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS
; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
; GFX12-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS
; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
; GFX12-SDAG-NEXT: scratch_store_b8 v0, v3, s0 offset:4 scope:SCOPE_SYS
; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
; GFX12-SDAG-NEXT: s_endpgm
;
; GFX12-GISEL-LABEL: soff4_voff2:
; GFX12-GISEL: ; %bb.0: ; %bb
; GFX12-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24
; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX12-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
; GFX12-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: s_lshl_b32 s0, s0, 2
; GFX12-GISEL-NEXT: s_add_co_u32 s0, 0, s0
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX12-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0
; GFX12-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 scope:SCOPE_SYS
; GFX12-GISEL-NEXT: s_wait_storecnt 0x0
; GFX12-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:2 scope:SCOPE_SYS
; GFX12-GISEL-NEXT: s_wait_storecnt 0x0
; GFX12-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 scope:SCOPE_SYS
; GFX12-GISEL-NEXT: s_wait_storecnt 0x0
; GFX12-GISEL-NEXT: s_endpgm
bb:
  ; Kernel-argument part of the address, scaled by 4.
  %soff4 = mul i32 %soff, 4
  %a = alloca i8, i32 64, align 4, addrspace(5)
  %as = getelementptr i8, ptr addrspace(5) %a, i32 %soff4
  ; Workitem-id part of the address, scaled by 2.
  %voff = call i32 @llvm.amdgcn.workitem.id.x()
  %voff2 = mul i32 %voff, 2
  %asv = getelementptr i8, ptr addrspace(5) %as, i32 %voff2
  ; Volatile byte stores at constant offsets 1, 2 and 4.
  %p1 = getelementptr i8, ptr addrspace(5) %asv, i32 1
  store volatile i8 1, ptr addrspace(5) %p1
  %p2 = getelementptr i8, ptr addrspace(5) %asv, i32 2
  store volatile i8 2, ptr addrspace(5) %p2
  %p4 = getelementptr i8, ptr addrspace(5) %asv, i32 4
  store volatile i8 4, ptr addrspace(5) %p4
  ret void
}

; Case soff4_voff4: both the kernel-argument offset and the workitem-id offset
; are multiplied by 4 before being added to the base of a 64-byte,
; 4-byte-aligned addrspace(5) alloca. Three volatile byte stores then hit
; constant offsets 1, 2 and 4 from that address.
; The check lines in this function are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define amdgpu_kernel void @soff4_voff4(i32 %soff) {
; GFX940-SDAG-LABEL: soff4_voff4:
; GFX940-SDAG: ; %bb.0: ; %bb
; GFX940-SDAG-NEXT: s_load_dword s0, s[4:5], 0x24
; GFX940-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1
; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, 2
; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-SDAG-NEXT: s_lshl_b32 s0, s0, 2
; GFX940-SDAG-NEXT: v_mov_b32_e32 v3, s0
; GFX940-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, v3
; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, off offset:1 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: scratch_store_byte v0, v2, off offset:2 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: v_add_u32_e32 v0, 4, v0
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4
; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, off sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: s_endpgm
;
; GFX940-GISEL-LABEL: soff4_voff4:
; GFX940-GISEL: ; %bb.0: ; %bb
; GFX940-GISEL-NEXT: s_load_dword s0, s[4:5], 0x24
; GFX940-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX940-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 1
; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 2
; GFX940-GISEL-NEXT: s_add_u32 s0, 0, s0
; GFX940-GISEL-NEXT: v_add_u32_e32 v0, s0, v0
; GFX940-GISEL-NEXT: v_add_u32_e32 v2, 1, v0
; GFX940-GISEL-NEXT: scratch_store_byte v2, v1, off sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: v_add_u32_e32 v1, 2, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 2
; GFX940-GISEL-NEXT: scratch_store_byte v1, v2, off sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: v_add_u32_e32 v0, 4, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: s_endpgm
;
; GFX11-SDAG-LABEL: soff4_voff4:
; GFX11-SDAG: ; %bb.0: ; %bb
; GFX11-SDAG-NEXT: s_load_b32 s0, s[4:5], 0x24
; GFX11-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX11-SDAG-NEXT: v_mov_b32_e32 v4, 4
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT: s_lshl_b32 s0, s0, 2
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX11-SDAG-NEXT: v_add3_u32 v0, 0, s0, v0
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_add_nc_u32 v3, 4, v0
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v1, off offset:1 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v2, off offset:2 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: scratch_store_b8 v3, v4, off dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-LABEL: soff4_voff4:
; GFX11-GISEL: ; %bb.0: ; %bb
; GFX11-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24
; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, 4 :: v_dual_lshlrev_b32 v0, 2, v0
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 2
; GFX11-GISEL-NEXT: s_add_u32 s0, 0, s0
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_add_nc_u32 v5, 2, v0
; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v4, 1, v0
; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 4, v0
; GFX11-GISEL-NEXT: scratch_store_b8 v4, v1, off dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v5, v2, off dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_endpgm
;
; GFX12-SDAG-LABEL: soff4_voff4:
; GFX12-SDAG: ; %bb.0: ; %bb
; GFX12-SDAG-NEXT: s_load_b32 s0, s[4:5], 0x24
; GFX12-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT: s_lshl_b32 s0, s0, 2
; GFX12-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS
; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
; GFX12-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS
; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
; GFX12-SDAG-NEXT: scratch_store_b8 v0, v3, s0 offset:4 scope:SCOPE_SYS
; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
; GFX12-SDAG-NEXT: s_endpgm
;
; GFX12-GISEL-LABEL: soff4_voff4:
; GFX12-GISEL: ; %bb.0: ; %bb
; GFX12-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24
; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX12-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
; GFX12-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: s_lshl_b32 s0, s0, 2
; GFX12-GISEL-NEXT: s_add_co_u32 s0, 0, s0
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX12-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0
; GFX12-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 scope:SCOPE_SYS
; GFX12-GISEL-NEXT: s_wait_storecnt 0x0
; GFX12-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:2 scope:SCOPE_SYS
; GFX12-GISEL-NEXT: s_wait_storecnt 0x0
; GFX12-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 scope:SCOPE_SYS
; GFX12-GISEL-NEXT: s_wait_storecnt 0x0
; GFX12-GISEL-NEXT: s_endpgm
bb:
  ; Kernel-argument part of the address, scaled by 4.
  %soff4 = mul i32 %soff, 4
  %a = alloca i8, i32 64, align 4, addrspace(5)
  %as = getelementptr i8, ptr addrspace(5) %a, i32 %soff4
  ; Workitem-id part of the address, scaled by 4.
  %voff = call i32 @llvm.amdgcn.workitem.id.x()
  %voff4 = mul i32 %voff, 4
  %asv = getelementptr i8, ptr addrspace(5) %as, i32 %voff4
  ; Volatile byte stores at constant offsets 1, 2 and 4.
  %p1 = getelementptr i8, ptr addrspace(5) %asv, i32 1
  store volatile i8 1, ptr addrspace(5) %p1
  %p2 = getelementptr i8, ptr addrspace(5) %asv, i32 2
  store volatile i8 2, ptr addrspace(5) %p2
  %p4 = getelementptr i8, ptr addrspace(5) %asv, i32 4
  store volatile i8 4, ptr addrspace(5) %p4
  ret void
}

; Case soff1_voff1_negative: neither offset is scaled; the kernel argument and
; the workitem id are added directly to the base of a [64 x i8],
; 4-byte-aligned addrspace(5) alloca. A single volatile byte store is issued
; at constant offset -1 from that address.
; The check lines in this function are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define amdgpu_kernel void @soff1_voff1_negative(i32 %soff) {
; GFX940-SDAG-LABEL: soff1_voff1_negative:
; GFX940-SDAG: ; %bb.0: ; %bb
; GFX940-SDAG-NEXT: s_load_dword s0, s[4:5], 0x24
; GFX940-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1
; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-SDAG-NEXT: v_add_u32_e32 v0, s0, v0
; GFX940-SDAG-NEXT: v_add_u32_e32 v0, -1, v0
; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, off sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: s_endpgm
;
; GFX940-GISEL-LABEL: soff1_voff1_negative:
; GFX940-GISEL: ; %bb.0: ; %bb
; GFX940-GISEL-NEXT: s_load_dword s0, s[4:5], 0x24
; GFX940-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 1
; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-GISEL-NEXT: s_add_u32 s0, 0, s0
; GFX940-GISEL-NEXT: v_add3_u32 v0, s0, v0, -1
; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: s_endpgm
;
; GFX11-SDAG-LABEL: soff1_voff1_negative:
; GFX11-SDAG: ; %bb.0: ; %bb
; GFX11-SDAG-NEXT: s_load_b32 s0, s[4:5], 0x24
; GFX11-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT: v_add3_u32 v0, 0, s0, v0
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v1, off offset:-1 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-LABEL: soff1_voff1_negative:
; GFX11-GISEL: ; %bb.0: ; %bb
; GFX11-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24
; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: s_add_u32 s0, 0, s0
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:-1 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_endpgm
;
; GFX12-SDAG-LABEL: soff1_voff1_negative:
; GFX12-SDAG: ; %bb.0: ; %bb
; GFX12-SDAG-NEXT: s_load_b32 s0, s[4:5], 0x24
; GFX12-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:-1 scope:SCOPE_SYS
; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
; GFX12-SDAG-NEXT: s_endpgm
;
; GFX12-GISEL-LABEL: soff1_voff1_negative:
; GFX12-GISEL: ; %bb.0: ; %bb
; GFX12-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24
; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: s_add_co_u32 s0, 0, s0
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX12-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0
; GFX12-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:-1 scope:SCOPE_SYS
; GFX12-GISEL-NEXT: s_wait_storecnt 0x0
; GFX12-GISEL-NEXT: s_endpgm
bb:
  ; Unlike the sibling cases, the stack object is declared as an array type
  ; and the offsets are used without a preceding multiply.
  %a = alloca [64 x i8], align 4, addrspace(5)
  %as = getelementptr i8, ptr addrspace(5) %a, i32 %soff
  %voff = call i32 @llvm.amdgcn.workitem.id.x()
  %asv = getelementptr i8, ptr addrspace(5) %as, i32 %voff
  ; Single volatile byte store at constant offset -1.
  %p1 = getelementptr i8, ptr addrspace(5) %asv, i32 -1
  store volatile i8 1, ptr addrspace(5) %p1
  ret void
}
