; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd- -O0 -mcpu=gfx600 < %s | FileCheck --check-prefixes=GFX6 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx700 < %s | FileCheck --check-prefixes=GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX10-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1010 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX10-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -O0 -mcpu=gfx700 -amdgcn-skip-cache-invalidations < %s | FileCheck --check-prefixes=SKIP-CACHE-INV %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s

; Checks the -O0 code generated for volatile i32 loads and stores that go
; through addrspace(5) ("private") pointers. Every kernel below is compiled
; once per llc invocation above and matched against the block of assertions
; carrying that invocation's check prefix.
;
; The assertion blocks are tool output: regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; Volatile i32 load from the addrspace(5) argument %in; the loaded value is then
; written to the addrspace(1) argument %out with an ordinary (non-volatile) store.
; Expected shape: the load carries glc (gfx600, gfx700), glc dlc (gfx1010,
; gfx1100) or scope:SCOPE_SYS (gfx1200), and a wait on the load counter is
; emitted before the store that consumes the value.
define amdgpu_kernel void @private_volatile_load_0(
; GFX6-LABEL: private_volatile_load_0:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GFX6-NEXT:    s_mov_b32 s13, SCRATCH_RSRC_DWORD1
; GFX6-NEXT:    s_mov_b32 s14, -1
; GFX6-NEXT:    s_mov_b32 s15, 0xe8f000
; GFX6-NEXT:    s_add_u32 s12, s12, s11
; GFX6-NEXT:    s_addc_u32 s13, s13, 0
; GFX6-NEXT:    s_mov_b64 s[0:1], s[4:5]
; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_mov_b32 s7, s1
; GFX6-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
; GFX6-NEXT:    s_mov_b32 s5, 0xf000
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
; GFX6-NEXT:    s_mov_b32 s1, s7
; GFX6-NEXT:    s_mov_b32 s2, s6
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NEXT:    buffer_load_dword v0, v0, s[12:15], 0 offen glc
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: private_volatile_load_0:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_add_u32 s0, s0, s15
; GFX7-NEXT:    s_addc_u32 s1, s1, 0
; GFX7-NEXT:    s_load_dword s6, s[8:9], 0x0
; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x2
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s6
; GFX7-NEXT:    buffer_load_dword v2, v0, s[0:3], 0 offen glc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    flat_store_dword v[0:1], v2
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: private_volatile_load_0:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_add_u32 s0, s0, s15
; GFX10-WGP-NEXT:    s_addc_u32 s1, s1, 0
; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0x0
; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s6
; GFX10-WGP-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen glc dlc
; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: private_volatile_load_0:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_add_u32 s0, s0, s15
; GFX10-CU-NEXT:    s_addc_u32 s1, s1, 0
; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0x0
; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s6
; GFX10-CU-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen glc dlc
; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: private_volatile_load_0:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_getpc_b64 s[12:13]
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s12, s0
; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[12:15], s[12:13], 0x0
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    s_add_u32 s12, s12, s11
; SKIP-CACHE-INV-NEXT:    s_addc_u32 s13, s13, 0
; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[0:1], s[4:5]
; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s1
; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, 0xf000
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s7
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s6
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s5
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
; SKIP-CACHE-INV-NEXT:    buffer_load_dword v0, v0, s[12:15], 0 offen glc
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SKIP-CACHE-INV-NEXT:    s_endpgm
;
; GFX11-WGP-LABEL: private_volatile_load_0:
; GFX11-WGP:       ; %bb.0: ; %entry
; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x0
; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT:    scratch_load_b32 v1, off, s2 glc dlc
; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-WGP-NEXT:    s_endpgm
;
; GFX11-CU-LABEL: private_volatile_load_0:
; GFX11-CU:       ; %bb.0: ; %entry
; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0x0
; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT:    scratch_load_b32 v1, off, s2 glc dlc
; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-CU-NEXT:    s_endpgm
;
; GFX12-WGP-LABEL: private_volatile_load_0:
; GFX12-WGP:       ; %bb.0: ; %entry
; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x0
; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
; GFX12-WGP-NEXT:    scratch_load_b32 v1, off, s2 scope:SCOPE_SYS
; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX12-WGP-NEXT:    s_endpgm
;
; GFX12-CU-LABEL: private_volatile_load_0:
; GFX12-CU:       ; %bb.0: ; %entry
; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0x0
; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
; GFX12-CU-NEXT:    scratch_load_b32 v1, off, s2 scope:SCOPE_SYS
; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT:    s_endpgm
    ptr addrspace(5) %in, ptr addrspace(1) %out) {
entry:
  %val = load volatile i32, ptr addrspace(5) %in, align 4
  store i32 %val, ptr addrspace(1) %out
  ret void
}

; Volatile i32 load whose addrspace(5) address is %in indexed by the workitem
; id x (getelementptr in i32 elements); the value is then stored to the
; addrspace(1) argument %out with an ordinary store.
; Expected shape: same cache-control modifiers and load-counter wait as the
; unindexed load kernel, preceded by the per-lane address computation
; (shift left by 2, then add or fold into the scratch addressing).
define amdgpu_kernel void @private_volatile_load_1(
; GFX6-LABEL: private_volatile_load_1:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GFX6-NEXT:    s_mov_b32 s13, SCRATCH_RSRC_DWORD1
; GFX6-NEXT:    s_mov_b32 s14, -1
; GFX6-NEXT:    s_mov_b32 s15, 0xe8f000
; GFX6-NEXT:    s_add_u32 s12, s12, s11
; GFX6-NEXT:    s_addc_u32 s13, s13, 0
; GFX6-NEXT:    s_mov_b64 s[0:1], s[4:5]
; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_mov_b32 s7, s1
; GFX6-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
; GFX6-NEXT:    s_mov_b32 s5, 0xf000
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
; GFX6-NEXT:    s_mov_b32 s1, s7
; GFX6-NEXT:    s_mov_b32 s2, s6
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s5, 2
; GFX6-NEXT:    v_lshlrev_b32_e64 v0, s5, v0
; GFX6-NEXT:    v_add_i32_e64 v0, s[4:5], s4, v0
; GFX6-NEXT:    buffer_load_dword v0, v0, s[12:15], 0 offen glc
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: private_volatile_load_1:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_add_u32 s0, s0, s15
; GFX7-NEXT:    s_addc_u32 s1, s1, 0
; GFX7-NEXT:    s_load_dword s6, s[8:9], 0x0
; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x2
; GFX7-NEXT:    s_mov_b32 s7, 2
; GFX7-NEXT:    v_lshlrev_b32_e64 v0, s7, v0
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_add_i32_e64 v0, s[6:7], s6, v0
; GFX7-NEXT:    buffer_load_dword v2, v0, s[0:3], 0 offen glc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    flat_store_dword v[0:1], v2
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: private_volatile_load_1:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_add_u32 s0, s0, s15
; GFX10-WGP-NEXT:    s_addc_u32 s1, s1, 0
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, v0
; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x0
; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT:    s_mov_b32 s6, 2
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_lshl_add_u32 v1, v1, s6, s7
; GFX10-WGP-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen glc dlc
; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: private_volatile_load_1:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_add_u32 s0, s0, s15
; GFX10-CU-NEXT:    s_addc_u32 s1, s1, 0
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, v0
; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x0
; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT:    s_mov_b32 s6, 2
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    v_lshl_add_u32 v1, v1, s6, s7
; GFX10-CU-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen glc dlc
; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: private_volatile_load_1:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_getpc_b64 s[12:13]
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s12, s0
; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[12:15], s[12:13], 0x0
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    s_add_u32 s12, s12, s11
; SKIP-CACHE-INV-NEXT:    s_addc_u32 s13, s13, 0
; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[0:1], s[4:5]
; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s1
; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, 0xf000
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s7
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s6
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s5
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, 2
; SKIP-CACHE-INV-NEXT:    v_lshlrev_b32_e64 v0, s5, v0
; SKIP-CACHE-INV-NEXT:    v_add_i32_e64 v0, s[4:5], s4, v0
; SKIP-CACHE-INV-NEXT:    buffer_load_dword v0, v0, s[12:15], 0 offen glc
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SKIP-CACHE-INV-NEXT:    s_endpgm
;
; GFX11-WGP-LABEL: private_volatile_load_1:
; GFX11-WGP:       ; %bb.0: ; %entry
; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, v0
; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x0
; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-WGP-NEXT:    s_mov_b32 s2, 0x3ff
; GFX11-WGP-NEXT:    v_and_b32_e64 v1, v1, s2
; GFX11-WGP-NEXT:    s_mov_b32 s2, 2
; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT:    v_lshl_add_u32 v1, v1, s2, s3
; GFX11-WGP-NEXT:    scratch_load_b32 v1, v1, off glc dlc
; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-WGP-NEXT:    s_endpgm
;
; GFX11-CU-LABEL: private_volatile_load_1:
; GFX11-CU:       ; %bb.0: ; %entry
; GFX11-CU-NEXT:    v_mov_b32_e32 v1, v0
; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x0
; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-CU-NEXT:    s_mov_b32 s2, 0x3ff
; GFX11-CU-NEXT:    v_and_b32_e64 v1, v1, s2
; GFX11-CU-NEXT:    s_mov_b32 s2, 2
; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT:    v_lshl_add_u32 v1, v1, s2, s3
; GFX11-CU-NEXT:    scratch_load_b32 v1, v1, off glc dlc
; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-CU-NEXT:    s_endpgm
;
; GFX12-WGP-LABEL: private_volatile_load_1:
; GFX12-WGP:       ; %bb.0: ; %entry
; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, v0
; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x0
; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-WGP-NEXT:    s_mov_b32 s3, 0x3ff
; GFX12-WGP-NEXT:    s_wait_alu 0xfffe
; GFX12-WGP-NEXT:    v_and_b32_e64 v1, v1, s3
; GFX12-WGP-NEXT:    s_mov_b32 s3, 2
; GFX12-WGP-NEXT:    s_wait_alu 0xfffe
; GFX12-WGP-NEXT:    v_lshlrev_b32_e64 v1, s3, v1
; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
; GFX12-WGP-NEXT:    scratch_load_b32 v1, v1, s2 scope:SCOPE_SYS
; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX12-WGP-NEXT:    s_endpgm
;
; GFX12-CU-LABEL: private_volatile_load_1:
; GFX12-CU:       ; %bb.0: ; %entry
; GFX12-CU-NEXT:    v_mov_b32_e32 v1, v0
; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0x0
; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-CU-NEXT:    s_mov_b32 s3, 0x3ff
; GFX12-CU-NEXT:    s_wait_alu 0xfffe
; GFX12-CU-NEXT:    v_and_b32_e64 v1, v1, s3
; GFX12-CU-NEXT:    s_mov_b32 s3, 2
; GFX12-CU-NEXT:    s_wait_alu 0xfffe
; GFX12-CU-NEXT:    v_lshlrev_b32_e64 v1, s3, v1
; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
; GFX12-CU-NEXT:    scratch_load_b32 v1, v1, s2 scope:SCOPE_SYS
; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT:    s_endpgm
    ptr addrspace(5) %in, ptr addrspace(1) %out) {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %val.gep = getelementptr inbounds i32, ptr addrspace(5) %in, i32 %tid
  %val = load volatile i32, ptr addrspace(5) %val.gep, align 4
  store i32 %val, ptr addrspace(1) %out
  ret void
}

; Ordinary i32 load from the addrspace(1) argument %in followed by a volatile
; store of that value to the addrspace(5) argument %out.
; Expected shape: the store has no cache-control modifier on gfx600, gfx700 and
; gfx1010, carries dlc on gfx1100 and scope:SCOPE_SYS on gfx1200; on every
; target a wait (vmcnt, vscnt or storecnt) sits between the store and s_endpgm.
define amdgpu_kernel void @private_volatile_store_0(
; GFX6-LABEL: private_volatile_store_0:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GFX6-NEXT:    s_mov_b32 s13, SCRATCH_RSRC_DWORD1
; GFX6-NEXT:    s_mov_b32 s14, -1
; GFX6-NEXT:    s_mov_b32 s15, 0xe8f000
; GFX6-NEXT:    s_add_u32 s12, s12, s11
; GFX6-NEXT:    s_addc_u32 s13, s13, 0
; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x9
; GFX6-NEXT:    s_load_dword s0, s[4:5], 0xb
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s1, s[2:3], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s1
; GFX6-NEXT:    v_mov_b32_e32 v1, s0
; GFX6-NEXT:    buffer_store_dword v0, v1, s[12:15], 0 offen
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: private_volatile_store_0:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_add_u32 s0, s0, s15
; GFX7-NEXT:    s_addc_u32 s1, s1, 0
; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX7-NEXT:    s_load_dword s4, s[8:9], 0x2
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_load_dword s5, s[6:7], 0x0
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s5
; GFX7-NEXT:    v_mov_b32_e32 v1, s4
; GFX7-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: private_volatile_store_0:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_add_u32 s0, s0, s15
; GFX10-WGP-NEXT:    s_addc_u32 s1, s1, 0
; GFX10-WGP-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX10-WGP-NEXT:    s_load_dword s4, s[8:9], 0x8
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    s_load_dword s5, s[6:7], 0x0
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s5
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s4
; GFX10-WGP-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: private_volatile_store_0:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_add_u32 s0, s0, s15
; GFX10-CU-NEXT:    s_addc_u32 s1, s1, 0
; GFX10-CU-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX10-CU-NEXT:    s_load_dword s4, s[8:9], 0x8
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    s_load_dword s5, s[6:7], 0x0
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, s5
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s4
; GFX10-CU-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: private_volatile_store_0:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_getpc_b64 s[12:13]
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s12, s0
; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[12:15], s[12:13], 0x0
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    s_add_u32 s12, s12, s11
; SKIP-CACHE-INV-NEXT:    s_addc_u32 s13, s13, 0
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[4:5], 0x2
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    s_load_dword s1, s[2:3], 0x0
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s1
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, v1, s[12:15], 0 offen
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT:    s_endpgm
;
; GFX11-WGP-LABEL: private_volatile_store_0:
; GFX11-WGP:       ; %bb.0: ; %entry
; GFX11-WGP-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
; GFX11-WGP-NEXT:    s_load_b32 s0, s[4:5], 0x8
; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT:    s_load_b32 s1, s[2:3], 0x0
; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, s1
; GFX11-WGP-NEXT:    scratch_store_b32 off, v0, s0 dlc
; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT:    s_endpgm
;
; GFX11-CU-LABEL: private_volatile_store_0:
; GFX11-CU:       ; %bb.0: ; %entry
; GFX11-CU-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
; GFX11-CU-NEXT:    s_load_b32 s0, s[4:5], 0x8
; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT:    s_load_b32 s1, s[2:3], 0x0
; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT:    v_mov_b32_e32 v0, s1
; GFX11-CU-NEXT:    scratch_store_b32 off, v0, s0 dlc
; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-CU-NEXT:    s_endpgm
;
; GFX12-WGP-LABEL: private_volatile_store_0:
; GFX12-WGP:       ; %bb.0: ; %entry
; GFX12-WGP-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
; GFX12-WGP-NEXT:    s_load_b32 s0, s[4:5], 0x8
; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
; GFX12-WGP-NEXT:    s_load_b32 s1, s[2:3], 0x0
; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, s1
; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
; GFX12-WGP-NEXT:    scratch_store_b32 off, v0, s0 scope:SCOPE_SYS
; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
; GFX12-WGP-NEXT:    s_endpgm
;
; GFX12-CU-LABEL: private_volatile_store_0:
; GFX12-CU:       ; %bb.0: ; %entry
; GFX12-CU-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
; GFX12-CU-NEXT:    s_load_b32 s0, s[4:5], 0x8
; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
; GFX12-CU-NEXT:    s_load_b32 s1, s[2:3], 0x0
; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
; GFX12-CU-NEXT:    v_mov_b32_e32 v0, s1
; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
; GFX12-CU-NEXT:    s_wait_storecnt 0x0
; GFX12-CU-NEXT:    scratch_store_b32 off, v0, s0 scope:SCOPE_SYS
; GFX12-CU-NEXT:    s_wait_storecnt 0x0
; GFX12-CU-NEXT:    s_endpgm
    ptr addrspace(1) %in, ptr addrspace(5) %out) {
entry:
  %val = load i32, ptr addrspace(1) %in, align 4
  store volatile i32 %val, ptr addrspace(5) %out
  ret void
}

; Ordinary i32 load from the addrspace(1) argument %in followed by a volatile
; store to the addrspace(5) argument %out indexed by the workitem id x
; (getelementptr in i32 elements).
; Expected shape: same store modifiers and trailing wait as the unindexed store
; kernel, preceded by the per-lane address computation.
define amdgpu_kernel void @private_volatile_store_1(
; GFX6-LABEL: private_volatile_store_1:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GFX6-NEXT:    s_mov_b32 s13, SCRATCH_RSRC_DWORD1
; GFX6-NEXT:    s_mov_b32 s14, -1
; GFX6-NEXT:    s_mov_b32 s15, 0xe8f000
; GFX6-NEXT:    s_add_u32 s12, s12, s11
; GFX6-NEXT:    s_addc_u32 s13, s13, 0
; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x9
; GFX6-NEXT:    s_load_dword s1, s[4:5], 0xb
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[2:3], 0x0
; GFX6-NEXT:    s_mov_b32 s2, 2
; GFX6-NEXT:    v_lshlrev_b32_e64 v0, s2, v0
; GFX6-NEXT:    v_add_i32_e64 v1, s[2:3], s1, v0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, v1, s[12:15], 0 offen
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: private_volatile_store_1:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_add_u32 s0, s0, s15
; GFX7-NEXT:    s_addc_u32 s1, s1, 0
; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX7-NEXT:    s_load_dword s5, s[8:9], 0x2
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_load_dword s4, s[6:7], 0x0
; GFX7-NEXT:    s_mov_b32 s6, 2
; GFX7-NEXT:    v_lshlrev_b32_e64 v0, s6, v0
; GFX7-NEXT:    v_add_i32_e64 v1, s[6:7], s5, v0
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: private_volatile_store_1:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_add_u32 s0, s0, s15
; GFX10-WGP-NEXT:    s_addc_u32 s1, s1, 0
; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0x8
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    s_load_dword s4, s[4:5], 0x0
; GFX10-WGP-NEXT:    s_mov_b32 s5, 2
; GFX10-WGP-NEXT:    v_lshl_add_u32 v1, v0, s5, s6
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-WGP-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: private_volatile_store_1:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_add_u32 s0, s0, s15
; GFX10-CU-NEXT:    s_addc_u32 s1, s1, 0
; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0x8
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    s_load_dword s4, s[4:5], 0x0
; GFX10-CU-NEXT:    s_mov_b32 s5, 2
; GFX10-CU-NEXT:    v_lshl_add_u32 v1, v0, s5, s6
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-CU-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: private_volatile_store_1:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_getpc_b64 s[12:13]
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s12, s0
; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[12:15], s[12:13], 0x0
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    s_add_u32 s12, s12, s11
; SKIP-CACHE-INV-NEXT:    s_addc_u32 s13, s13, 0
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
; SKIP-CACHE-INV-NEXT:    s_load_dword s1, s[4:5], 0x2
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[2:3], 0x0
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, 2
; SKIP-CACHE-INV-NEXT:    v_lshlrev_b32_e64 v0, s2, v0
; SKIP-CACHE-INV-NEXT:    v_add_i32_e64 v1, s[2:3], s1, v0
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, v1, s[12:15], 0 offen
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT:    s_endpgm
;
; GFX11-WGP-LABEL: private_volatile_store_1:
; GFX11-WGP:       ; %bb.0: ; %entry
; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT:    s_load_b32 s0, s[0:1], 0x0
; GFX11-WGP-NEXT:    s_mov_b32 s1, 0x3ff
; GFX11-WGP-NEXT:    v_and_b32_e64 v0, v0, s1
; GFX11-WGP-NEXT:    s_mov_b32 s1, 2
; GFX11-WGP-NEXT:    v_lshl_add_u32 v1, v0, s1, s2
; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-WGP-NEXT:    scratch_store_b32 v1, v0, off dlc
; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT:    s_endpgm
;
; GFX11-CU-LABEL: private_volatile_store_1:
; GFX11-CU:       ; %bb.0: ; %entry
; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT:    s_load_b32 s0, s[0:1], 0x0
; GFX11-CU-NEXT:    s_mov_b32 s1, 0x3ff
; GFX11-CU-NEXT:    v_and_b32_e64 v0, v0, s1
; GFX11-CU-NEXT:    s_mov_b32 s1, 2
; GFX11-CU-NEXT:    v_lshl_add_u32 v1, v0, s1, s2
; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-CU-NEXT:    scratch_store_b32 v1, v0, off dlc
; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-CU-NEXT:    s_endpgm
;
; GFX12-WGP-LABEL: private_volatile_store_1:
; GFX12-WGP:       ; %bb.0: ; %entry
; GFX12-WGP-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
; GFX12-WGP-NEXT:    s_load_b32 s0, s[4:5], 0x8
; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
; GFX12-WGP-NEXT:    s_load_b32 s1, s[2:3], 0x0
; GFX12-WGP-NEXT:    s_mov_b32 s2, 0x3ff
; GFX12-WGP-NEXT:    s_wait_alu 0xfffe
; GFX12-WGP-NEXT:    v_and_b32_e64 v0, v0, s2
; GFX12-WGP-NEXT:    s_mov_b32 s2, 2
; GFX12-WGP-NEXT:    s_wait_alu 0xfffe
; GFX12-WGP-NEXT:    v_lshlrev_b32_e64 v1, s2, v0
; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, s1
; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
; GFX12-WGP-NEXT:    scratch_store_b32 v1, v0, s0 scope:SCOPE_SYS
; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
; GFX12-WGP-NEXT:    s_endpgm
;
; GFX12-CU-LABEL: private_volatile_store_1:
; GFX12-CU:       ; %bb.0: ; %entry
; GFX12-CU-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
; GFX12-CU-NEXT:    s_load_b32 s0, s[4:5], 0x8
; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
; GFX12-CU-NEXT:    s_load_b32 s1, s[2:3], 0x0
; GFX12-CU-NEXT:    s_mov_b32 s2, 0x3ff
; GFX12-CU-NEXT:    s_wait_alu 0xfffe
; GFX12-CU-NEXT:    v_and_b32_e64 v0, v0, s2
; GFX12-CU-NEXT:    s_mov_b32 s2, 2
; GFX12-CU-NEXT:    s_wait_alu 0xfffe
; GFX12-CU-NEXT:    v_lshlrev_b32_e64 v1, s2, v0
; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
; GFX12-CU-NEXT:    v_mov_b32_e32 v0, s1
; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
; GFX12-CU-NEXT:    s_wait_storecnt 0x0
; GFX12-CU-NEXT:    scratch_store_b32 v1, v0, s0 scope:SCOPE_SYS
; GFX12-CU-NEXT:    s_wait_storecnt 0x0
; GFX12-CU-NEXT:    s_endpgm
    ptr addrspace(1) %in, ptr addrspace(5) %out) {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %val = load i32, ptr addrspace(1) %in, align 4
  %out.gep = getelementptr inbounds i32, ptr addrspace(5) %out, i32 %tid
  store volatile i32 %val, ptr addrspace(5) %out.gep
  ret void
}

; Intrinsic used by the indexed kernels to obtain the per-lane index.
declare i32 @llvm.amdgcn.workitem.id.x()