; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx600 < %s | FileCheck --check-prefixes=GFX6 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx700 < %s | FileCheck --check-prefixes=GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX10-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1010 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX10-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -O0 -mcpu=gfx700 -amdgcn-skip-cache-invalidations < %s | FileCheck --check-prefixes=SKIP-CACHE-INV %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX90A-NOTTGSPLIT %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX90A-TGSPLIT %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX940-NOTTGSPLIT %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX940-TGSPLIT %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s

; Unordered atomic i32 load from global memory (addrspace 1) at syncscope "agent";
; the loaded value is then written to %out with a plain store.
; In every run the expected load carries no cache-control modifier and no cache
; invalidate follows it.
; Do not hand-edit the assertions below; regenerate them with the script named
; in the NOTE line.
define amdgpu_kernel void @global_agent_unordered_load(
; GFX6-LABEL: global_agent_unordered_load:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_mov_b64 s[4:5], s[8:9]
; GFX6-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_mov_b32 s6, s9
; GFX6-NEXT:    ; kill: def $sgpr8 killed $sgpr8 killed $sgpr8_sgpr9
; GFX6-NEXT:    s_mov_b32 s12, 0x100f000
; GFX6-NEXT:    s_mov_b32 s13, -1
; GFX6-NEXT:    ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11
; GFX6-NEXT:    s_mov_b32 s9, s6
; GFX6-NEXT:    s_mov_b32 s10, s13
; GFX6-NEXT:    s_mov_b32 s11, s12
; GFX6-NEXT:    s_mov_b32 s14, s5
; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
; GFX6-NEXT:    s_mov_b32 s5, s14
; GFX6-NEXT:    s_mov_b32 s6, s13
; GFX6-NEXT:    s_mov_b32 s7, s12
; GFX6-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: global_agent_unordered_load:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x2
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s6
; GFX7-NEXT:    v_mov_b32_e32 v1, s7
; GFX7-NEXT:    flat_load_dword v2, v[0:1]
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    flat_store_dword v[0:1], v2
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: global_agent_unordered_load:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    global_load_dword v1, v0, s[6:7]
; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: global_agent_unordered_load:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    global_load_dword v1, v0, s[6:7]
; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_agent_unordered_load:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[0:1], s[4:5]
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s5
; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, 0xf000
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s9, -1
; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, s2
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, s9
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s8
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s10, s1
; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s10
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s9
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s8
; SKIP-CACHE-INV-NEXT:    buffer_load_dword v0, off, s[4:7], 0
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SKIP-CACHE-INV-NEXT:    s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_agent_unordered_load:
; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    global_load_dword v1, v0, s[6:7]
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_agent_unordered_load:
; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT:    global_load_dword v1, v0, s[6:7]
; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
; GFX90A-TGSPLIT-NEXT:    s_endpgm
;
; GFX940-NOTTGSPLIT-LABEL: global_agent_unordered_load:
; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT:    global_load_dword v1, v0, s[2:3]
; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX940-TGSPLIT-LABEL: global_agent_unordered_load:
; GFX940-TGSPLIT:       ; %bb.0: ; %entry
; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-TGSPLIT-NEXT:    global_load_dword v1, v0, s[2:3]
; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
; GFX940-TGSPLIT-NEXT:    s_endpgm
;
; GFX11-WGP-LABEL: global_agent_unordered_load:
; GFX11-WGP:       ; %bb.0: ; %entry
; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-WGP-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT:    global_load_b32 v1, v0, s[2:3]
; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-WGP-NEXT:    s_endpgm
;
; GFX11-CU-LABEL: global_agent_unordered_load:
; GFX11-CU:       ; %bb.0: ; %entry
; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-CU-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT:    global_load_b32 v1, v0, s[2:3]
; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-CU-NEXT:    s_endpgm
;
; GFX12-WGP-LABEL: global_agent_unordered_load:
; GFX12-WGP:       ; %bb.0: ; %entry
; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-WGP-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
; GFX12-WGP-NEXT:    global_load_b32 v1, v0, s[2:3]
; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX12-WGP-NEXT:    s_endpgm
;
; GFX12-CU-LABEL: global_agent_unordered_load:
; GFX12-CU:       ; %bb.0: ; %entry
; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-CU-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
; GFX12-CU-NEXT:    global_load_b32 v1, v0, s[2:3]
; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT:    s_endpgm
    ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
  %val = load atomic i32, ptr addrspace(1) %in syncscope("agent") unordered, align 4
  store i32 %val, ptr addrspace(1) %out
  ret void
}

; Monotonic atomic i32 load from global memory (addrspace 1) at syncscope "agent";
; the loaded value is then written to %out with a plain store.
; The expected load carries a cache-control modifier in every run (glc, glc dlc,
; sc1 or scope:SCOPE_DEV depending on the target); no cache invalidate follows.
define amdgpu_kernel void @global_agent_monotonic_load(
; GFX6-LABEL: global_agent_monotonic_load:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_mov_b64 s[4:5], s[8:9]
; GFX6-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_mov_b32 s6, s9
; GFX6-NEXT:    ; kill: def $sgpr8 killed $sgpr8 killed $sgpr8_sgpr9
; GFX6-NEXT:    s_mov_b32 s12, 0x100f000
; GFX6-NEXT:    s_mov_b32 s13, -1
; GFX6-NEXT:    ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11
; GFX6-NEXT:    s_mov_b32 s9, s6
; GFX6-NEXT:    s_mov_b32 s10, s13
; GFX6-NEXT:    s_mov_b32 s11, s12
; GFX6-NEXT:    s_mov_b32 s14, s5
; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
; GFX6-NEXT:    s_mov_b32 s5, s14
; GFX6-NEXT:    s_mov_b32 s6, s13
; GFX6-NEXT:    s_mov_b32 s7, s12
; GFX6-NEXT:    buffer_load_dword v0, off, s[8:11], 0 glc
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: global_agent_monotonic_load:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x2
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s6
; GFX7-NEXT:    v_mov_b32_e32 v1, s7
; GFX7-NEXT:    flat_load_dword v2, v[0:1] glc
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    flat_store_dword v[0:1], v2
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: global_agent_monotonic_load:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    global_load_dword v1, v0, s[6:7] glc dlc
; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: global_agent_monotonic_load:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    global_load_dword v1, v0, s[6:7] glc dlc
; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_agent_monotonic_load:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[0:1], s[4:5]
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s5
; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, 0xf000
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s9, -1
; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, s2
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, s9
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s8
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s10, s1
; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s10
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s9
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s8
; SKIP-CACHE-INV-NEXT:    buffer_load_dword v0, off, s[4:7], 0 glc
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SKIP-CACHE-INV-NEXT:    s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_agent_monotonic_load:
; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    global_load_dword v1, v0, s[6:7] glc
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_agent_monotonic_load:
; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT:    global_load_dword v1, v0, s[6:7] glc
; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
; GFX90A-TGSPLIT-NEXT:    s_endpgm
;
; GFX940-NOTTGSPLIT-LABEL: global_agent_monotonic_load:
; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT:    global_load_dword v1, v0, s[2:3] sc1
; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX940-TGSPLIT-LABEL: global_agent_monotonic_load:
; GFX940-TGSPLIT:       ; %bb.0: ; %entry
; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-TGSPLIT-NEXT:    global_load_dword v1, v0, s[2:3] sc1
; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
; GFX940-TGSPLIT-NEXT:    s_endpgm
;
; GFX11-WGP-LABEL: global_agent_monotonic_load:
; GFX11-WGP:       ; %bb.0: ; %entry
; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-WGP-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT:    global_load_b32 v1, v0, s[2:3] glc
; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-WGP-NEXT:    s_endpgm
;
; GFX11-CU-LABEL: global_agent_monotonic_load:
; GFX11-CU:       ; %bb.0: ; %entry
; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-CU-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT:    global_load_b32 v1, v0, s[2:3] glc
; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-CU-NEXT:    s_endpgm
;
; GFX12-WGP-LABEL: global_agent_monotonic_load:
; GFX12-WGP:       ; %bb.0: ; %entry
; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-WGP-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
; GFX12-WGP-NEXT:    global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX12-WGP-NEXT:    s_endpgm
;
; GFX12-CU-LABEL: global_agent_monotonic_load:
; GFX12-CU:       ; %bb.0: ; %entry
; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-CU-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
; GFX12-CU-NEXT:    global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT:    s_endpgm
    ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
  %val = load atomic i32, ptr addrspace(1) %in syncscope("agent") monotonic, align 4
  store i32 %val, ptr addrspace(1) %out
  ret void
}

; Acquire atomic i32 load from global memory (addrspace 1) at syncscope "agent";
; the loaded value is then written to %out with a plain store.
; Compared with the monotonic case, the expected sequences add a cache
; invalidate after the wait on the load (buffer_wbinvl1, buffer_wbinvl1_vol,
; buffer_gl1_inv + buffer_gl0_inv, buffer_inv sc1 or global_inv, depending on
; the target). The run with -amdgcn-skip-cache-invalidations expects none.
define amdgpu_kernel void @global_agent_acquire_load(
; GFX6-LABEL: global_agent_acquire_load:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_mov_b64 s[4:5], s[8:9]
; GFX6-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_mov_b32 s6, s9
; GFX6-NEXT:    ; kill: def $sgpr8 killed $sgpr8 killed $sgpr8_sgpr9
; GFX6-NEXT:    s_mov_b32 s12, 0x100f000
; GFX6-NEXT:    s_mov_b32 s13, -1
; GFX6-NEXT:    ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11
; GFX6-NEXT:    s_mov_b32 s9, s6
; GFX6-NEXT:    s_mov_b32 s10, s13
; GFX6-NEXT:    s_mov_b32 s11, s12
; GFX6-NEXT:    s_mov_b32 s14, s5
; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
; GFX6-NEXT:    s_mov_b32 s5, s14
; GFX6-NEXT:    s_mov_b32 s6, s13
; GFX6-NEXT:    s_mov_b32 s7, s12
; GFX6-NEXT:    buffer_load_dword v0, off, s[8:11], 0 glc
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_wbinvl1
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: global_agent_acquire_load:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x2
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s6
; GFX7-NEXT:    v_mov_b32_e32 v1, s7
; GFX7-NEXT:    flat_load_dword v2, v[0:1] glc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    buffer_wbinvl1_vol
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    flat_store_dword v[0:1], v2
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: global_agent_acquire_load:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    global_load_dword v1, v0, s[6:7] glc dlc
; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT:    buffer_gl1_inv
; GFX10-WGP-NEXT:    buffer_gl0_inv
; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: global_agent_acquire_load:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    global_load_dword v1, v0, s[6:7] glc dlc
; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
; GFX10-CU-NEXT:    buffer_gl1_inv
; GFX10-CU-NEXT:    buffer_gl0_inv
; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_agent_acquire_load:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[0:1], s[4:5]
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s5
; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, 0xf000
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s9, -1
; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, s2
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, s9
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s8
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s10, s1
; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s10
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s9
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s8
; SKIP-CACHE-INV-NEXT:    buffer_load_dword v0, off, s[4:7], 0 glc
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SKIP-CACHE-INV-NEXT:    s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_agent_acquire_load:
; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    global_load_dword v1, v0, s[6:7] glc
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_agent_acquire_load:
; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT:    global_load_dword v1, v0, s[6:7] glc
; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
; GFX90A-TGSPLIT-NEXT:    s_endpgm
;
; GFX940-NOTTGSPLIT-LABEL: global_agent_acquire_load:
; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT:    global_load_dword v1, v0, s[2:3] sc1
; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX940-TGSPLIT-LABEL: global_agent_acquire_load:
; GFX940-TGSPLIT:       ; %bb.0: ; %entry
; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-TGSPLIT-NEXT:    global_load_dword v1, v0, s[2:3] sc1
; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
; GFX940-TGSPLIT-NEXT:    s_endpgm
;
; GFX11-WGP-LABEL: global_agent_acquire_load:
; GFX11-WGP:       ; %bb.0: ; %entry
; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-WGP-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT:    global_load_b32 v1, v0, s[2:3] glc
; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT:    buffer_gl1_inv
; GFX11-WGP-NEXT:    buffer_gl0_inv
; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-WGP-NEXT:    s_endpgm
;
; GFX11-CU-LABEL: global_agent_acquire_load:
; GFX11-CU:       ; %bb.0: ; %entry
; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-CU-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT:    global_load_b32 v1, v0, s[2:3] glc
; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
; GFX11-CU-NEXT:    buffer_gl1_inv
; GFX11-CU-NEXT:    buffer_gl0_inv
; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-CU-NEXT:    s_endpgm
;
; GFX12-WGP-LABEL: global_agent_acquire_load:
; GFX12-WGP:       ; %bb.0: ; %entry
; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-WGP-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
; GFX12-WGP-NEXT:    global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX12-WGP-NEXT:    s_endpgm
;
; GFX12-CU-LABEL: global_agent_acquire_load:
; GFX12-CU:       ; %bb.0: ; %entry
; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-CU-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
; GFX12-CU-NEXT:    global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT:    s_endpgm
    ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
  %val = load atomic i32, ptr addrspace(1) %in syncscope("agent") acquire, align 4
  store i32 %val, ptr addrspace(1) %out
  ret void
}

; Sequentially consistent atomic i32 load from global memory (addrspace 1) at
; syncscope "agent"; the loaded value is then written to %out with a plain store.
; Compared with the acquire case, the expected sequences additionally wait on
; outstanding memory counters immediately before the atomic load.
define amdgpu_kernel void @global_agent_seq_cst_load(
; GFX6-LABEL: global_agent_seq_cst_load:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_mov_b64 s[4:5], s[8:9]
; GFX6-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_mov_b32 s6, s9
; GFX6-NEXT:    ; kill: def $sgpr8 killed $sgpr8 killed $sgpr8_sgpr9
; GFX6-NEXT:    s_mov_b32 s12, 0x100f000
; GFX6-NEXT:    s_mov_b32 s13, -1
; GFX6-NEXT:    ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11
; GFX6-NEXT:    s_mov_b32 s9, s6
; GFX6-NEXT:    s_mov_b32 s10, s13
; GFX6-NEXT:    s_mov_b32 s11, s12
; GFX6-NEXT:    s_mov_b32 s14, s5
; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
; GFX6-NEXT:    s_mov_b32 s5, s14
; GFX6-NEXT:    s_mov_b32 s6, s13
; GFX6-NEXT:    s_mov_b32 s7, s12
; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT:    buffer_load_dword v0, off, s[8:11], 0 glc
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_wbinvl1
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: global_agent_seq_cst_load:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x2
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s6
; GFX7-NEXT:    v_mov_b32_e32 v1, s7
; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT:    flat_load_dword v2, v[0:1] glc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    buffer_wbinvl1_vol
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    flat_store_dword v[0:1], v2
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: global_agent_seq_cst_load:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT:    global_load_dword v1, v0, s[6:7] glc dlc
; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT:    buffer_gl1_inv
; GFX10-WGP-NEXT:    buffer_gl0_inv
; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: global_agent_seq_cst_load:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT:    global_load_dword v1, v0, s[6:7] glc dlc
; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
; GFX10-CU-NEXT:    buffer_gl1_inv
; GFX10-CU-NEXT:    buffer_gl0_inv
; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_load:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[0:1], s[4:5]
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s5
; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, 0xf000
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s9, -1
; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, s2
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, s9
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s8
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s10, s1
; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s10
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s9
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s8
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    buffer_load_dword v0, off, s[4:7], 0 glc
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SKIP-CACHE-INV-NEXT:    s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_agent_seq_cst_load:
; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    global_load_dword v1, v0, s[6:7] glc
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_agent_seq_cst_load:
; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT:    global_load_dword v1, v0, s[6:7] glc
; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
; GFX90A-TGSPLIT-NEXT:    s_endpgm
;
; GFX940-NOTTGSPLIT-LABEL: global_agent_seq_cst_load:
; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT:    global_load_dword v1, v0, s[2:3] sc1
; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX940-TGSPLIT-LABEL: global_agent_seq_cst_load:
; GFX940-TGSPLIT:       ; %bb.0: ; %entry
; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-TGSPLIT-NEXT:    global_load_dword v1, v0, s[2:3] sc1
; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
; GFX940-TGSPLIT-NEXT:    s_endpgm
;
; GFX11-WGP-LABEL: global_agent_seq_cst_load:
; GFX11-WGP:       ; %bb.0: ; %entry
; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-WGP-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT:    global_load_b32 v1, v0, s[2:3] glc
; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT:    buffer_gl1_inv
; GFX11-WGP-NEXT:    buffer_gl0_inv
; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-WGP-NEXT:    s_endpgm
;
; GFX11-CU-LABEL: global_agent_seq_cst_load:
; GFX11-CU:       ; %bb.0: ; %entry
; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-CU-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-CU-NEXT:    global_load_b32 v1, v0, s[2:3] glc
; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
; GFX11-CU-NEXT:    buffer_gl1_inv
; GFX11-CU-NEXT:    buffer_gl0_inv
; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-CU-NEXT:    s_endpgm
;
; GFX12-WGP-LABEL: global_agent_seq_cst_load:
; GFX12-WGP:       ; %bb.0: ; %entry
; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-WGP-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
; GFX12-WGP-NEXT:    global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX12-WGP-NEXT:    s_endpgm
;
; GFX12-CU-LABEL: global_agent_seq_cst_load:
; GFX12-CU:       ; %bb.0: ; %entry
; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-CU-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
; GFX12-CU-NEXT:    s_wait_storecnt 0x0
; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
; GFX12-CU-NEXT:    global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT:    s_endpgm
    ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
  %val = load atomic i32, ptr addrspace(1) %in syncscope("agent") seq_cst, align 4
  store i32 %val, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @global_agent_unordered_store(
; GFX6-LABEL: global_agent_unordered_store:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_mov_b64 s[4:5], s[8:9]
795; GFX6-NEXT: s_load_dword s8, s[4:5], 0x0 796; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 797; GFX6-NEXT: s_waitcnt lgkmcnt(0) 798; GFX6-NEXT: s_mov_b32 s11, s5 799; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 800; GFX6-NEXT: s_mov_b32 s9, 0x100f000 801; GFX6-NEXT: s_mov_b32 s10, -1 802; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 803; GFX6-NEXT: s_mov_b32 s5, s11 804; GFX6-NEXT: s_mov_b32 s6, s10 805; GFX6-NEXT: s_mov_b32 s7, s9 806; GFX6-NEXT: v_mov_b32_e32 v0, s8 807; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 808; GFX6-NEXT: s_endpgm 809; 810; GFX7-LABEL: global_agent_unordered_store: 811; GFX7: ; %bb.0: ; %entry 812; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0 813; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x2 814; GFX7-NEXT: s_waitcnt lgkmcnt(0) 815; GFX7-NEXT: v_mov_b32_e32 v0, s6 816; GFX7-NEXT: v_mov_b32_e32 v1, s7 817; GFX7-NEXT: v_mov_b32_e32 v2, s4 818; GFX7-NEXT: flat_store_dword v[0:1], v2 819; GFX7-NEXT: s_endpgm 820; 821; GFX10-WGP-LABEL: global_agent_unordered_store: 822; GFX10-WGP: ; %bb.0: ; %entry 823; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0 824; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 825; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 826; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 827; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 828; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 829; GFX10-WGP-NEXT: s_endpgm 830; 831; GFX10-CU-LABEL: global_agent_unordered_store: 832; GFX10-CU: ; %bb.0: ; %entry 833; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0 834; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 835; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 836; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 837; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 838; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 839; GFX10-CU-NEXT: s_endpgm 840; 841; SKIP-CACHE-INV-LABEL: global_agent_unordered_store: 842; SKIP-CACHE-INV: ; %bb.0: ; %entry 843; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 844; SKIP-CACHE-INV-NEXT: s_load_dword s4, 
s[0:1], 0x0 845; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 846; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 847; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 848; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 849; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 850; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 851; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 852; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 853; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 854; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 855; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 856; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 857; SKIP-CACHE-INV-NEXT: s_endpgm 858; 859; GFX90A-NOTTGSPLIT-LABEL: global_agent_unordered_store: 860; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 861; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0 862; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 863; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 864; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 865; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 866; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 867; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 868; 869; GFX90A-TGSPLIT-LABEL: global_agent_unordered_store: 870; GFX90A-TGSPLIT: ; %bb.0: ; %entry 871; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0 872; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 873; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 874; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 875; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 876; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 877; GFX90A-TGSPLIT-NEXT: s_endpgm 878; 879; GFX940-NOTTGSPLIT-LABEL: global_agent_unordered_store: 880; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 881; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 882; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 883; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 884; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 885; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 
v1, s2 886; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 887; GFX940-NOTTGSPLIT-NEXT: s_endpgm 888; 889; GFX940-TGSPLIT-LABEL: global_agent_unordered_store: 890; GFX940-TGSPLIT: ; %bb.0: ; %entry 891; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 892; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 893; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 894; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 895; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 896; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 897; GFX940-TGSPLIT-NEXT: s_endpgm 898; 899; GFX11-WGP-LABEL: global_agent_unordered_store: 900; GFX11-WGP: ; %bb.0: ; %entry 901; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 902; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 903; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 904; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 905; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 906; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 907; GFX11-WGP-NEXT: s_endpgm 908; 909; GFX11-CU-LABEL: global_agent_unordered_store: 910; GFX11-CU: ; %bb.0: ; %entry 911; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 912; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 913; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 914; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 915; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 916; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 917; GFX11-CU-NEXT: s_endpgm 918; 919; GFX12-WGP-LABEL: global_agent_unordered_store: 920; GFX12-WGP: ; %bb.0: ; %entry 921; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 922; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 923; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 924; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 925; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 926; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 927; GFX12-WGP-NEXT: s_endpgm 928; 929; GFX12-CU-LABEL: global_agent_unordered_store: 930; GFX12-CU: ; %bb.0: ; %entry 931; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 932; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 933; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 934; 
GFX12-CU-NEXT: s_wait_kmcnt 0x0 935; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 936; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 937; GFX12-CU-NEXT: s_endpgm 938 i32 %in, ptr addrspace(1) %out) { 939entry: 940 store atomic i32 %in, ptr addrspace(1) %out syncscope("agent") unordered, align 4 941 ret void 942} 943 944define amdgpu_kernel void @global_agent_monotonic_store( 945; GFX6-LABEL: global_agent_monotonic_store: 946; GFX6: ; %bb.0: ; %entry 947; GFX6-NEXT: s_mov_b64 s[4:5], s[8:9] 948; GFX6-NEXT: s_load_dword s8, s[4:5], 0x0 949; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 950; GFX6-NEXT: s_waitcnt lgkmcnt(0) 951; GFX6-NEXT: s_mov_b32 s11, s5 952; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 953; GFX6-NEXT: s_mov_b32 s9, 0x100f000 954; GFX6-NEXT: s_mov_b32 s10, -1 955; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 956; GFX6-NEXT: s_mov_b32 s5, s11 957; GFX6-NEXT: s_mov_b32 s6, s10 958; GFX6-NEXT: s_mov_b32 s7, s9 959; GFX6-NEXT: v_mov_b32_e32 v0, s8 960; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 961; GFX6-NEXT: s_endpgm 962; 963; GFX7-LABEL: global_agent_monotonic_store: 964; GFX7: ; %bb.0: ; %entry 965; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0 966; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x2 967; GFX7-NEXT: s_waitcnt lgkmcnt(0) 968; GFX7-NEXT: v_mov_b32_e32 v0, s6 969; GFX7-NEXT: v_mov_b32_e32 v1, s7 970; GFX7-NEXT: v_mov_b32_e32 v2, s4 971; GFX7-NEXT: flat_store_dword v[0:1], v2 972; GFX7-NEXT: s_endpgm 973; 974; GFX10-WGP-LABEL: global_agent_monotonic_store: 975; GFX10-WGP: ; %bb.0: ; %entry 976; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0 977; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 978; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 979; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 980; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 981; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 982; GFX10-WGP-NEXT: s_endpgm 983; 984; GFX10-CU-LABEL: global_agent_monotonic_store: 985; GFX10-CU: ; %bb.0: ; %entry 986; 
GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0 987; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 988; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 989; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 990; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 991; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 992; GFX10-CU-NEXT: s_endpgm 993; 994; SKIP-CACHE-INV-LABEL: global_agent_monotonic_store: 995; SKIP-CACHE-INV: ; %bb.0: ; %entry 996; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 997; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 998; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 999; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1000; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 1001; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 1002; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 1003; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 1004; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 1005; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 1006; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 1007; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 1008; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 1009; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 1010; SKIP-CACHE-INV-NEXT: s_endpgm 1011; 1012; GFX90A-NOTTGSPLIT-LABEL: global_agent_monotonic_store: 1013; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 1014; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0 1015; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 1016; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1017; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1018; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 1019; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 1020; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 1021; 1022; GFX90A-TGSPLIT-LABEL: global_agent_monotonic_store: 1023; GFX90A-TGSPLIT: ; %bb.0: ; %entry 1024; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0 1025; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 1026; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1027; GFX90A-TGSPLIT-NEXT: s_waitcnt 
lgkmcnt(0) 1028; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 1029; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 1030; GFX90A-TGSPLIT-NEXT: s_endpgm 1031; 1032; GFX940-NOTTGSPLIT-LABEL: global_agent_monotonic_store: 1033; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 1034; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 1035; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 1036; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1037; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1038; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1039; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 1040; GFX940-NOTTGSPLIT-NEXT: s_endpgm 1041; 1042; GFX940-TGSPLIT-LABEL: global_agent_monotonic_store: 1043; GFX940-TGSPLIT: ; %bb.0: ; %entry 1044; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 1045; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 1046; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1047; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1048; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1049; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 1050; GFX940-TGSPLIT-NEXT: s_endpgm 1051; 1052; GFX11-WGP-LABEL: global_agent_monotonic_store: 1053; GFX11-WGP: ; %bb.0: ; %entry 1054; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 1055; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 1056; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 1057; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 1058; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 1059; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 1060; GFX11-WGP-NEXT: s_endpgm 1061; 1062; GFX11-CU-LABEL: global_agent_monotonic_store: 1063; GFX11-CU: ; %bb.0: ; %entry 1064; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 1065; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 1066; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 1067; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 1068; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 1069; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 1070; GFX11-CU-NEXT: s_endpgm 1071; 1072; GFX12-WGP-LABEL: global_agent_monotonic_store: 1073; 
GFX12-WGP: ; %bb.0: ; %entry 1074; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 1075; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 1076; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 1077; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 1078; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 1079; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV 1080; GFX12-WGP-NEXT: s_endpgm 1081; 1082; GFX12-CU-LABEL: global_agent_monotonic_store: 1083; GFX12-CU: ; %bb.0: ; %entry 1084; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 1085; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 1086; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 1087; GFX12-CU-NEXT: s_wait_kmcnt 0x0 1088; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 1089; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV 1090; GFX12-CU-NEXT: s_endpgm 1091 i32 %in, ptr addrspace(1) %out) { 1092entry: 1093 store atomic i32 %in, ptr addrspace(1) %out syncscope("agent") monotonic, align 4 1094 ret void 1095} 1096 1097define amdgpu_kernel void @global_agent_release_store( 1098; GFX6-LABEL: global_agent_release_store: 1099; GFX6: ; %bb.0: ; %entry 1100; GFX6-NEXT: s_mov_b64 s[4:5], s[8:9] 1101; GFX6-NEXT: s_load_dword s8, s[4:5], 0x0 1102; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 1103; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1104; GFX6-NEXT: s_mov_b32 s11, s5 1105; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 1106; GFX6-NEXT: s_mov_b32 s9, 0x100f000 1107; GFX6-NEXT: s_mov_b32 s10, -1 1108; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 1109; GFX6-NEXT: s_mov_b32 s5, s11 1110; GFX6-NEXT: s_mov_b32 s6, s10 1111; GFX6-NEXT: s_mov_b32 s7, s9 1112; GFX6-NEXT: v_mov_b32_e32 v0, s8 1113; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1114; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 1115; GFX6-NEXT: s_endpgm 1116; 1117; GFX7-LABEL: global_agent_release_store: 1118; GFX7: ; %bb.0: ; %entry 1119; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0 1120; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x2 1121; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1122; 
GFX7-NEXT: v_mov_b32_e32 v0, s6 1123; GFX7-NEXT: v_mov_b32_e32 v1, s7 1124; GFX7-NEXT: v_mov_b32_e32 v2, s4 1125; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1126; GFX7-NEXT: flat_store_dword v[0:1], v2 1127; GFX7-NEXT: s_endpgm 1128; 1129; GFX10-WGP-LABEL: global_agent_release_store: 1130; GFX10-WGP: ; %bb.0: ; %entry 1131; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0 1132; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 1133; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 1134; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1135; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 1136; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1137; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1138; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 1139; GFX10-WGP-NEXT: s_endpgm 1140; 1141; GFX10-CU-LABEL: global_agent_release_store: 1142; GFX10-CU: ; %bb.0: ; %entry 1143; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0 1144; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 1145; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 1146; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1147; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 1148; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1149; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 1150; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 1151; GFX10-CU-NEXT: s_endpgm 1152; 1153; SKIP-CACHE-INV-LABEL: global_agent_release_store: 1154; SKIP-CACHE-INV: ; %bb.0: ; %entry 1155; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 1156; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 1157; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 1158; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1159; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 1160; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 1161; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 1162; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 1163; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 1164; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 1165; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 1166; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 
s5 1167; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 1168; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1169; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 1170; SKIP-CACHE-INV-NEXT: s_endpgm 1171; 1172; GFX90A-NOTTGSPLIT-LABEL: global_agent_release_store: 1173; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 1174; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0 1175; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 1176; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1177; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1178; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 1179; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1180; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 1181; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 1182; 1183; GFX90A-TGSPLIT-LABEL: global_agent_release_store: 1184; GFX90A-TGSPLIT: ; %bb.0: ; %entry 1185; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0 1186; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 1187; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1188; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1189; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 1190; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1191; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 1192; GFX90A-TGSPLIT-NEXT: s_endpgm 1193; 1194; GFX940-NOTTGSPLIT-LABEL: global_agent_release_store: 1195; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 1196; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 1197; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 1198; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1199; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1200; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1201; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 1202; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1203; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 1204; GFX940-NOTTGSPLIT-NEXT: s_endpgm 1205; 1206; GFX940-TGSPLIT-LABEL: global_agent_release_store: 1207; GFX940-TGSPLIT: ; %bb.0: ; %entry 1208; 
GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 1209; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 1210; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1211; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1212; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1213; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 1214; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1215; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 1216; GFX940-TGSPLIT-NEXT: s_endpgm 1217; 1218; GFX11-WGP-LABEL: global_agent_release_store: 1219; GFX11-WGP: ; %bb.0: ; %entry 1220; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 1221; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 1222; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 1223; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 1224; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 1225; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1226; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1227; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 1228; GFX11-WGP-NEXT: s_endpgm 1229; 1230; GFX11-CU-LABEL: global_agent_release_store: 1231; GFX11-CU: ; %bb.0: ; %entry 1232; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 1233; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 1234; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 1235; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 1236; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 1237; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1238; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 1239; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 1240; GFX11-CU-NEXT: s_endpgm 1241; 1242; GFX12-WGP-LABEL: global_agent_release_store: 1243; GFX12-WGP: ; %bb.0: ; %entry 1244; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 1245; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 1246; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 1247; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 1248; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 1249; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 1250; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 1251; GFX12-WGP-NEXT: s_wait_storecnt 0x0 1252; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 1253; GFX12-WGP-NEXT: global_store_b32 v0, v1, 
s[0:1] scope:SCOPE_DEV 1254; GFX12-WGP-NEXT: s_endpgm 1255; 1256; GFX12-CU-LABEL: global_agent_release_store: 1257; GFX12-CU: ; %bb.0: ; %entry 1258; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 1259; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 1260; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 1261; GFX12-CU-NEXT: s_wait_kmcnt 0x0 1262; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 1263; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 1264; GFX12-CU-NEXT: s_wait_samplecnt 0x0 1265; GFX12-CU-NEXT: s_wait_storecnt 0x0 1266; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 1267; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV 1268; GFX12-CU-NEXT: s_endpgm 1269 i32 %in, ptr addrspace(1) %out) { 1270entry: 1271 store atomic i32 %in, ptr addrspace(1) %out syncscope("agent") release, align 4 1272 ret void 1273} 1274 1275define amdgpu_kernel void @global_agent_seq_cst_store( 1276; GFX6-LABEL: global_agent_seq_cst_store: 1277; GFX6: ; %bb.0: ; %entry 1278; GFX6-NEXT: s_mov_b64 s[4:5], s[8:9] 1279; GFX6-NEXT: s_load_dword s8, s[4:5], 0x0 1280; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 1281; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1282; GFX6-NEXT: s_mov_b32 s11, s5 1283; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 1284; GFX6-NEXT: s_mov_b32 s9, 0x100f000 1285; GFX6-NEXT: s_mov_b32 s10, -1 1286; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 1287; GFX6-NEXT: s_mov_b32 s5, s11 1288; GFX6-NEXT: s_mov_b32 s6, s10 1289; GFX6-NEXT: s_mov_b32 s7, s9 1290; GFX6-NEXT: v_mov_b32_e32 v0, s8 1291; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1292; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 1293; GFX6-NEXT: s_endpgm 1294; 1295; GFX7-LABEL: global_agent_seq_cst_store: 1296; GFX7: ; %bb.0: ; %entry 1297; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0 1298; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x2 1299; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1300; GFX7-NEXT: v_mov_b32_e32 v0, s6 1301; GFX7-NEXT: v_mov_b32_e32 v1, s7 1302; GFX7-NEXT: v_mov_b32_e32 v2, s4 1303; GFX7-NEXT: s_waitcnt 
vmcnt(0) lgkmcnt(0) 1304; GFX7-NEXT: flat_store_dword v[0:1], v2 1305; GFX7-NEXT: s_endpgm 1306; 1307; GFX10-WGP-LABEL: global_agent_seq_cst_store: 1308; GFX10-WGP: ; %bb.0: ; %entry 1309; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0 1310; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 1311; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 1312; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1313; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 1314; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1315; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1316; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 1317; GFX10-WGP-NEXT: s_endpgm 1318; 1319; GFX10-CU-LABEL: global_agent_seq_cst_store: 1320; GFX10-CU: ; %bb.0: ; %entry 1321; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0 1322; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 1323; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 1324; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1325; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 1326; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1327; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 1328; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 1329; GFX10-CU-NEXT: s_endpgm 1330; 1331; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_store: 1332; SKIP-CACHE-INV: ; %bb.0: ; %entry 1333; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 1334; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 1335; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 1336; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1337; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 1338; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 1339; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 1340; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 1341; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 1342; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 1343; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 1344; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 1345; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 1346; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1347; SKIP-CACHE-INV-NEXT: 
buffer_store_dword v0, off, s[0:3], 0 1348; SKIP-CACHE-INV-NEXT: s_endpgm 1349; 1350; GFX90A-NOTTGSPLIT-LABEL: global_agent_seq_cst_store: 1351; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 1352; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0 1353; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 1354; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1355; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1356; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 1357; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1358; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 1359; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 1360; 1361; GFX90A-TGSPLIT-LABEL: global_agent_seq_cst_store: 1362; GFX90A-TGSPLIT: ; %bb.0: ; %entry 1363; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0 1364; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 1365; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1366; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1367; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 1368; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1369; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 1370; GFX90A-TGSPLIT-NEXT: s_endpgm 1371; 1372; GFX940-NOTTGSPLIT-LABEL: global_agent_seq_cst_store: 1373; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 1374; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 1375; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 1376; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1377; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1378; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1379; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 1380; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1381; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 1382; GFX940-NOTTGSPLIT-NEXT: s_endpgm 1383; 1384; GFX940-TGSPLIT-LABEL: global_agent_seq_cst_store: 1385; GFX940-TGSPLIT: ; %bb.0: ; %entry 1386; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 1387; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 1388; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 
v0, 0 1389; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1390; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1391; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 1392; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1393; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 1394; GFX940-TGSPLIT-NEXT: s_endpgm 1395; 1396; GFX11-WGP-LABEL: global_agent_seq_cst_store: 1397; GFX11-WGP: ; %bb.0: ; %entry 1398; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 1399; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 1400; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 1401; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 1402; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 1403; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1404; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1405; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 1406; GFX11-WGP-NEXT: s_endpgm 1407; 1408; GFX11-CU-LABEL: global_agent_seq_cst_store: 1409; GFX11-CU: ; %bb.0: ; %entry 1410; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 1411; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 1412; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 1413; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 1414; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 1415; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1416; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 1417; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 1418; GFX11-CU-NEXT: s_endpgm 1419; 1420; GFX12-WGP-LABEL: global_agent_seq_cst_store: 1421; GFX12-WGP: ; %bb.0: ; %entry 1422; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 1423; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 1424; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 1425; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 1426; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 1427; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 1428; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 1429; GFX12-WGP-NEXT: s_wait_storecnt 0x0 1430; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 1431; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV 1432; GFX12-WGP-NEXT: s_endpgm 1433; 1434; GFX12-CU-LABEL: global_agent_seq_cst_store: 1435; GFX12-CU: ; %bb.0: ; %entry 1436; 
GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 1437; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 1438; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 1439; GFX12-CU-NEXT: s_wait_kmcnt 0x0 1440; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 1441; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 1442; GFX12-CU-NEXT: s_wait_samplecnt 0x0 1443; GFX12-CU-NEXT: s_wait_storecnt 0x0 1444; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 1445; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV 1446; GFX12-CU-NEXT: s_endpgm 1447 i32 %in, ptr addrspace(1) %out) { 1448entry: 1449 store atomic i32 %in, ptr addrspace(1) %out syncscope("agent") seq_cst, align 4 1450 ret void 1451} 1452 1453define amdgpu_kernel void @global_agent_monotonic_atomicrmw( 1454; GFX6-LABEL: global_agent_monotonic_atomicrmw: 1455; GFX6: ; %bb.0: ; %entry 1456; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1457; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2 1458; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1459; GFX6-NEXT: s_mov_b32 s11, s5 1460; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 1461; GFX6-NEXT: s_mov_b32 s9, 0x100f000 1462; GFX6-NEXT: s_mov_b32 s10, -1 1463; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 1464; GFX6-NEXT: s_mov_b32 s5, s11 1465; GFX6-NEXT: s_mov_b32 s6, s10 1466; GFX6-NEXT: s_mov_b32 s7, s9 1467; GFX6-NEXT: v_mov_b32_e32 v0, s8 1468; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 1469; GFX6-NEXT: s_endpgm 1470; 1471; GFX7-LABEL: global_agent_monotonic_atomicrmw: 1472; GFX7: ; %bb.0: ; %entry 1473; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 1474; GFX7-NEXT: s_load_dword s4, s[8:9], 0x2 1475; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1476; GFX7-NEXT: v_mov_b32_e32 v0, s6 1477; GFX7-NEXT: v_mov_b32_e32 v1, s7 1478; GFX7-NEXT: v_mov_b32_e32 v2, s4 1479; GFX7-NEXT: flat_atomic_swap v[0:1], v2 1480; GFX7-NEXT: s_endpgm 1481; 1482; GFX10-WGP-LABEL: global_agent_monotonic_atomicrmw: 1483; GFX10-WGP: ; %bb.0: ; %entry 1484; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 1485; GFX10-WGP-NEXT: s_load_dwordx2 
s[4:5], s[8:9], 0x0 1486; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 1487; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1488; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 1489; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[4:5] 1490; GFX10-WGP-NEXT: s_endpgm 1491; 1492; GFX10-CU-LABEL: global_agent_monotonic_atomicrmw: 1493; GFX10-CU: ; %bb.0: ; %entry 1494; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 1495; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1496; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 1497; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1498; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 1499; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[4:5] 1500; GFX10-CU-NEXT: s_endpgm 1501; 1502; SKIP-CACHE-INV-LABEL: global_agent_monotonic_atomicrmw: 1503; SKIP-CACHE-INV: ; %bb.0: ; %entry 1504; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1505; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2 1506; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1507; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 1508; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 1509; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 1510; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 1511; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 1512; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 1513; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 1514; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 1515; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 1516; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 1517; SKIP-CACHE-INV-NEXT: s_endpgm 1518; 1519; GFX90A-NOTTGSPLIT-LABEL: global_agent_monotonic_atomicrmw: 1520; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 1521; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1522; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1523; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 1524; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1525; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 1526; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 1527; GFX90A-NOTTGSPLIT-NEXT: 
s_endpgm 1528; 1529; GFX90A-TGSPLIT-LABEL: global_agent_monotonic_atomicrmw: 1530; GFX90A-TGSPLIT: ; %bb.0: ; %entry 1531; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1532; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1533; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 1534; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1535; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 1536; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 1537; GFX90A-TGSPLIT-NEXT: s_endpgm 1538; 1539; GFX940-NOTTGSPLIT-LABEL: global_agent_monotonic_atomicrmw: 1540; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 1541; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1542; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1543; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 1544; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1545; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1546; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 1547; GFX940-NOTTGSPLIT-NEXT: s_endpgm 1548; 1549; GFX940-TGSPLIT-LABEL: global_agent_monotonic_atomicrmw: 1550; GFX940-TGSPLIT: ; %bb.0: ; %entry 1551; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1552; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1553; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 1554; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1555; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1556; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 1557; GFX940-TGSPLIT-NEXT: s_endpgm 1558; 1559; GFX11-WGP-LABEL: global_agent_monotonic_atomicrmw: 1560; GFX11-WGP: ; %bb.0: ; %entry 1561; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 1562; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 1563; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 1564; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 1565; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 1566; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 1567; GFX11-WGP-NEXT: s_endpgm 1568; 1569; GFX11-CU-LABEL: global_agent_monotonic_atomicrmw: 1570; GFX11-CU: ; %bb.0: ; %entry 1571; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 1572; 
GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 1573; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 1574; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 1575; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 1576; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 1577; GFX11-CU-NEXT: s_endpgm 1578; 1579; GFX12-WGP-LABEL: global_agent_monotonic_atomicrmw: 1580; GFX12-WGP: ; %bb.0: ; %entry 1581; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 1582; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 1583; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 1584; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 1585; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 1586; GFX12-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV 1587; GFX12-WGP-NEXT: s_endpgm 1588; 1589; GFX12-CU-LABEL: global_agent_monotonic_atomicrmw: 1590; GFX12-CU: ; %bb.0: ; %entry 1591; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 1592; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 1593; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 1594; GFX12-CU-NEXT: s_wait_kmcnt 0x0 1595; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 1596; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV 1597; GFX12-CU-NEXT: s_endpgm 1598 ptr addrspace(1) %out, i32 %in) { 1599entry: 1600 %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent") monotonic 1601 ret void 1602} 1603 1604define amdgpu_kernel void @global_agent_acquire_atomicrmw( 1605; GFX6-LABEL: global_agent_acquire_atomicrmw: 1606; GFX6: ; %bb.0: ; %entry 1607; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1608; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2 1609; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1610; GFX6-NEXT: s_mov_b32 s11, s5 1611; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 1612; GFX6-NEXT: s_mov_b32 s9, 0x100f000 1613; GFX6-NEXT: s_mov_b32 s10, -1 1614; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 1615; GFX6-NEXT: s_mov_b32 s5, s11 1616; GFX6-NEXT: s_mov_b32 s6, s10 1617; GFX6-NEXT: s_mov_b32 s7, s9 1618; GFX6-NEXT: v_mov_b32_e32 v0, s8 1619; GFX6-NEXT: 
buffer_atomic_swap v0, off, s[4:7], 0 1620; GFX6-NEXT: s_waitcnt vmcnt(0) 1621; GFX6-NEXT: buffer_wbinvl1 1622; GFX6-NEXT: s_endpgm 1623; 1624; GFX7-LABEL: global_agent_acquire_atomicrmw: 1625; GFX7: ; %bb.0: ; %entry 1626; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 1627; GFX7-NEXT: s_load_dword s4, s[8:9], 0x2 1628; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1629; GFX7-NEXT: v_mov_b32_e32 v0, s6 1630; GFX7-NEXT: v_mov_b32_e32 v1, s7 1631; GFX7-NEXT: v_mov_b32_e32 v2, s4 1632; GFX7-NEXT: flat_atomic_swap v[0:1], v2 1633; GFX7-NEXT: s_waitcnt vmcnt(0) 1634; GFX7-NEXT: buffer_wbinvl1_vol 1635; GFX7-NEXT: s_endpgm 1636; 1637; GFX10-WGP-LABEL: global_agent_acquire_atomicrmw: 1638; GFX10-WGP: ; %bb.0: ; %entry 1639; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 1640; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1641; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 1642; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1643; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 1644; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[4:5] 1645; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1646; GFX10-WGP-NEXT: buffer_gl1_inv 1647; GFX10-WGP-NEXT: buffer_gl0_inv 1648; GFX10-WGP-NEXT: s_endpgm 1649; 1650; GFX10-CU-LABEL: global_agent_acquire_atomicrmw: 1651; GFX10-CU: ; %bb.0: ; %entry 1652; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 1653; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1654; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 1655; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1656; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 1657; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[4:5] 1658; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 1659; GFX10-CU-NEXT: buffer_gl1_inv 1660; GFX10-CU-NEXT: buffer_gl0_inv 1661; GFX10-CU-NEXT: s_endpgm 1662; 1663; SKIP-CACHE-INV-LABEL: global_agent_acquire_atomicrmw: 1664; SKIP-CACHE-INV: ; %bb.0: ; %entry 1665; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1666; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2 1667; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1668; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 
1669; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 1670; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 1671; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 1672; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 1673; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 1674; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 1675; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 1676; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 1677; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 1678; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 1679; SKIP-CACHE-INV-NEXT: s_endpgm 1680; 1681; GFX90A-NOTTGSPLIT-LABEL: global_agent_acquire_atomicrmw: 1682; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 1683; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1684; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1685; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 1686; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1687; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 1688; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 1689; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 1690; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 1691; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 1692; 1693; GFX90A-TGSPLIT-LABEL: global_agent_acquire_atomicrmw: 1694; GFX90A-TGSPLIT: ; %bb.0: ; %entry 1695; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1696; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1697; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 1698; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1699; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 1700; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 1701; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 1702; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 1703; GFX90A-TGSPLIT-NEXT: s_endpgm 1704; 1705; GFX940-NOTTGSPLIT-LABEL: global_agent_acquire_atomicrmw: 1706; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 1707; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1708; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1709; GFX940-NOTTGSPLIT-NEXT: 
s_load_dword s2, s[4:5], 0x8 1710; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1711; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1712; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 1713; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 1714; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 1715; GFX940-NOTTGSPLIT-NEXT: s_endpgm 1716; 1717; GFX940-TGSPLIT-LABEL: global_agent_acquire_atomicrmw: 1718; GFX940-TGSPLIT: ; %bb.0: ; %entry 1719; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1720; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1721; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 1722; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1723; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1724; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 1725; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 1726; GFX940-TGSPLIT-NEXT: buffer_inv sc1 1727; GFX940-TGSPLIT-NEXT: s_endpgm 1728; 1729; GFX11-WGP-LABEL: global_agent_acquire_atomicrmw: 1730; GFX11-WGP: ; %bb.0: ; %entry 1731; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 1732; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 1733; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 1734; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 1735; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 1736; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 1737; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1738; GFX11-WGP-NEXT: buffer_gl1_inv 1739; GFX11-WGP-NEXT: buffer_gl0_inv 1740; GFX11-WGP-NEXT: s_endpgm 1741; 1742; GFX11-CU-LABEL: global_agent_acquire_atomicrmw: 1743; GFX11-CU: ; %bb.0: ; %entry 1744; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 1745; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 1746; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 1747; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 1748; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 1749; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 1750; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 1751; GFX11-CU-NEXT: buffer_gl1_inv 1752; GFX11-CU-NEXT: buffer_gl0_inv 1753; GFX11-CU-NEXT: s_endpgm 1754; 1755; GFX12-WGP-LABEL: 
global_agent_acquire_atomicrmw: 1756; GFX12-WGP: ; %bb.0: ; %entry 1757; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 1758; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 1759; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 1760; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 1761; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 1762; GFX12-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV 1763; GFX12-WGP-NEXT: s_wait_storecnt 0x0 1764; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 1765; GFX12-WGP-NEXT: s_endpgm 1766; 1767; GFX12-CU-LABEL: global_agent_acquire_atomicrmw: 1768; GFX12-CU: ; %bb.0: ; %entry 1769; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 1770; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 1771; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 1772; GFX12-CU-NEXT: s_wait_kmcnt 0x0 1773; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 1774; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV 1775; GFX12-CU-NEXT: s_wait_storecnt 0x0 1776; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 1777; GFX12-CU-NEXT: s_endpgm 1778 ptr addrspace(1) %out, i32 %in) { 1779entry: 1780 %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent") acquire 1781 ret void 1782} 1783 1784define amdgpu_kernel void @global_agent_release_atomicrmw( 1785; GFX6-LABEL: global_agent_release_atomicrmw: 1786; GFX6: ; %bb.0: ; %entry 1787; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1788; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2 1789; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1790; GFX6-NEXT: s_mov_b32 s11, s5 1791; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 1792; GFX6-NEXT: s_mov_b32 s9, 0x100f000 1793; GFX6-NEXT: s_mov_b32 s10, -1 1794; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 1795; GFX6-NEXT: s_mov_b32 s5, s11 1796; GFX6-NEXT: s_mov_b32 s6, s10 1797; GFX6-NEXT: s_mov_b32 s7, s9 1798; GFX6-NEXT: v_mov_b32_e32 v0, s8 1799; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1800; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 1801; GFX6-NEXT: s_endpgm 1802; 1803; 
GFX7-LABEL: global_agent_release_atomicrmw: 1804; GFX7: ; %bb.0: ; %entry 1805; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 1806; GFX7-NEXT: s_load_dword s4, s[8:9], 0x2 1807; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1808; GFX7-NEXT: v_mov_b32_e32 v0, s6 1809; GFX7-NEXT: v_mov_b32_e32 v1, s7 1810; GFX7-NEXT: v_mov_b32_e32 v2, s4 1811; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1812; GFX7-NEXT: flat_atomic_swap v[0:1], v2 1813; GFX7-NEXT: s_endpgm 1814; 1815; GFX10-WGP-LABEL: global_agent_release_atomicrmw: 1816; GFX10-WGP: ; %bb.0: ; %entry 1817; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 1818; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1819; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 1820; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1821; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 1822; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1823; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1824; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[4:5] 1825; GFX10-WGP-NEXT: s_endpgm 1826; 1827; GFX10-CU-LABEL: global_agent_release_atomicrmw: 1828; GFX10-CU: ; %bb.0: ; %entry 1829; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 1830; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1831; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 1832; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1833; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 1834; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1835; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 1836; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[4:5] 1837; GFX10-CU-NEXT: s_endpgm 1838; 1839; SKIP-CACHE-INV-LABEL: global_agent_release_atomicrmw: 1840; SKIP-CACHE-INV: ; %bb.0: ; %entry 1841; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1842; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2 1843; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1844; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 1845; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 1846; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 1847; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 1848; SKIP-CACHE-INV-NEXT: ; kill: def 
$sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 1849; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 1850; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 1851; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 1852; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 1853; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1854; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 1855; SKIP-CACHE-INV-NEXT: s_endpgm 1856; 1857; GFX90A-NOTTGSPLIT-LABEL: global_agent_release_atomicrmw: 1858; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 1859; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1860; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1861; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 1862; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1863; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 1864; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1865; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 1866; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 1867; 1868; GFX90A-TGSPLIT-LABEL: global_agent_release_atomicrmw: 1869; GFX90A-TGSPLIT: ; %bb.0: ; %entry 1870; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1871; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1872; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 1873; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1874; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 1875; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1876; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 1877; GFX90A-TGSPLIT-NEXT: s_endpgm 1878; 1879; GFX940-NOTTGSPLIT-LABEL: global_agent_release_atomicrmw: 1880; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 1881; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1882; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1883; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 1884; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1885; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1886; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 1887; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1888; GFX940-NOTTGSPLIT-NEXT: 
global_atomic_swap v0, v1, s[0:1] 1889; GFX940-NOTTGSPLIT-NEXT: s_endpgm 1890; 1891; GFX940-TGSPLIT-LABEL: global_agent_release_atomicrmw: 1892; GFX940-TGSPLIT: ; %bb.0: ; %entry 1893; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1894; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1895; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 1896; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1897; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1898; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 1899; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1900; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 1901; GFX940-TGSPLIT-NEXT: s_endpgm 1902; 1903; GFX11-WGP-LABEL: global_agent_release_atomicrmw: 1904; GFX11-WGP: ; %bb.0: ; %entry 1905; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 1906; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 1907; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 1908; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 1909; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 1910; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1911; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1912; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 1913; GFX11-WGP-NEXT: s_endpgm 1914; 1915; GFX11-CU-LABEL: global_agent_release_atomicrmw: 1916; GFX11-CU: ; %bb.0: ; %entry 1917; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 1918; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 1919; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 1920; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 1921; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 1922; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1923; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 1924; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 1925; GFX11-CU-NEXT: s_endpgm 1926; 1927; GFX12-WGP-LABEL: global_agent_release_atomicrmw: 1928; GFX12-WGP: ; %bb.0: ; %entry 1929; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 1930; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 1931; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 1932; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 1933; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 1934; 
GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 1935; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 1936; GFX12-WGP-NEXT: s_wait_storecnt 0x0 1937; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 1938; GFX12-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV 1939; GFX12-WGP-NEXT: s_endpgm 1940; 1941; GFX12-CU-LABEL: global_agent_release_atomicrmw: 1942; GFX12-CU: ; %bb.0: ; %entry 1943; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 1944; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 1945; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 1946; GFX12-CU-NEXT: s_wait_kmcnt 0x0 1947; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 1948; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 1949; GFX12-CU-NEXT: s_wait_samplecnt 0x0 1950; GFX12-CU-NEXT: s_wait_storecnt 0x0 1951; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 1952; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV 1953; GFX12-CU-NEXT: s_endpgm 1954 ptr addrspace(1) %out, i32 %in) { 1955entry: 1956 %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent") release 1957 ret void 1958} 1959 1960define amdgpu_kernel void @global_agent_acq_rel_atomicrmw( 1961; GFX6-LABEL: global_agent_acq_rel_atomicrmw: 1962; GFX6: ; %bb.0: ; %entry 1963; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1964; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2 1965; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1966; GFX6-NEXT: s_mov_b32 s11, s5 1967; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 1968; GFX6-NEXT: s_mov_b32 s9, 0x100f000 1969; GFX6-NEXT: s_mov_b32 s10, -1 1970; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 1971; GFX6-NEXT: s_mov_b32 s5, s11 1972; GFX6-NEXT: s_mov_b32 s6, s10 1973; GFX6-NEXT: s_mov_b32 s7, s9 1974; GFX6-NEXT: v_mov_b32_e32 v0, s8 1975; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1976; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 1977; GFX6-NEXT: s_waitcnt vmcnt(0) 1978; GFX6-NEXT: buffer_wbinvl1 1979; GFX6-NEXT: s_endpgm 1980; 1981; GFX7-LABEL: global_agent_acq_rel_atomicrmw: 1982; GFX7: ; %bb.0: ; %entry 1983; 
GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 1984; GFX7-NEXT: s_load_dword s4, s[8:9], 0x2 1985; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1986; GFX7-NEXT: v_mov_b32_e32 v0, s6 1987; GFX7-NEXT: v_mov_b32_e32 v1, s7 1988; GFX7-NEXT: v_mov_b32_e32 v2, s4 1989; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1990; GFX7-NEXT: flat_atomic_swap v[0:1], v2 1991; GFX7-NEXT: s_waitcnt vmcnt(0) 1992; GFX7-NEXT: buffer_wbinvl1_vol 1993; GFX7-NEXT: s_endpgm 1994; 1995; GFX10-WGP-LABEL: global_agent_acq_rel_atomicrmw: 1996; GFX10-WGP: ; %bb.0: ; %entry 1997; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 1998; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1999; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 2000; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2001; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 2002; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2003; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2004; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[4:5] 2005; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2006; GFX10-WGP-NEXT: buffer_gl1_inv 2007; GFX10-WGP-NEXT: buffer_gl0_inv 2008; GFX10-WGP-NEXT: s_endpgm 2009; 2010; GFX10-CU-LABEL: global_agent_acq_rel_atomicrmw: 2011; GFX10-CU: ; %bb.0: ; %entry 2012; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 2013; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2014; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 2015; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2016; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 2017; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2018; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 2019; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[4:5] 2020; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 2021; GFX10-CU-NEXT: buffer_gl1_inv 2022; GFX10-CU-NEXT: buffer_gl0_inv 2023; GFX10-CU-NEXT: s_endpgm 2024; 2025; SKIP-CACHE-INV-LABEL: global_agent_acq_rel_atomicrmw: 2026; SKIP-CACHE-INV: ; %bb.0: ; %entry 2027; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2028; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2 2029; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2030; 
SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 2031; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 2032; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 2033; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 2034; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 2035; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 2036; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 2037; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 2038; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 2039; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2040; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 2041; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 2042; SKIP-CACHE-INV-NEXT: s_endpgm 2043; 2044; GFX90A-NOTTGSPLIT-LABEL: global_agent_acq_rel_atomicrmw: 2045; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 2046; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2047; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2048; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 2049; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2050; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 2051; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2052; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 2053; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 2054; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 2055; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 2056; 2057; GFX90A-TGSPLIT-LABEL: global_agent_acq_rel_atomicrmw: 2058; GFX90A-TGSPLIT: ; %bb.0: ; %entry 2059; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2060; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2061; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 2062; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2063; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 2064; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2065; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 2066; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 2067; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 2068; GFX90A-TGSPLIT-NEXT: s_endpgm 2069; 2070; GFX940-NOTTGSPLIT-LABEL: 
global_agent_acq_rel_atomicrmw: 2071; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 2072; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2073; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2074; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 2075; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2076; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 2077; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 2078; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2079; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 2080; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 2081; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 2082; GFX940-NOTTGSPLIT-NEXT: s_endpgm 2083; 2084; GFX940-TGSPLIT-LABEL: global_agent_acq_rel_atomicrmw: 2085; GFX940-TGSPLIT: ; %bb.0: ; %entry 2086; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2087; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2088; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 2089; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2090; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 2091; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 2092; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2093; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 2094; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 2095; GFX940-TGSPLIT-NEXT: buffer_inv sc1 2096; GFX940-TGSPLIT-NEXT: s_endpgm 2097; 2098; GFX11-WGP-LABEL: global_agent_acq_rel_atomicrmw: 2099; GFX11-WGP: ; %bb.0: ; %entry 2100; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 2101; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2102; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 2103; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 2104; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 2105; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2106; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2107; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 2108; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2109; GFX11-WGP-NEXT: buffer_gl1_inv 2110; GFX11-WGP-NEXT: buffer_gl0_inv 2111; GFX11-WGP-NEXT: s_endpgm 2112; 2113; GFX11-CU-LABEL: 
global_agent_acq_rel_atomicrmw: 2114; GFX11-CU: ; %bb.0: ; %entry 2115; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 2116; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2117; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 2118; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 2119; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 2120; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2121; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 2122; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 2123; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 2124; GFX11-CU-NEXT: buffer_gl1_inv 2125; GFX11-CU-NEXT: buffer_gl0_inv 2126; GFX11-CU-NEXT: s_endpgm 2127; 2128; GFX12-WGP-LABEL: global_agent_acq_rel_atomicrmw: 2129; GFX12-WGP: ; %bb.0: ; %entry 2130; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 2131; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2132; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 2133; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 2134; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 2135; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 2136; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 2137; GFX12-WGP-NEXT: s_wait_storecnt 0x0 2138; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 2139; GFX12-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV 2140; GFX12-WGP-NEXT: s_wait_storecnt 0x0 2141; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 2142; GFX12-WGP-NEXT: s_endpgm 2143; 2144; GFX12-CU-LABEL: global_agent_acq_rel_atomicrmw: 2145; GFX12-CU: ; %bb.0: ; %entry 2146; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 2147; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2148; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 2149; GFX12-CU-NEXT: s_wait_kmcnt 0x0 2150; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 2151; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 2152; GFX12-CU-NEXT: s_wait_samplecnt 0x0 2153; GFX12-CU-NEXT: s_wait_storecnt 0x0 2154; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 2155; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV 2156; GFX12-CU-NEXT: s_wait_storecnt 0x0 2157; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 2158; GFX12-CU-NEXT: s_endpgm 2159 ptr addrspace(1) %out, i32 
%in) { 2160entry: 2161 %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent") acq_rel 2162 ret void 2163} 2164 2165define amdgpu_kernel void @global_agent_seq_cst_atomicrmw( 2166; GFX6-LABEL: global_agent_seq_cst_atomicrmw: 2167; GFX6: ; %bb.0: ; %entry 2168; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2169; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2 2170; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2171; GFX6-NEXT: s_mov_b32 s11, s5 2172; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 2173; GFX6-NEXT: s_mov_b32 s9, 0x100f000 2174; GFX6-NEXT: s_mov_b32 s10, -1 2175; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 2176; GFX6-NEXT: s_mov_b32 s5, s11 2177; GFX6-NEXT: s_mov_b32 s6, s10 2178; GFX6-NEXT: s_mov_b32 s7, s9 2179; GFX6-NEXT: v_mov_b32_e32 v0, s8 2180; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2181; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 2182; GFX6-NEXT: s_waitcnt vmcnt(0) 2183; GFX6-NEXT: buffer_wbinvl1 2184; GFX6-NEXT: s_endpgm 2185; 2186; GFX7-LABEL: global_agent_seq_cst_atomicrmw: 2187; GFX7: ; %bb.0: ; %entry 2188; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 2189; GFX7-NEXT: s_load_dword s4, s[8:9], 0x2 2190; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2191; GFX7-NEXT: v_mov_b32_e32 v0, s6 2192; GFX7-NEXT: v_mov_b32_e32 v1, s7 2193; GFX7-NEXT: v_mov_b32_e32 v2, s4 2194; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2195; GFX7-NEXT: flat_atomic_swap v[0:1], v2 2196; GFX7-NEXT: s_waitcnt vmcnt(0) 2197; GFX7-NEXT: buffer_wbinvl1_vol 2198; GFX7-NEXT: s_endpgm 2199; 2200; GFX10-WGP-LABEL: global_agent_seq_cst_atomicrmw: 2201; GFX10-WGP: ; %bb.0: ; %entry 2202; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 2203; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2204; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 2205; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2206; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 2207; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2208; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2209; GFX10-WGP-NEXT: 
global_atomic_swap v0, v1, s[4:5] 2210; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2211; GFX10-WGP-NEXT: buffer_gl1_inv 2212; GFX10-WGP-NEXT: buffer_gl0_inv 2213; GFX10-WGP-NEXT: s_endpgm 2214; 2215; GFX10-CU-LABEL: global_agent_seq_cst_atomicrmw: 2216; GFX10-CU: ; %bb.0: ; %entry 2217; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 2218; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2219; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 2220; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2221; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 2222; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2223; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 2224; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[4:5] 2225; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 2226; GFX10-CU-NEXT: buffer_gl1_inv 2227; GFX10-CU-NEXT: buffer_gl0_inv 2228; GFX10-CU-NEXT: s_endpgm 2229; 2230; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_atomicrmw: 2231; SKIP-CACHE-INV: ; %bb.0: ; %entry 2232; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2233; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2 2234; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2235; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 2236; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 2237; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 2238; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 2239; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 2240; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 2241; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 2242; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 2243; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 2244; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2245; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 2246; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 2247; SKIP-CACHE-INV-NEXT: s_endpgm 2248; 2249; GFX90A-NOTTGSPLIT-LABEL: global_agent_seq_cst_atomicrmw: 2250; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 2251; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2252; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 
0x0 2253; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 2254; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2255; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 2256; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2257; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 2258; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 2259; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 2260; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 2261; 2262; GFX90A-TGSPLIT-LABEL: global_agent_seq_cst_atomicrmw: 2263; GFX90A-TGSPLIT: ; %bb.0: ; %entry 2264; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2265; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2266; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 2267; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2268; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 2269; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2270; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 2271; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 2272; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 2273; GFX90A-TGSPLIT-NEXT: s_endpgm 2274; 2275; GFX940-NOTTGSPLIT-LABEL: global_agent_seq_cst_atomicrmw: 2276; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 2277; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2278; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2279; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 2280; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2281; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 2282; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 2283; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2284; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 2285; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 2286; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 2287; GFX940-NOTTGSPLIT-NEXT: s_endpgm 2288; 2289; GFX940-TGSPLIT-LABEL: global_agent_seq_cst_atomicrmw: 2290; GFX940-TGSPLIT: ; %bb.0: ; %entry 2291; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2292; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2293; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 
0x8 2294; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2295; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 2296; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 2297; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2298; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 2299; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 2300; GFX940-TGSPLIT-NEXT: buffer_inv sc1 2301; GFX940-TGSPLIT-NEXT: s_endpgm 2302; 2303; GFX11-WGP-LABEL: global_agent_seq_cst_atomicrmw: 2304; GFX11-WGP: ; %bb.0: ; %entry 2305; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 2306; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2307; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 2308; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 2309; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 2310; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2311; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2312; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 2313; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2314; GFX11-WGP-NEXT: buffer_gl1_inv 2315; GFX11-WGP-NEXT: buffer_gl0_inv 2316; GFX11-WGP-NEXT: s_endpgm 2317; 2318; GFX11-CU-LABEL: global_agent_seq_cst_atomicrmw: 2319; GFX11-CU: ; %bb.0: ; %entry 2320; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 2321; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2322; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 2323; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 2324; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 2325; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2326; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 2327; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 2328; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 2329; GFX11-CU-NEXT: buffer_gl1_inv 2330; GFX11-CU-NEXT: buffer_gl0_inv 2331; GFX11-CU-NEXT: s_endpgm 2332; 2333; GFX12-WGP-LABEL: global_agent_seq_cst_atomicrmw: 2334; GFX12-WGP: ; %bb.0: ; %entry 2335; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 2336; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2337; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 2338; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 2339; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 2340; GFX12-WGP-NEXT: 
s_wait_bvhcnt 0x0 2341; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 2342; GFX12-WGP-NEXT: s_wait_storecnt 0x0 2343; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 2344; GFX12-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV 2345; GFX12-WGP-NEXT: s_wait_storecnt 0x0 2346; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 2347; GFX12-WGP-NEXT: s_endpgm 2348; 2349; GFX12-CU-LABEL: global_agent_seq_cst_atomicrmw: 2350; GFX12-CU: ; %bb.0: ; %entry 2351; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 2352; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2353; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 2354; GFX12-CU-NEXT: s_wait_kmcnt 0x0 2355; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 2356; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 2357; GFX12-CU-NEXT: s_wait_samplecnt 0x0 2358; GFX12-CU-NEXT: s_wait_storecnt 0x0 2359; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 2360; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV 2361; GFX12-CU-NEXT: s_wait_storecnt 0x0 2362; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 2363; GFX12-CU-NEXT: s_endpgm 2364 ptr addrspace(1) %out, i32 %in) { 2365entry: 2366 %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent") seq_cst 2367 ret void 2368} 2369; Agent-scope acquire atomicrmw xchg on a global (addrspace(1)) pointer; the returned value is stored back to %out. The assertions in this function are autogenerated - regenerate them with utils/update_llc_test_checks.py instead of editing by hand. 2370define amdgpu_kernel void @global_agent_acquire_ret_atomicrmw( 2371; GFX6-LABEL: global_agent_acquire_ret_atomicrmw: 2372; GFX6: ; %bb.0: ; %entry 2373; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2374; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2 2375; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2376; GFX6-NEXT: s_mov_b32 s11, s5 2377; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 2378; GFX6-NEXT: s_mov_b32 s9, 0x100f000 2379; GFX6-NEXT: s_mov_b32 s10, -1 2380; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 2381; GFX6-NEXT: s_mov_b32 s5, s11 2382; GFX6-NEXT: s_mov_b32 s6, s10 2383; GFX6-NEXT: s_mov_b32 s7, s9 2384; GFX6-NEXT: v_mov_b32_e32 v0, s8 2385; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc 2386; GFX6-NEXT: s_waitcnt vmcnt(0) 2387; GFX6-NEXT: 
buffer_wbinvl1 2388; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 2389; GFX6-NEXT: s_endpgm 2390; 2391; GFX7-LABEL: global_agent_acquire_ret_atomicrmw: 2392; GFX7: ; %bb.0: ; %entry 2393; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2394; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2 2395; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2396; GFX7-NEXT: v_mov_b32_e32 v0, s4 2397; GFX7-NEXT: v_mov_b32_e32 v1, s5 2398; GFX7-NEXT: v_mov_b32_e32 v2, s6 2399; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 2400; GFX7-NEXT: s_waitcnt vmcnt(0) 2401; GFX7-NEXT: buffer_wbinvl1_vol 2402; GFX7-NEXT: v_mov_b32_e32 v0, s4 2403; GFX7-NEXT: v_mov_b32_e32 v1, s5 2404; GFX7-NEXT: flat_store_dword v[0:1], v2 2405; GFX7-NEXT: s_endpgm 2406; 2407; GFX10-WGP-LABEL: global_agent_acquire_ret_atomicrmw: 2408; GFX10-WGP: ; %bb.0: ; %entry 2409; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 2410; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2411; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 2412; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2413; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 2414; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 2415; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 2416; GFX10-WGP-NEXT: buffer_gl1_inv 2417; GFX10-WGP-NEXT: buffer_gl0_inv 2418; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 2419; GFX10-WGP-NEXT: s_endpgm 2420; 2421; GFX10-CU-LABEL: global_agent_acquire_ret_atomicrmw: 2422; GFX10-CU: ; %bb.0: ; %entry 2423; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 2424; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2425; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 2426; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2427; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 2428; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 2429; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 2430; GFX10-CU-NEXT: buffer_gl1_inv 2431; GFX10-CU-NEXT: buffer_gl0_inv 2432; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 2433; GFX10-CU-NEXT: s_endpgm 2434; 2435; SKIP-CACHE-INV-LABEL: global_agent_acquire_ret_atomicrmw: 2436; SKIP-CACHE-INV: ; %bb.0: 
; %entry 2437; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2438; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2 2439; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2440; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 2441; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 2442; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 2443; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 2444; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 2445; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 2446; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 2447; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 2448; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 2449; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc 2450; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 2451; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 2452; SKIP-CACHE-INV-NEXT: s_endpgm 2453; 2454; GFX90A-NOTTGSPLIT-LABEL: global_agent_acquire_ret_atomicrmw: 2455; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 2456; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2457; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2458; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 2459; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2460; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 2461; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 2462; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 2463; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 2464; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 2465; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 2466; 2467; GFX90A-TGSPLIT-LABEL: global_agent_acquire_ret_atomicrmw: 2468; GFX90A-TGSPLIT: ; %bb.0: ; %entry 2469; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2470; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2471; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 2472; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2473; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 2474; GFX90A-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 2475; 
GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 2476; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 2477; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 2478; GFX90A-TGSPLIT-NEXT: s_endpgm 2479; 2480; GFX940-NOTTGSPLIT-LABEL: global_agent_acquire_ret_atomicrmw: 2481; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 2482; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2483; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2484; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 2485; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2486; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 2487; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 2488; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 2489; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 2490; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 2491; GFX940-NOTTGSPLIT-NEXT: s_endpgm 2492; 2493; GFX940-TGSPLIT-LABEL: global_agent_acquire_ret_atomicrmw: 2494; GFX940-TGSPLIT: ; %bb.0: ; %entry 2495; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2496; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2497; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 2498; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2499; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 2500; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 2501; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 2502; GFX940-TGSPLIT-NEXT: buffer_inv sc1 2503; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 2504; GFX940-TGSPLIT-NEXT: s_endpgm 2505; 2506; GFX11-WGP-LABEL: global_agent_acquire_ret_atomicrmw: 2507; GFX11-WGP: ; %bb.0: ; %entry 2508; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 2509; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2510; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 2511; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 2512; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 2513; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc 2514; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 2515; GFX11-WGP-NEXT: buffer_gl1_inv 2516; GFX11-WGP-NEXT: buffer_gl0_inv 
2517; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 2518; GFX11-WGP-NEXT: s_endpgm 2519; 2520; GFX11-CU-LABEL: global_agent_acquire_ret_atomicrmw: 2521; GFX11-CU: ; %bb.0: ; %entry 2522; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 2523; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2524; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 2525; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 2526; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 2527; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc 2528; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 2529; GFX11-CU-NEXT: buffer_gl1_inv 2530; GFX11-CU-NEXT: buffer_gl0_inv 2531; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 2532; GFX11-CU-NEXT: s_endpgm 2533; 2534; GFX12-WGP-LABEL: global_agent_acquire_ret_atomicrmw: 2535; GFX12-WGP: ; %bb.0: ; %entry 2536; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 2537; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2538; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 2539; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 2540; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 2541; GFX12-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV 2542; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 2543; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 2544; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 2545; GFX12-WGP-NEXT: s_endpgm 2546; 2547; GFX12-CU-LABEL: global_agent_acquire_ret_atomicrmw: 2548; GFX12-CU: ; %bb.0: ; %entry 2549; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 2550; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2551; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 2552; GFX12-CU-NEXT: s_wait_kmcnt 0x0 2553; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 2554; GFX12-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV 2555; GFX12-CU-NEXT: s_wait_loadcnt 0x0 2556; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 2557; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 2558; GFX12-CU-NEXT: s_endpgm 2559 ptr addrspace(1) %out, i32 %in) { 2560entry: 2561 %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent") 
acquire 2562 store i32 %val, ptr addrspace(1) %out, align 4 2563 ret void 2564}
2565; Agent-scope acq_rel atomicrmw xchg on a global (addrspace(1)) pointer; the returned value is stored back to %out. Every expected sequence has an s_waitcnt/s_wait_* both before and after the swap.
2566define amdgpu_kernel void @global_agent_acq_rel_ret_atomicrmw( 2567; GFX6-LABEL: global_agent_acq_rel_ret_atomicrmw: 2568; GFX6: ; %bb.0: ; %entry 2569; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2570; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2 2571; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2572; GFX6-NEXT: s_mov_b32 s11, s5 2573; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 2574; GFX6-NEXT: s_mov_b32 s9, 0x100f000 2575; GFX6-NEXT: s_mov_b32 s10, -1 2576; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 2577; GFX6-NEXT: s_mov_b32 s5, s11 2578; GFX6-NEXT: s_mov_b32 s6, s10 2579; GFX6-NEXT: s_mov_b32 s7, s9 2580; GFX6-NEXT: v_mov_b32_e32 v0, s8 2581; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2582; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc 2583; GFX6-NEXT: s_waitcnt vmcnt(0) 2584; GFX6-NEXT: buffer_wbinvl1 2585; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 2586; GFX6-NEXT: s_endpgm 2587; 2588; GFX7-LABEL: global_agent_acq_rel_ret_atomicrmw: 2589; GFX7: ; %bb.0: ; %entry 2590; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2591; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2 2592; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2593; GFX7-NEXT: v_mov_b32_e32 v0, s4 2594; GFX7-NEXT: v_mov_b32_e32 v1, s5 2595; GFX7-NEXT: v_mov_b32_e32 v2, s6 2596; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2597; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 2598; GFX7-NEXT: s_waitcnt vmcnt(0) 2599; GFX7-NEXT: buffer_wbinvl1_vol 2600; GFX7-NEXT: v_mov_b32_e32 v0, s4 2601; GFX7-NEXT: v_mov_b32_e32 v1, s5 2602; GFX7-NEXT: flat_store_dword v[0:1], v2 2603; GFX7-NEXT: s_endpgm 2604; 2605; GFX10-WGP-LABEL: global_agent_acq_rel_ret_atomicrmw: 2606; GFX10-WGP: ; %bb.0: ; %entry 2607; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 2608; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2609; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 2610; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2611; 
GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 2612; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2613; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2614; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 2615; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 2616; GFX10-WGP-NEXT: buffer_gl1_inv 2617; GFX10-WGP-NEXT: buffer_gl0_inv 2618; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 2619; GFX10-WGP-NEXT: s_endpgm 2620; 2621; GFX10-CU-LABEL: global_agent_acq_rel_ret_atomicrmw: 2622; GFX10-CU: ; %bb.0: ; %entry 2623; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 2624; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2625; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 2626; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2627; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 2628; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2629; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 2630; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 2631; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 2632; GFX10-CU-NEXT: buffer_gl1_inv 2633; GFX10-CU-NEXT: buffer_gl0_inv 2634; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 2635; GFX10-CU-NEXT: s_endpgm 2636; 2637; SKIP-CACHE-INV-LABEL: global_agent_acq_rel_ret_atomicrmw: 2638; SKIP-CACHE-INV: ; %bb.0: ; %entry 2639; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2640; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2 2641; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2642; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 2643; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 2644; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 2645; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 2646; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 2647; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 2648; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 2649; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 2650; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 2651; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2652; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc 2653; 
SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 2654; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 2655; SKIP-CACHE-INV-NEXT: s_endpgm 2656; 2657; GFX90A-NOTTGSPLIT-LABEL: global_agent_acq_rel_ret_atomicrmw: 2658; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 2659; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2660; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2661; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 2662; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2663; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 2664; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2665; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 2666; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 2667; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 2668; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 2669; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 2670; 2671; GFX90A-TGSPLIT-LABEL: global_agent_acq_rel_ret_atomicrmw: 2672; GFX90A-TGSPLIT: ; %bb.0: ; %entry 2673; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2674; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2675; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 2676; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2677; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 2678; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2679; GFX90A-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 2680; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 2681; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 2682; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 2683; GFX90A-TGSPLIT-NEXT: s_endpgm 2684; 2685; GFX940-NOTTGSPLIT-LABEL: global_agent_acq_rel_ret_atomicrmw: 2686; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 2687; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2688; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2689; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 2690; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2691; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 2692; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 2693; 
GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2694; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 2695; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 2696; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 2697; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 2698; GFX940-NOTTGSPLIT-NEXT: s_endpgm 2699; 2700; GFX940-TGSPLIT-LABEL: global_agent_acq_rel_ret_atomicrmw: 2701; GFX940-TGSPLIT: ; %bb.0: ; %entry 2702; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2703; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2704; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 2705; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2706; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 2707; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 2708; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2709; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 2710; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 2711; GFX940-TGSPLIT-NEXT: buffer_inv sc1 2712; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 2713; GFX940-TGSPLIT-NEXT: s_endpgm 2714; 2715; GFX11-WGP-LABEL: global_agent_acq_rel_ret_atomicrmw: 2716; GFX11-WGP: ; %bb.0: ; %entry 2717; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 2718; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2719; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 2720; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 2721; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 2722; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2723; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2724; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc 2725; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 2726; GFX11-WGP-NEXT: buffer_gl1_inv 2727; GFX11-WGP-NEXT: buffer_gl0_inv 2728; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 2729; GFX11-WGP-NEXT: s_endpgm 2730; 2731; GFX11-CU-LABEL: global_agent_acq_rel_ret_atomicrmw: 2732; GFX11-CU: ; %bb.0: ; %entry 2733; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 2734; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2735; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 
0x8 2736; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 2737; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 2738; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2739; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 2740; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc 2741; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 2742; GFX11-CU-NEXT: buffer_gl1_inv 2743; GFX11-CU-NEXT: buffer_gl0_inv 2744; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 2745; GFX11-CU-NEXT: s_endpgm 2746; 2747; GFX12-WGP-LABEL: global_agent_acq_rel_ret_atomicrmw: 2748; GFX12-WGP: ; %bb.0: ; %entry 2749; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 2750; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2751; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 2752; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 2753; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 2754; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 2755; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 2756; GFX12-WGP-NEXT: s_wait_storecnt 0x0 2757; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 2758; GFX12-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV 2759; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 2760; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 2761; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 2762; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 2763; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 2764; GFX12-WGP-NEXT: s_endpgm 2765; 2766; GFX12-CU-LABEL: global_agent_acq_rel_ret_atomicrmw: 2767; GFX12-CU: ; %bb.0: ; %entry 2768; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 2769; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2770; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 2771; GFX12-CU-NEXT: s_wait_kmcnt 0x0 2772; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 2773; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 2774; GFX12-CU-NEXT: s_wait_samplecnt 0x0 2775; GFX12-CU-NEXT: s_wait_storecnt 0x0 2776; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 2777; GFX12-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV 2778; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 2779; GFX12-CU-NEXT: s_wait_samplecnt 0x0 2780; GFX12-CU-NEXT: 
s_wait_loadcnt 0x0 2781; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 2782; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 2783; GFX12-CU-NEXT: s_endpgm 2784 ptr addrspace(1) %out, i32 %in) { 2785entry: 2786 %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent") acq_rel 2787 store i32 %val, ptr addrspace(1) %out, align 4 2788 ret void 2789} 2790; Agent-scope seq_cst atomicrmw xchg on a global (addrspace(1)) pointer; the returned value is stored back to %out. 2791define amdgpu_kernel void @global_agent_seq_cst_ret_atomicrmw( 2792; GFX6-LABEL: global_agent_seq_cst_ret_atomicrmw: 2793; GFX6: ; %bb.0: ; %entry 2794; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2795; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2 2796; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2797; GFX6-NEXT: s_mov_b32 s11, s5 2798; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 2799; GFX6-NEXT: s_mov_b32 s9, 0x100f000 2800; GFX6-NEXT: s_mov_b32 s10, -1 2801; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 2802; GFX6-NEXT: s_mov_b32 s5, s11 2803; GFX6-NEXT: s_mov_b32 s6, s10 2804; GFX6-NEXT: s_mov_b32 s7, s9 2805; GFX6-NEXT: v_mov_b32_e32 v0, s8 2806; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2807; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc 2808; GFX6-NEXT: s_waitcnt vmcnt(0) 2809; GFX6-NEXT: buffer_wbinvl1 2810; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 2811; GFX6-NEXT: s_endpgm 2812; 2813; GFX7-LABEL: global_agent_seq_cst_ret_atomicrmw: 2814; GFX7: ; %bb.0: ; %entry 2815; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2816; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2 2817; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2818; GFX7-NEXT: v_mov_b32_e32 v0, s4 2819; GFX7-NEXT: v_mov_b32_e32 v1, s5 2820; GFX7-NEXT: v_mov_b32_e32 v2, s6 2821; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2822; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 2823; GFX7-NEXT: s_waitcnt vmcnt(0) 2824; GFX7-NEXT: buffer_wbinvl1_vol 2825; GFX7-NEXT: v_mov_b32_e32 v0, s4 2826; GFX7-NEXT: v_mov_b32_e32 v1, s5 2827; GFX7-NEXT: flat_store_dword v[0:1], v2 2828; GFX7-NEXT: s_endpgm 2829; 2830; 
GFX10-WGP-LABEL: global_agent_seq_cst_ret_atomicrmw: 2831; GFX10-WGP: ; %bb.0: ; %entry 2832; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 2833; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2834; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 2835; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2836; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 2837; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2838; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2839; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 2840; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 2841; GFX10-WGP-NEXT: buffer_gl1_inv 2842; GFX10-WGP-NEXT: buffer_gl0_inv 2843; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 2844; GFX10-WGP-NEXT: s_endpgm 2845; 2846; GFX10-CU-LABEL: global_agent_seq_cst_ret_atomicrmw: 2847; GFX10-CU: ; %bb.0: ; %entry 2848; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 2849; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2850; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 2851; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2852; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 2853; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2854; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 2855; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 2856; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 2857; GFX10-CU-NEXT: buffer_gl1_inv 2858; GFX10-CU-NEXT: buffer_gl0_inv 2859; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 2860; GFX10-CU-NEXT: s_endpgm 2861; 2862; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_ret_atomicrmw: 2863; SKIP-CACHE-INV: ; %bb.0: ; %entry 2864; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2865; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2 2866; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2867; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 2868; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 2869; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 2870; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 2871; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 2872; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, 
s7 2873; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 2874; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 2875; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 2876; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2877; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc 2878; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 2879; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 2880; SKIP-CACHE-INV-NEXT: s_endpgm 2881; 2882; GFX90A-NOTTGSPLIT-LABEL: global_agent_seq_cst_ret_atomicrmw: 2883; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 2884; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2885; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2886; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 2887; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2888; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 2889; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2890; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 2891; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 2892; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 2893; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 2894; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 2895; 2896; GFX90A-TGSPLIT-LABEL: global_agent_seq_cst_ret_atomicrmw: 2897; GFX90A-TGSPLIT: ; %bb.0: ; %entry 2898; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2899; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2900; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 2901; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2902; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 2903; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2904; GFX90A-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 2905; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 2906; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 2907; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 2908; GFX90A-TGSPLIT-NEXT: s_endpgm 2909; 2910; GFX940-NOTTGSPLIT-LABEL: global_agent_seq_cst_ret_atomicrmw: 2911; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 2912; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2913; 
GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2914; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 2915; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2916; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 2917; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 2918; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2919; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 2920; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 2921; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 2922; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 2923; GFX940-NOTTGSPLIT-NEXT: s_endpgm 2924; 2925; GFX940-TGSPLIT-LABEL: global_agent_seq_cst_ret_atomicrmw: 2926; GFX940-TGSPLIT: ; %bb.0: ; %entry 2927; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2928; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2929; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 2930; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2931; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 2932; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 2933; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2934; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 2935; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 2936; GFX940-TGSPLIT-NEXT: buffer_inv sc1 2937; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 2938; GFX940-TGSPLIT-NEXT: s_endpgm 2939; 2940; GFX11-WGP-LABEL: global_agent_seq_cst_ret_atomicrmw: 2941; GFX11-WGP: ; %bb.0: ; %entry 2942; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 2943; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2944; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 2945; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 2946; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 2947; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2948; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2949; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc 2950; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 2951; GFX11-WGP-NEXT: buffer_gl1_inv 2952; GFX11-WGP-NEXT: buffer_gl0_inv 2953; GFX11-WGP-NEXT: global_store_b32 v0, v1, 
s[0:1] 2954; GFX11-WGP-NEXT: s_endpgm 2955; 2956; GFX11-CU-LABEL: global_agent_seq_cst_ret_atomicrmw: 2957; GFX11-CU: ; %bb.0: ; %entry 2958; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 2959; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2960; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 2961; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 2962; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 2963; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2964; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 2965; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc 2966; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 2967; GFX11-CU-NEXT: buffer_gl1_inv 2968; GFX11-CU-NEXT: buffer_gl0_inv 2969; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 2970; GFX11-CU-NEXT: s_endpgm 2971; 2972; GFX12-WGP-LABEL: global_agent_seq_cst_ret_atomicrmw: 2973; GFX12-WGP: ; %bb.0: ; %entry 2974; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 2975; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2976; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 2977; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 2978; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 2979; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 2980; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 2981; GFX12-WGP-NEXT: s_wait_storecnt 0x0 2982; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 2983; GFX12-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV 2984; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 2985; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 2986; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 2987; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 2988; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 2989; GFX12-WGP-NEXT: s_endpgm 2990; 2991; GFX12-CU-LABEL: global_agent_seq_cst_ret_atomicrmw: 2992; GFX12-CU: ; %bb.0: ; %entry 2993; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 2994; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2995; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 2996; GFX12-CU-NEXT: s_wait_kmcnt 0x0 2997; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 2998; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 2999; GFX12-CU-NEXT: s_wait_samplecnt 0x0 3000; GFX12-CU-NEXT: 
s_wait_storecnt 0x0 3001; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 3002; GFX12-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV 3003; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 3004; GFX12-CU-NEXT: s_wait_samplecnt 0x0 3005; GFX12-CU-NEXT: s_wait_loadcnt 0x0 3006; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 3007; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 3008; GFX12-CU-NEXT: s_endpgm 3009 ptr addrspace(1) %out, i32 %in) { 3010entry: 3011 %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent") seq_cst 3012 store i32 %val, ptr addrspace(1) %out, align 4 3013 ret void 3014} 3015 3016define amdgpu_kernel void @global_agent_monotonic_monotonic_cmpxchg( 3017; GFX6-LABEL: global_agent_monotonic_monotonic_cmpxchg: 3018; GFX6: ; %bb.0: ; %entry 3019; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 3020; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 3021; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 3022; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 3023; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3024; GFX6-NEXT: s_mov_b32 s12, s5 3025; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 3026; GFX6-NEXT: s_mov_b32 s10, 0x100f000 3027; GFX6-NEXT: s_mov_b32 s11, -1 3028; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 3029; GFX6-NEXT: s_mov_b32 s5, s12 3030; GFX6-NEXT: s_mov_b32 s6, s11 3031; GFX6-NEXT: s_mov_b32 s7, s10 3032; GFX6-NEXT: v_mov_b32_e32 v0, s9 3033; GFX6-NEXT: v_mov_b32_e32 v2, s8 3034; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 3035; GFX6-NEXT: v_mov_b32_e32 v1, v2 3036; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 3037; GFX6-NEXT: s_endpgm 3038; 3039; GFX7-LABEL: global_agent_monotonic_monotonic_cmpxchg: 3040; GFX7: ; %bb.0: ; %entry 3041; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 3042; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 3043; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 3044; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 3045; GFX7-NEXT: s_mov_b64 
s[10:11], 16 3046; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3047; GFX7-NEXT: s_mov_b32 s4, s8 3048; GFX7-NEXT: s_mov_b32 s5, s9 3049; GFX7-NEXT: s_mov_b32 s9, s10 3050; GFX7-NEXT: s_mov_b32 s8, s11 3051; GFX7-NEXT: s_add_u32 s4, s4, s9 3052; GFX7-NEXT: s_addc_u32 s8, s5, s8 3053; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 3054; GFX7-NEXT: s_mov_b32 s5, s8 3055; GFX7-NEXT: v_mov_b32_e32 v2, s7 3056; GFX7-NEXT: v_mov_b32_e32 v0, s6 3057; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3058; GFX7-NEXT: v_mov_b32_e32 v3, v0 3059; GFX7-NEXT: v_mov_b32_e32 v0, s4 3060; GFX7-NEXT: v_mov_b32_e32 v1, s5 3061; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 3062; GFX7-NEXT: s_endpgm 3063; 3064; GFX10-WGP-LABEL: global_agent_monotonic_monotonic_cmpxchg: 3065; GFX10-WGP: ; %bb.0: ; %entry 3066; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 3067; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3068; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 3069; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 3070; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3071; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 3072; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 3073; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3074; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 3075; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 3076; GFX10-WGP-NEXT: s_endpgm 3077; 3078; GFX10-CU-LABEL: global_agent_monotonic_monotonic_cmpxchg: 3079; GFX10-CU: ; %bb.0: ; %entry 3080; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 3081; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3082; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 3083; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 3084; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3085; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 3086; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 3087; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3088; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 3089; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] 
offset:16 3090; GFX10-CU-NEXT: s_endpgm 3091; 3092; SKIP-CACHE-INV-LABEL: global_agent_monotonic_monotonic_cmpxchg: 3093; SKIP-CACHE-INV: ; %bb.0: ; %entry 3094; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 3095; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 3096; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 3097; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 3098; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3099; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 3100; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 3101; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 3102; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 3103; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 3104; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 3105; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 3106; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 3107; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 3108; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 3109; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 3110; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 3111; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 3112; SKIP-CACHE-INV-NEXT: s_endpgm 3113; 3114; GFX90A-NOTTGSPLIT-LABEL: global_agent_monotonic_monotonic_cmpxchg: 3115; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 3116; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3117; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3118; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 3119; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 3120; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3121; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 3122; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 3123; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3124; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 3125; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 3126; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 3127; 
3128; GFX90A-TGSPLIT-LABEL: global_agent_monotonic_monotonic_cmpxchg: 3129; GFX90A-TGSPLIT: ; %bb.0: ; %entry 3130; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3131; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3132; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 3133; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 3134; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3135; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 3136; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 3137; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3138; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 3139; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 3140; GFX90A-TGSPLIT-NEXT: s_endpgm 3141; 3142; GFX940-NOTTGSPLIT-LABEL: global_agent_monotonic_monotonic_cmpxchg: 3143; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 3144; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3145; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3146; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 3147; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 3148; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3149; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 3150; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 3151; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3152; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 3153; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 3154; GFX940-NOTTGSPLIT-NEXT: s_endpgm 3155; 3156; GFX940-TGSPLIT-LABEL: global_agent_monotonic_monotonic_cmpxchg: 3157; GFX940-TGSPLIT: ; %bb.0: ; %entry 3158; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3159; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3160; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 3161; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 3162; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3163; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 3164; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 3165; 
GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3166; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 3167; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 3168; GFX940-TGSPLIT-NEXT: s_endpgm 3169; 3170; GFX11-WGP-LABEL: global_agent_monotonic_monotonic_cmpxchg: 3171; GFX11-WGP: ; %bb.0: ; %entry 3172; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 3173; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3174; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 3175; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 3176; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 3177; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 3178; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 3179; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3180; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 3181; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 3182; GFX11-WGP-NEXT: s_endpgm 3183; 3184; GFX11-CU-LABEL: global_agent_monotonic_monotonic_cmpxchg: 3185; GFX11-CU: ; %bb.0: ; %entry 3186; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 3187; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3188; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 3189; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 3190; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 3191; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 3192; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 3193; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3194; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 3195; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 3196; GFX11-CU-NEXT: s_endpgm 3197; 3198; GFX12-WGP-LABEL: global_agent_monotonic_monotonic_cmpxchg: 3199; GFX12-WGP: ; %bb.0: ; %entry 3200; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 3201; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3202; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 3203; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 3204; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 3205; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 3206; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 3207; 
GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3208; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 3209; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 3210; GFX12-WGP-NEXT: s_endpgm 3211; 3212; GFX12-CU-LABEL: global_agent_monotonic_monotonic_cmpxchg: 3213; GFX12-CU: ; %bb.0: ; %entry 3214; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 3215; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3216; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 3217; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 3218; GFX12-CU-NEXT: s_wait_kmcnt 0x0 3219; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 3220; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 3221; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3222; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 3223; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 3224; GFX12-CU-NEXT: s_endpgm 3225 ptr addrspace(1) %out, i32 %in, i32 %old) { 3226entry: 3227 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 3228 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") monotonic monotonic 3229 ret void 3230} 3231 3232define amdgpu_kernel void @global_agent_acquire_monotonic_cmpxchg( 3233; GFX6-LABEL: global_agent_acquire_monotonic_cmpxchg: 3234; GFX6: ; %bb.0: ; %entry 3235; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 3236; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 3237; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 3238; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 3239; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3240; GFX6-NEXT: s_mov_b32 s12, s5 3241; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 3242; GFX6-NEXT: s_mov_b32 s10, 0x100f000 3243; GFX6-NEXT: s_mov_b32 s11, -1 3244; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 3245; GFX6-NEXT: s_mov_b32 s5, s12 3246; GFX6-NEXT: s_mov_b32 s6, s11 3247; GFX6-NEXT: s_mov_b32 s7, s10 3248; GFX6-NEXT: v_mov_b32_e32 v0, s9 3249; GFX6-NEXT: v_mov_b32_e32 v2, s8 3250; 
GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 3251; GFX6-NEXT: v_mov_b32_e32 v1, v2 3252; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 3253; GFX6-NEXT: s_waitcnt vmcnt(0) 3254; GFX6-NEXT: buffer_wbinvl1 3255; GFX6-NEXT: s_endpgm 3256; 3257; GFX7-LABEL: global_agent_acquire_monotonic_cmpxchg: 3258; GFX7: ; %bb.0: ; %entry 3259; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 3260; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 3261; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 3262; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 3263; GFX7-NEXT: s_mov_b64 s[10:11], 16 3264; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3265; GFX7-NEXT: s_mov_b32 s4, s8 3266; GFX7-NEXT: s_mov_b32 s5, s9 3267; GFX7-NEXT: s_mov_b32 s9, s10 3268; GFX7-NEXT: s_mov_b32 s8, s11 3269; GFX7-NEXT: s_add_u32 s4, s4, s9 3270; GFX7-NEXT: s_addc_u32 s8, s5, s8 3271; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 3272; GFX7-NEXT: s_mov_b32 s5, s8 3273; GFX7-NEXT: v_mov_b32_e32 v2, s7 3274; GFX7-NEXT: v_mov_b32_e32 v0, s6 3275; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3276; GFX7-NEXT: v_mov_b32_e32 v3, v0 3277; GFX7-NEXT: v_mov_b32_e32 v0, s4 3278; GFX7-NEXT: v_mov_b32_e32 v1, s5 3279; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 3280; GFX7-NEXT: s_waitcnt vmcnt(0) 3281; GFX7-NEXT: buffer_wbinvl1_vol 3282; GFX7-NEXT: s_endpgm 3283; 3284; GFX10-WGP-LABEL: global_agent_acquire_monotonic_cmpxchg: 3285; GFX10-WGP: ; %bb.0: ; %entry 3286; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 3287; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3288; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 3289; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 3290; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3291; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 3292; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 3293; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3294; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 3295; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] 
offset:16 3296; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 3297; GFX10-WGP-NEXT: buffer_gl1_inv 3298; GFX10-WGP-NEXT: buffer_gl0_inv 3299; GFX10-WGP-NEXT: s_endpgm 3300; 3301; GFX10-CU-LABEL: global_agent_acquire_monotonic_cmpxchg: 3302; GFX10-CU: ; %bb.0: ; %entry 3303; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 3304; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3305; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 3306; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 3307; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3308; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 3309; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 3310; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3311; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 3312; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 3313; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 3314; GFX10-CU-NEXT: buffer_gl1_inv 3315; GFX10-CU-NEXT: buffer_gl0_inv 3316; GFX10-CU-NEXT: s_endpgm 3317; 3318; SKIP-CACHE-INV-LABEL: global_agent_acquire_monotonic_cmpxchg: 3319; SKIP-CACHE-INV: ; %bb.0: ; %entry 3320; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 3321; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 3322; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 3323; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 3324; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3325; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 3326; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 3327; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 3328; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 3329; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 3330; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 3331; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 3332; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 3333; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 3334; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 3335; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 3336; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 3337; 
SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 3338; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 3339; SKIP-CACHE-INV-NEXT: s_endpgm 3340; 3341; GFX90A-NOTTGSPLIT-LABEL: global_agent_acquire_monotonic_cmpxchg: 3342; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 3343; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3344; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3345; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 3346; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 3347; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3348; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 3349; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 3350; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3351; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 3352; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 3353; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 3354; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 3355; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 3356; 3357; GFX90A-TGSPLIT-LABEL: global_agent_acquire_monotonic_cmpxchg: 3358; GFX90A-TGSPLIT: ; %bb.0: ; %entry 3359; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3360; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3361; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 3362; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 3363; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3364; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 3365; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 3366; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3367; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 3368; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 3369; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 3370; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 3371; GFX90A-TGSPLIT-NEXT: s_endpgm 3372; 3373; GFX940-NOTTGSPLIT-LABEL: global_agent_acquire_monotonic_cmpxchg: 3374; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 3375; GFX940-NOTTGSPLIT-NEXT: 
v_mov_b32_e32 v0, 0 3376; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3377; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 3378; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 3379; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3380; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 3381; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 3382; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3383; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 3384; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 3385; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 3386; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 3387; GFX940-NOTTGSPLIT-NEXT: s_endpgm 3388; 3389; GFX940-TGSPLIT-LABEL: global_agent_acquire_monotonic_cmpxchg: 3390; GFX940-TGSPLIT: ; %bb.0: ; %entry 3391; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3392; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3393; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 3394; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 3395; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3396; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 3397; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 3398; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3399; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 3400; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 3401; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 3402; GFX940-TGSPLIT-NEXT: buffer_inv sc1 3403; GFX940-TGSPLIT-NEXT: s_endpgm 3404; 3405; GFX11-WGP-LABEL: global_agent_acquire_monotonic_cmpxchg: 3406; GFX11-WGP: ; %bb.0: ; %entry 3407; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 3408; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3409; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 3410; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 3411; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 3412; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 3413; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 3414; GFX11-WGP-NEXT: ; kill: def $vgpr1 
killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3415; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 3416; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 3417; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 3418; GFX11-WGP-NEXT: buffer_gl1_inv 3419; GFX11-WGP-NEXT: buffer_gl0_inv 3420; GFX11-WGP-NEXT: s_endpgm 3421; 3422; GFX11-CU-LABEL: global_agent_acquire_monotonic_cmpxchg: 3423; GFX11-CU: ; %bb.0: ; %entry 3424; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 3425; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3426; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 3427; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 3428; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 3429; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 3430; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 3431; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3432; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 3433; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 3434; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 3435; GFX11-CU-NEXT: buffer_gl1_inv 3436; GFX11-CU-NEXT: buffer_gl0_inv 3437; GFX11-CU-NEXT: s_endpgm 3438; 3439; GFX12-WGP-LABEL: global_agent_acquire_monotonic_cmpxchg: 3440; GFX12-WGP: ; %bb.0: ; %entry 3441; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 3442; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3443; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 3444; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 3445; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 3446; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 3447; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 3448; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3449; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 3450; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 3451; GFX12-WGP-NEXT: s_wait_storecnt 0x0 3452; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 3453; GFX12-WGP-NEXT: s_endpgm 3454; 3455; GFX12-CU-LABEL: global_agent_acquire_monotonic_cmpxchg: 3456; GFX12-CU: ; %bb.0: ; %entry 3457; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 3458; 
GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3459; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 3460; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 3461; GFX12-CU-NEXT: s_wait_kmcnt 0x0 3462; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 3463; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 3464; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3465; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 3466; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 3467; GFX12-CU-NEXT: s_wait_storecnt 0x0 3468; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 3469; GFX12-CU-NEXT: s_endpgm 3470 ptr addrspace(1) %out, i32 %in, i32 %old) { 3471entry: 3472 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 3473 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") acquire monotonic 3474 ret void 3475} 3476 3477define amdgpu_kernel void @global_agent_release_monotonic_cmpxchg( 3478; GFX6-LABEL: global_agent_release_monotonic_cmpxchg: 3479; GFX6: ; %bb.0: ; %entry 3480; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 3481; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 3482; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 3483; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 3484; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3485; GFX6-NEXT: s_mov_b32 s12, s5 3486; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 3487; GFX6-NEXT: s_mov_b32 s10, 0x100f000 3488; GFX6-NEXT: s_mov_b32 s11, -1 3489; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 3490; GFX6-NEXT: s_mov_b32 s5, s12 3491; GFX6-NEXT: s_mov_b32 s6, s11 3492; GFX6-NEXT: s_mov_b32 s7, s10 3493; GFX6-NEXT: v_mov_b32_e32 v0, s9 3494; GFX6-NEXT: v_mov_b32_e32 v2, s8 3495; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 3496; GFX6-NEXT: v_mov_b32_e32 v1, v2 3497; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3498; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 3499; GFX6-NEXT: s_endpgm 3500; 3501; GFX7-LABEL: 
global_agent_release_monotonic_cmpxchg: 3502; GFX7: ; %bb.0: ; %entry 3503; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 3504; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 3505; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 3506; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 3507; GFX7-NEXT: s_mov_b64 s[10:11], 16 3508; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3509; GFX7-NEXT: s_mov_b32 s4, s8 3510; GFX7-NEXT: s_mov_b32 s5, s9 3511; GFX7-NEXT: s_mov_b32 s9, s10 3512; GFX7-NEXT: s_mov_b32 s8, s11 3513; GFX7-NEXT: s_add_u32 s4, s4, s9 3514; GFX7-NEXT: s_addc_u32 s8, s5, s8 3515; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 3516; GFX7-NEXT: s_mov_b32 s5, s8 3517; GFX7-NEXT: v_mov_b32_e32 v2, s7 3518; GFX7-NEXT: v_mov_b32_e32 v0, s6 3519; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3520; GFX7-NEXT: v_mov_b32_e32 v3, v0 3521; GFX7-NEXT: v_mov_b32_e32 v0, s4 3522; GFX7-NEXT: v_mov_b32_e32 v1, s5 3523; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3524; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 3525; GFX7-NEXT: s_endpgm 3526; 3527; GFX10-WGP-LABEL: global_agent_release_monotonic_cmpxchg: 3528; GFX10-WGP: ; %bb.0: ; %entry 3529; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 3530; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3531; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 3532; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 3533; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3534; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 3535; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 3536; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3537; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 3538; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3539; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 3540; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 3541; GFX10-WGP-NEXT: s_endpgm 3542; 3543; GFX10-CU-LABEL: global_agent_release_monotonic_cmpxchg: 3544; GFX10-CU: ; %bb.0: ; %entry 3545; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 3546; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], 
s[8:9], 0x0 3547; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 3548; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 3549; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3550; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 3551; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 3552; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3553; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 3554; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3555; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 3556; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 3557; GFX10-CU-NEXT: s_endpgm 3558; 3559; SKIP-CACHE-INV-LABEL: global_agent_release_monotonic_cmpxchg: 3560; SKIP-CACHE-INV: ; %bb.0: ; %entry 3561; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 3562; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 3563; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 3564; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 3565; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3566; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 3567; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 3568; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 3569; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 3570; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 3571; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 3572; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 3573; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 3574; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 3575; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 3576; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 3577; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 3578; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3579; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 3580; SKIP-CACHE-INV-NEXT: s_endpgm 3581; 3582; GFX90A-NOTTGSPLIT-LABEL: global_agent_release_monotonic_cmpxchg: 3583; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 3584; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3585; GFX90A-NOTTGSPLIT-NEXT: 
s_load_dwordx2 s[4:5], s[8:9], 0x0 3586; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 3587; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 3588; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3589; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 3590; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 3591; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3592; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 3593; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3594; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 3595; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 3596; 3597; GFX90A-TGSPLIT-LABEL: global_agent_release_monotonic_cmpxchg: 3598; GFX90A-TGSPLIT: ; %bb.0: ; %entry 3599; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3600; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3601; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 3602; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 3603; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3604; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 3605; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 3606; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3607; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 3608; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3609; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 3610; GFX90A-TGSPLIT-NEXT: s_endpgm 3611; 3612; GFX940-NOTTGSPLIT-LABEL: global_agent_release_monotonic_cmpxchg: 3613; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 3614; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3615; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3616; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 3617; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 3618; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3619; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 3620; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 3621; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def 
$vgpr2_vgpr3 killed $exec 3622; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 3623; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 3624; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3625; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 3626; GFX940-NOTTGSPLIT-NEXT: s_endpgm 3627; 3628; GFX940-TGSPLIT-LABEL: global_agent_release_monotonic_cmpxchg: 3629; GFX940-TGSPLIT: ; %bb.0: ; %entry 3630; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3631; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3632; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 3633; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 3634; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3635; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 3636; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 3637; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3638; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 3639; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 3640; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3641; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 3642; GFX940-TGSPLIT-NEXT: s_endpgm 3643; 3644; GFX11-WGP-LABEL: global_agent_release_monotonic_cmpxchg: 3645; GFX11-WGP: ; %bb.0: ; %entry 3646; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 3647; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3648; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 3649; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 3650; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 3651; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 3652; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 3653; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3654; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 3655; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3656; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 3657; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 3658; GFX11-WGP-NEXT: s_endpgm 3659; 3660; GFX11-CU-LABEL: global_agent_release_monotonic_cmpxchg: 3661; GFX11-CU: ; %bb.0: ; %entry 
3662; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 3663; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3664; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 3665; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 3666; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 3667; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 3668; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 3669; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3670; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 3671; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3672; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 3673; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 3674; GFX11-CU-NEXT: s_endpgm 3675; 3676; GFX12-WGP-LABEL: global_agent_release_monotonic_cmpxchg: 3677; GFX12-WGP: ; %bb.0: ; %entry 3678; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 3679; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3680; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 3681; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 3682; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 3683; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 3684; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 3685; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3686; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 3687; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 3688; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 3689; GFX12-WGP-NEXT: s_wait_storecnt 0x0 3690; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 3691; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 3692; GFX12-WGP-NEXT: s_endpgm 3693; 3694; GFX12-CU-LABEL: global_agent_release_monotonic_cmpxchg: 3695; GFX12-CU: ; %bb.0: ; %entry 3696; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 3697; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3698; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 3699; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 3700; GFX12-CU-NEXT: s_wait_kmcnt 0x0 3701; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 3702; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 3703; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3704; 
; ---------------------------------------------------------------------------
; Reviewer note (documentation only; no check line or IR below is altered).
; The text that follows first completes the expected output and the IR body of
; the preceding kernel (cmpxchg ... syncscope("agent") release monotonic) and
; then defines kernel @global_agent_acq_rel_monotonic_cmpxchg(
; ptr addrspace(1) %out, i32 %in, i32 %old):
;   * %gep points 4 i32 elements past %out, which is why the expected
;     instructions carry an offset of 16 (the GFX7 expectations instead add 16
;     to the base address with s_add_u32 / s_addc_u32).
;   * A volatile cmpxchg on %gep compares against %old and stores %in with
;     syncscope("agent"), success ordering acq_rel and failure ordering
;     monotonic. The result %val is unused and the kernel returns void.
; In the expectations of every run configuration, wait instructions precede the
; compare-swap and another wait follows it. A cache invalidate comes after that
; wait in all configurations except the SKIP-CACHE-INV one; the gfx940
; expectations additionally show buffer_wbl2 sc1 ahead of the compare-swap.
; The assertions are autogenerated (see the note on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
; ---------------------------------------------------------------------------
GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 3705; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 3706; GFX12-CU-NEXT: s_wait_samplecnt 0x0 3707; GFX12-CU-NEXT: s_wait_storecnt 0x0 3708; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 3709; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 3710; GFX12-CU-NEXT: s_endpgm 3711 ptr addrspace(1) %out, i32 %in, i32 %old) { 3712entry: 3713 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 3714 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") release monotonic 3715 ret void 3716} 3717 3718define amdgpu_kernel void @global_agent_acq_rel_monotonic_cmpxchg( 3719; GFX6-LABEL: global_agent_acq_rel_monotonic_cmpxchg: 3720; GFX6: ; %bb.0: ; %entry 3721; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 3722; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 3723; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 3724; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 3725; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3726; GFX6-NEXT: s_mov_b32 s12, s5 3727; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 3728; GFX6-NEXT: s_mov_b32 s10, 0x100f000 3729; GFX6-NEXT: s_mov_b32 s11, -1 3730; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 3731; GFX6-NEXT: s_mov_b32 s5, s12 3732; GFX6-NEXT: s_mov_b32 s6, s11 3733; GFX6-NEXT: s_mov_b32 s7, s10 3734; GFX6-NEXT: v_mov_b32_e32 v0, s9 3735; GFX6-NEXT: v_mov_b32_e32 v2, s8 3736; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 3737; GFX6-NEXT: v_mov_b32_e32 v1, v2 3738; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3739; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 3740; GFX6-NEXT: s_waitcnt vmcnt(0) 3741; GFX6-NEXT: buffer_wbinvl1 3742; GFX6-NEXT: s_endpgm 3743; 3744; GFX7-LABEL: global_agent_acq_rel_monotonic_cmpxchg: 3745; GFX7: ; %bb.0: ; %entry 3746; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 3747; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 3748; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 3749; GFX7-NEXT: s_load_dword
s6, s[4:5], 0x3 3750; GFX7-NEXT: s_mov_b64 s[10:11], 16 3751; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3752; GFX7-NEXT: s_mov_b32 s4, s8 3753; GFX7-NEXT: s_mov_b32 s5, s9 3754; GFX7-NEXT: s_mov_b32 s9, s10 3755; GFX7-NEXT: s_mov_b32 s8, s11 3756; GFX7-NEXT: s_add_u32 s4, s4, s9 3757; GFX7-NEXT: s_addc_u32 s8, s5, s8 3758; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 3759; GFX7-NEXT: s_mov_b32 s5, s8 3760; GFX7-NEXT: v_mov_b32_e32 v2, s7 3761; GFX7-NEXT: v_mov_b32_e32 v0, s6 3762; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3763; GFX7-NEXT: v_mov_b32_e32 v3, v0 3764; GFX7-NEXT: v_mov_b32_e32 v0, s4 3765; GFX7-NEXT: v_mov_b32_e32 v1, s5 3766; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3767; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 3768; GFX7-NEXT: s_waitcnt vmcnt(0) 3769; GFX7-NEXT: buffer_wbinvl1_vol 3770; GFX7-NEXT: s_endpgm 3771; 3772; GFX10-WGP-LABEL: global_agent_acq_rel_monotonic_cmpxchg: 3773; GFX10-WGP: ; %bb.0: ; %entry 3774; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 3775; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3776; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 3777; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 3778; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3779; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 3780; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 3781; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3782; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 3783; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3784; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 3785; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 3786; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 3787; GFX10-WGP-NEXT: buffer_gl1_inv 3788; GFX10-WGP-NEXT: buffer_gl0_inv 3789; GFX10-WGP-NEXT: s_endpgm 3790; 3791; GFX10-CU-LABEL: global_agent_acq_rel_monotonic_cmpxchg: 3792; GFX10-CU: ; %bb.0: ; %entry 3793; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 3794; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3795; GFX10-CU-NEXT: s_load_dword
s7, s[8:9], 0x8 3796; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 3797; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3798; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 3799; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 3800; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3801; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 3802; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3803; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 3804; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 3805; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 3806; GFX10-CU-NEXT: buffer_gl1_inv 3807; GFX10-CU-NEXT: buffer_gl0_inv 3808; GFX10-CU-NEXT: s_endpgm 3809; 3810; SKIP-CACHE-INV-LABEL: global_agent_acq_rel_monotonic_cmpxchg: 3811; SKIP-CACHE-INV: ; %bb.0: ; %entry 3812; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 3813; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 3814; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 3815; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 3816; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3817; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 3818; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 3819; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 3820; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 3821; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 3822; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 3823; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 3824; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 3825; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 3826; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 3827; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 3828; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 3829; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3830; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 3831; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 3832; SKIP-CACHE-INV-NEXT: s_endpgm 3833; 3834; GFX90A-NOTTGSPLIT-LABEL: global_agent_acq_rel_monotonic_cmpxchg: 3835;
GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 3836; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3837; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3838; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 3839; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 3840; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3841; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 3842; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 3843; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3844; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 3845; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3846; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 3847; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 3848; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 3849; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 3850; 3851; GFX90A-TGSPLIT-LABEL: global_agent_acq_rel_monotonic_cmpxchg: 3852; GFX90A-TGSPLIT: ; %bb.0: ; %entry 3853; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3854; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3855; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 3856; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 3857; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3858; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 3859; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 3860; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3861; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 3862; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3863; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 3864; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 3865; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 3866; GFX90A-TGSPLIT-NEXT: s_endpgm 3867; 3868; GFX940-NOTTGSPLIT-LABEL: global_agent_acq_rel_monotonic_cmpxchg: 3869; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 3870; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3871; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3872; GFX940-NOTTGSPLIT-NEXT:
s_load_dword s3, s[4:5], 0x8 3873; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 3874; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3875; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 3876; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 3877; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3878; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 3879; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 3880; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3881; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 3882; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 3883; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 3884; GFX940-NOTTGSPLIT-NEXT: s_endpgm 3885; 3886; GFX940-TGSPLIT-LABEL: global_agent_acq_rel_monotonic_cmpxchg: 3887; GFX940-TGSPLIT: ; %bb.0: ; %entry 3888; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3889; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3890; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 3891; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 3892; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3893; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 3894; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 3895; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3896; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 3897; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 3898; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3899; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 3900; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 3901; GFX940-TGSPLIT-NEXT: buffer_inv sc1 3902; GFX940-TGSPLIT-NEXT: s_endpgm 3903; 3904; GFX11-WGP-LABEL: global_agent_acq_rel_monotonic_cmpxchg: 3905; GFX11-WGP: ; %bb.0: ; %entry 3906; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 3907; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3908; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 3909; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 3910; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 3911; GFX11-WGP-NEXT: v_mov_b32_e32
v1, s3 3912; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 3913; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3914; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 3915; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3916; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 3917; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 3918; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 3919; GFX11-WGP-NEXT: buffer_gl1_inv 3920; GFX11-WGP-NEXT: buffer_gl0_inv 3921; GFX11-WGP-NEXT: s_endpgm 3922; 3923; GFX11-CU-LABEL: global_agent_acq_rel_monotonic_cmpxchg: 3924; GFX11-CU: ; %bb.0: ; %entry 3925; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 3926; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3927; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 3928; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 3929; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 3930; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 3931; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 3932; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3933; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 3934; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3935; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 3936; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 3937; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 3938; GFX11-CU-NEXT: buffer_gl1_inv 3939; GFX11-CU-NEXT: buffer_gl0_inv 3940; GFX11-CU-NEXT: s_endpgm 3941; 3942; GFX12-WGP-LABEL: global_agent_acq_rel_monotonic_cmpxchg: 3943; GFX12-WGP: ; %bb.0: ; %entry 3944; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 3945; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3946; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 3947; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 3948; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 3949; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 3950; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 3951; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3952; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 3953; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 3954; GFX12-WGP-NEXT:
s_wait_samplecnt 0x0 3955; GFX12-WGP-NEXT: s_wait_storecnt 0x0 3956; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 3957; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 3958; GFX12-WGP-NEXT: s_wait_storecnt 0x0 3959; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 3960; GFX12-WGP-NEXT: s_endpgm 3961; 3962; GFX12-CU-LABEL: global_agent_acq_rel_monotonic_cmpxchg: 3963; GFX12-CU: ; %bb.0: ; %entry 3964; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 3965; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3966; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 3967; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 3968; GFX12-CU-NEXT: s_wait_kmcnt 0x0 3969; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 3970; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 3971; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3972; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 3973; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 3974; GFX12-CU-NEXT: s_wait_samplecnt 0x0 3975; GFX12-CU-NEXT: s_wait_storecnt 0x0 3976; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 3977; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 3978; GFX12-CU-NEXT: s_wait_storecnt 0x0 3979; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 3980; GFX12-CU-NEXT: s_endpgm 3981 ptr addrspace(1) %out, i32 %in, i32 %old) { 3982entry: 3983 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 3984 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") acq_rel monotonic 3985 ret void 3986} 3987 3988define amdgpu_kernel void @global_agent_seq_cst_monotonic_cmpxchg( 3989; GFX6-LABEL: global_agent_seq_cst_monotonic_cmpxchg: 3990; GFX6: ; %bb.0: ; %entry 3991; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 3992; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 3993; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 3994; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 3995; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3996; GFX6-NEXT: s_mov_b32 s12, s5 3997; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 3998; GFX6-NEXT:
s_mov_b32 s10, 0x100f000 3999; GFX6-NEXT: s_mov_b32 s11, -1 4000; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 4001; GFX6-NEXT: s_mov_b32 s5, s12 4002; GFX6-NEXT: s_mov_b32 s6, s11 4003; GFX6-NEXT: s_mov_b32 s7, s10 4004; GFX6-NEXT: v_mov_b32_e32 v0, s9 4005; GFX6-NEXT: v_mov_b32_e32 v2, s8 4006; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 4007; GFX6-NEXT: v_mov_b32_e32 v1, v2 4008; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4009; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 4010; GFX6-NEXT: s_waitcnt vmcnt(0) 4011; GFX6-NEXT: buffer_wbinvl1 4012; GFX6-NEXT: s_endpgm 4013; 4014; GFX7-LABEL: global_agent_seq_cst_monotonic_cmpxchg: 4015; GFX7: ; %bb.0: ; %entry 4016; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 4017; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 4018; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 4019; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 4020; GFX7-NEXT: s_mov_b64 s[10:11], 16 4021; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4022; GFX7-NEXT: s_mov_b32 s4, s8 4023; GFX7-NEXT: s_mov_b32 s5, s9 4024; GFX7-NEXT: s_mov_b32 s9, s10 4025; GFX7-NEXT: s_mov_b32 s8, s11 4026; GFX7-NEXT: s_add_u32 s4, s4, s9 4027; GFX7-NEXT: s_addc_u32 s8, s5, s8 4028; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 4029; GFX7-NEXT: s_mov_b32 s5, s8 4030; GFX7-NEXT: v_mov_b32_e32 v2, s7 4031; GFX7-NEXT: v_mov_b32_e32 v0, s6 4032; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4033; GFX7-NEXT: v_mov_b32_e32 v3, v0 4034; GFX7-NEXT: v_mov_b32_e32 v0, s4 4035; GFX7-NEXT: v_mov_b32_e32 v1, s5 4036; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4037; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 4038; GFX7-NEXT: s_waitcnt vmcnt(0) 4039; GFX7-NEXT: buffer_wbinvl1_vol 4040; GFX7-NEXT: s_endpgm 4041; 4042; GFX10-WGP-LABEL: global_agent_seq_cst_monotonic_cmpxchg: 4043; GFX10-WGP: ; %bb.0: ; %entry 4044; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 4045; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; ---------------------------------------------------------------------------
; Reviewer note (documentation only; no check line or IR is altered). It is
; placed on a boundary between two expectation lines of kernel
; @global_agent_seq_cst_monotonic_cmpxchg(ptr addrspace(1) %out, i32 %in,
; i32 %old), whose IR body follows the last of its expectations:
;   * %gep points 4 i32 elements past %out, which is why the expected
;     instructions carry an offset of 16 (the GFX7 expectations instead add 16
;     to the base address with s_add_u32 / s_addc_u32).
;   * A volatile cmpxchg on %gep compares against %old and stores %in with
;     syncscope("agent"), success ordering seq_cst and failure ordering
;     monotonic. The result %val is unused and the kernel returns void.
; In the expectations of every run configuration, wait instructions precede the
; compare-swap and another wait follows it. A cache invalidate comes after that
; wait in all configurations except the SKIP-CACHE-INV one; the gfx940
; expectations additionally show buffer_wbl2 sc1 ahead of the compare-swap.
; NOTE(review): these sequences look identical to the ones listed for the
; acq_rel / monotonic kernel earlier in the file - confirm when regenerating.
; The assertions are autogenerated (see the note on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand. The end of this span opens the next kernel,
; @global_agent_monotonic_acquire_cmpxchg.
; ---------------------------------------------------------------------------
4046; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 4047; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 4048; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4049; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 4050; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 4051; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4052; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 4053; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4054; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4055; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 4056; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4057; GFX10-WGP-NEXT: buffer_gl1_inv 4058; GFX10-WGP-NEXT: buffer_gl0_inv 4059; GFX10-WGP-NEXT: s_endpgm 4060; 4061; GFX10-CU-LABEL: global_agent_seq_cst_monotonic_cmpxchg: 4062; GFX10-CU: ; %bb.0: ; %entry 4063; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 4064; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4065; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 4066; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 4067; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4068; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 4069; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 4070; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4071; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 4072; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4073; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 4074; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 4075; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 4076; GFX10-CU-NEXT: buffer_gl1_inv 4077; GFX10-CU-NEXT: buffer_gl0_inv 4078; GFX10-CU-NEXT: s_endpgm 4079; 4080; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_monotonic_cmpxchg: 4081; SKIP-CACHE-INV: ; %bb.0: ; %entry 4082; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 4083; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 4084; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 4085; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 4086; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4087; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 4088;
SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 4089; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 4090; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 4091; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 4092; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 4093; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 4094; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 4095; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 4096; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 4097; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 4098; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 4099; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4100; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 4101; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 4102; SKIP-CACHE-INV-NEXT: s_endpgm 4103; 4104; GFX90A-NOTTGSPLIT-LABEL: global_agent_seq_cst_monotonic_cmpxchg: 4105; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 4106; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4107; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4108; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 4109; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 4110; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4111; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 4112; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 4113; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4114; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 4115; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4116; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 4117; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 4118; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 4119; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 4120; 4121; GFX90A-TGSPLIT-LABEL: global_agent_seq_cst_monotonic_cmpxchg: 4122; GFX90A-TGSPLIT: ; %bb.0: ; %entry 4123; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4124; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4125;
GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 4126; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 4127; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4128; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 4129; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 4130; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4131; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 4132; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4133; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 4134; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 4135; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 4136; GFX90A-TGSPLIT-NEXT: s_endpgm 4137; 4138; GFX940-NOTTGSPLIT-LABEL: global_agent_seq_cst_monotonic_cmpxchg: 4139; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 4140; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4141; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4142; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 4143; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 4144; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4145; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 4146; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4147; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4148; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 4149; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 4150; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4151; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 4152; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 4153; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 4154; GFX940-NOTTGSPLIT-NEXT: s_endpgm 4155; 4156; GFX940-TGSPLIT-LABEL: global_agent_seq_cst_monotonic_cmpxchg: 4157; GFX940-TGSPLIT: ; %bb.0: ; %entry 4158; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4159; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4160; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 4161; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 4162; GFX940-TGSPLIT-NEXT: s_waitcnt
lgkmcnt(0) 4163; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 4164; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4165; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4166; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 4167; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 4168; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4169; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 4170; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 4171; GFX940-TGSPLIT-NEXT: buffer_inv sc1 4172; GFX940-TGSPLIT-NEXT: s_endpgm 4173; 4174; GFX11-WGP-LABEL: global_agent_seq_cst_monotonic_cmpxchg: 4175; GFX11-WGP: ; %bb.0: ; %entry 4176; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 4177; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4178; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 4179; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 4180; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 4181; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 4182; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 4183; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4184; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 4185; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4186; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4187; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 4188; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4189; GFX11-WGP-NEXT: buffer_gl1_inv 4190; GFX11-WGP-NEXT: buffer_gl0_inv 4191; GFX11-WGP-NEXT: s_endpgm 4192; 4193; GFX11-CU-LABEL: global_agent_seq_cst_monotonic_cmpxchg: 4194; GFX11-CU: ; %bb.0: ; %entry 4195; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 4196; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4197; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 4198; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 4199; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 4200; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 4201; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 4202; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4203; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 4204; GFX11-CU-NEXT:
s_waitcnt vmcnt(0) lgkmcnt(0) 4205; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 4206; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 4207; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 4208; GFX11-CU-NEXT: buffer_gl1_inv 4209; GFX11-CU-NEXT: buffer_gl0_inv 4210; GFX11-CU-NEXT: s_endpgm 4211; 4212; GFX12-WGP-LABEL: global_agent_seq_cst_monotonic_cmpxchg: 4213; GFX12-WGP: ; %bb.0: ; %entry 4214; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 4215; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4216; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 4217; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 4218; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 4219; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 4220; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 4221; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4222; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 4223; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 4224; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 4225; GFX12-WGP-NEXT: s_wait_storecnt 0x0 4226; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 4227; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 4228; GFX12-WGP-NEXT: s_wait_storecnt 0x0 4229; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 4230; GFX12-WGP-NEXT: s_endpgm 4231; 4232; GFX12-CU-LABEL: global_agent_seq_cst_monotonic_cmpxchg: 4233; GFX12-CU: ; %bb.0: ; %entry 4234; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 4235; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4236; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 4237; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 4238; GFX12-CU-NEXT: s_wait_kmcnt 0x0 4239; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 4240; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 4241; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4242; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 4243; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 4244; GFX12-CU-NEXT: s_wait_samplecnt 0x0 4245; GFX12-CU-NEXT: s_wait_storecnt 0x0 4246; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 4247; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0,
v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 4248; GFX12-CU-NEXT: s_wait_storecnt 0x0 4249; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 4250; GFX12-CU-NEXT: s_endpgm 4251 ptr addrspace(1) %out, i32 %in, i32 %old) { 4252entry: 4253 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 4254 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") seq_cst monotonic 4255 ret void 4256} 4257 4258define amdgpu_kernel void @global_agent_monotonic_acquire_cmpxchg( 4259; GFX6-LABEL: global_agent_monotonic_acquire_cmpxchg: 4260; GFX6: ; %bb.0: ; %entry 4261; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 4262; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 4263; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 4264; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 4265; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4266; GFX6-NEXT: s_mov_b32 s12, s5 4267; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 4268; GFX6-NEXT: s_mov_b32 s10, 0x100f000 4269; GFX6-NEXT: s_mov_b32 s11, -1 4270; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 4271; GFX6-NEXT: s_mov_b32 s5, s12 4272; GFX6-NEXT: s_mov_b32 s6, s11 4273; GFX6-NEXT: s_mov_b32 s7, s10 4274; GFX6-NEXT: v_mov_b32_e32 v0, s9 4275; GFX6-NEXT: v_mov_b32_e32 v2, s8 4276; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 4277; GFX6-NEXT: v_mov_b32_e32 v1, v2 4278; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 4279; GFX6-NEXT: s_waitcnt vmcnt(0) 4280; GFX6-NEXT: buffer_wbinvl1 4281; GFX6-NEXT: s_endpgm 4282; 4283; GFX7-LABEL: global_agent_monotonic_acquire_cmpxchg: 4284; GFX7: ; %bb.0: ; %entry 4285; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 4286; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 4287; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 4288; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 4289; GFX7-NEXT: s_mov_b64 s[10:11], 16 4290; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4291; GFX7-NEXT: s_mov_b32 s4, s8 4292; GFX7-NEXT: s_mov_b32 s5, s9 4293; GFX7-NEXT: s_mov_b32 s9, s10 4294; GFX7-NEXT:
s_mov_b32 s8, s11 4295; GFX7-NEXT: s_add_u32 s4, s4, s9 4296; GFX7-NEXT: s_addc_u32 s8, s5, s8 4297; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 4298; GFX7-NEXT: s_mov_b32 s5, s8 4299; GFX7-NEXT: v_mov_b32_e32 v2, s7 4300; GFX7-NEXT: v_mov_b32_e32 v0, s6 4301; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4302; GFX7-NEXT: v_mov_b32_e32 v3, v0 4303; GFX7-NEXT: v_mov_b32_e32 v0, s4 4304; GFX7-NEXT: v_mov_b32_e32 v1, s5 4305; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 4306; GFX7-NEXT: s_waitcnt vmcnt(0) 4307; GFX7-NEXT: buffer_wbinvl1_vol 4308; GFX7-NEXT: s_endpgm 4309; 4310; GFX10-WGP-LABEL: global_agent_monotonic_acquire_cmpxchg: 4311; GFX10-WGP: ; %bb.0: ; %entry 4312; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 4313; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4314; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 4315; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 4316; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4317; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 4318; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 4319; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4320; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 4321; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 4322; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4323; GFX10-WGP-NEXT: buffer_gl1_inv 4324; GFX10-WGP-NEXT: buffer_gl0_inv 4325; GFX10-WGP-NEXT: s_endpgm 4326; 4327; GFX10-CU-LABEL: global_agent_monotonic_acquire_cmpxchg: 4328; GFX10-CU: ; %bb.0: ; %entry 4329; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 4330; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4331; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 4332; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 4333; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4334; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 4335; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 4336; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4337; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 4338; GFX10-CU-NEXT: global_atomic_cmpswap 
v0, v[1:2], s[4:5] offset:16 4339; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 4340; GFX10-CU-NEXT: buffer_gl1_inv 4341; GFX10-CU-NEXT: buffer_gl0_inv 4342; GFX10-CU-NEXT: s_endpgm 4343; 4344; SKIP-CACHE-INV-LABEL: global_agent_monotonic_acquire_cmpxchg: 4345; SKIP-CACHE-INV: ; %bb.0: ; %entry 4346; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 4347; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 4348; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 4349; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 4350; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4351; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 4352; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 4353; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 4354; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 4355; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 4356; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 4357; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 4358; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 4359; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 4360; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 4361; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 4362; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 4363; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 4364; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 4365; SKIP-CACHE-INV-NEXT: s_endpgm 4366; 4367; GFX90A-NOTTGSPLIT-LABEL: global_agent_monotonic_acquire_cmpxchg: 4368; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 4369; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4370; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4371; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 4372; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 4373; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4374; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 4375; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 4376; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 
4377; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 4378; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 4379; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 4380; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 4381; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 4382; 4383; GFX90A-TGSPLIT-LABEL: global_agent_monotonic_acquire_cmpxchg: 4384; GFX90A-TGSPLIT: ; %bb.0: ; %entry 4385; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4386; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4387; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 4388; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 4389; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4390; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 4391; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 4392; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4393; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 4394; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 4395; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 4396; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 4397; GFX90A-TGSPLIT-NEXT: s_endpgm 4398; 4399; GFX940-NOTTGSPLIT-LABEL: global_agent_monotonic_acquire_cmpxchg: 4400; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 4401; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4402; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4403; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 4404; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 4405; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4406; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 4407; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4408; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4409; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 4410; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 4411; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 4412; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 4413; GFX940-NOTTGSPLIT-NEXT: s_endpgm 4414; 4415; GFX940-TGSPLIT-LABEL: 
global_agent_monotonic_acquire_cmpxchg: 4416; GFX940-TGSPLIT: ; %bb.0: ; %entry 4417; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4418; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4419; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 4420; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 4421; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4422; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 4423; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4424; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4425; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 4426; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 4427; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 4428; GFX940-TGSPLIT-NEXT: buffer_inv sc1 4429; GFX940-TGSPLIT-NEXT: s_endpgm 4430; 4431; GFX11-WGP-LABEL: global_agent_monotonic_acquire_cmpxchg: 4432; GFX11-WGP: ; %bb.0: ; %entry 4433; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 4434; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4435; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 4436; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 4437; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 4438; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 4439; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 4440; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4441; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 4442; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 4443; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4444; GFX11-WGP-NEXT: buffer_gl1_inv 4445; GFX11-WGP-NEXT: buffer_gl0_inv 4446; GFX11-WGP-NEXT: s_endpgm 4447; 4448; GFX11-CU-LABEL: global_agent_monotonic_acquire_cmpxchg: 4449; GFX11-CU: ; %bb.0: ; %entry 4450; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 4451; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4452; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 4453; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 4454; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 4455; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 4456; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 4457; 
GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4458; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 4459; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 4460; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 4461; GFX11-CU-NEXT: buffer_gl1_inv 4462; GFX11-CU-NEXT: buffer_gl0_inv 4463; GFX11-CU-NEXT: s_endpgm 4464; 4465; GFX12-WGP-LABEL: global_agent_monotonic_acquire_cmpxchg: 4466; GFX12-WGP: ; %bb.0: ; %entry 4467; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 4468; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4469; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 4470; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 4471; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 4472; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 4473; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 4474; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4475; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 4476; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 4477; GFX12-WGP-NEXT: s_wait_storecnt 0x0 4478; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 4479; GFX12-WGP-NEXT: s_endpgm 4480; 4481; GFX12-CU-LABEL: global_agent_monotonic_acquire_cmpxchg: 4482; GFX12-CU: ; %bb.0: ; %entry 4483; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 4484; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4485; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 4486; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 4487; GFX12-CU-NEXT: s_wait_kmcnt 0x0 4488; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 4489; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 4490; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4491; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 4492; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 4493; GFX12-CU-NEXT: s_wait_storecnt 0x0 4494; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 4495; GFX12-CU-NEXT: s_endpgm 4496 ptr addrspace(1) %out, i32 %in, i32 %old) { 4497entry: 4498 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 4499 %val = cmpxchg 
; -----------------------------------------------------------------------------
; NOTE(review): the check lines below are autogenerated (the NOTE on the first
; line of this file names utils/update_llc_test_checks.py); prefer regenerating
; them over editing them by hand.
;
; This span holds, in order:
;   1. The closing IR of the preceding test: a volatile cmpxchg on an
;      addrspace(1) pointer at syncscope("agent") with monotonic / acquire
;      orderings, followed by `ret void`.
;   2. The whole of @global_agent_acquire_acquire_cmpxchg. Its signature is
;      (ptr addrspace(1) %out, i32 %in, i32 %old); the body forms
;      %gep = %out + 4 x i32 and runs a volatile cmpxchg on %gep with operands
;      %old, %in at syncscope("agent") with acquire / acquire orderings. The
;      result %val is never used.
;      What the expected output shows for this kernel:
;        * the 4 x i32 index appears as `offset:16` on the atomic for every
;          prefix except GFX7, which adds 16 to the pointer with
;          s_add_u32 / s_addc_u32 and issues flat_atomic_cmpswap with no
;          offset;
;        * after the s_waitcnt lgkmcnt(0) (s_wait_kmcnt 0x0 on GFX12) that
;          follows the s_load instructions, no further wait is expected
;          before the atomic;
;        * after the atomic each prefix expects one wait (s_waitcnt vmcnt(0);
;          s_waitcnt_vscnt null, 0x0 on GFX10/GFX11; s_wait_storecnt 0x0 on
;          GFX12) and then buffer_wbinvl1 (GFX6), buffer_wbinvl1_vol (GFX7,
;          GFX90A), buffer_gl1_inv + buffer_gl0_inv (GFX10, GFX11),
;          buffer_inv sc1 (GFX940) or global_inv scope:SCOPE_DEV (GFX12);
;        * SKIP-CACHE-INV (run with -amdgcn-skip-cache-invalidations) expects
;          the s_waitcnt vmcnt(0) but none of those trailing instructions;
;        * GFX12 additionally carries scope:SCOPE_DEV on the atomic itself.
;   3. The `define` line and the first GFX6 check lines of
;      @global_agent_release_acquire_cmpxchg.
; -----------------------------------------------------------------------------
volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") monotonic acquire 4500 ret void 4501} 4502 4503define amdgpu_kernel void @global_agent_acquire_acquire_cmpxchg( 4504; GFX6-LABEL: global_agent_acquire_acquire_cmpxchg: 4505; GFX6: ; %bb.0: ; %entry 4506; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 4507; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 4508; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 4509; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 4510; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4511; GFX6-NEXT: s_mov_b32 s12, s5 4512; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 4513; GFX6-NEXT: s_mov_b32 s10, 0x100f000 4514; GFX6-NEXT: s_mov_b32 s11, -1 4515; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 4516; GFX6-NEXT: s_mov_b32 s5, s12 4517; GFX6-NEXT: s_mov_b32 s6, s11 4518; GFX6-NEXT: s_mov_b32 s7, s10 4519; GFX6-NEXT: v_mov_b32_e32 v0, s9 4520; GFX6-NEXT: v_mov_b32_e32 v2, s8 4521; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 4522; GFX6-NEXT: v_mov_b32_e32 v1, v2 4523; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 4524; GFX6-NEXT: s_waitcnt vmcnt(0) 4525; GFX6-NEXT: buffer_wbinvl1 4526; GFX6-NEXT: s_endpgm 4527; 4528; GFX7-LABEL: global_agent_acquire_acquire_cmpxchg: 4529; GFX7: ; %bb.0: ; %entry 4530; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 4531; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 4532; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 4533; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 4534; GFX7-NEXT: s_mov_b64 s[10:11], 16 4535; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4536; GFX7-NEXT: s_mov_b32 s4, s8 4537; GFX7-NEXT: s_mov_b32 s5, s9 4538; GFX7-NEXT: s_mov_b32 s9, s10 4539; GFX7-NEXT: s_mov_b32 s8, s11 4540; GFX7-NEXT: s_add_u32 s4, s4, s9 4541; GFX7-NEXT: s_addc_u32 s8, s5, s8 4542; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 4543; GFX7-NEXT: s_mov_b32 s5, s8 4544; GFX7-NEXT: v_mov_b32_e32 v2, s7 4545; GFX7-NEXT: v_mov_b32_e32 v0, s6 4546; GFX7-NEXT: ; kill: def
$vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4547; GFX7-NEXT: v_mov_b32_e32 v3, v0 4548; GFX7-NEXT: v_mov_b32_e32 v0, s4 4549; GFX7-NEXT: v_mov_b32_e32 v1, s5 4550; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 4551; GFX7-NEXT: s_waitcnt vmcnt(0) 4552; GFX7-NEXT: buffer_wbinvl1_vol 4553; GFX7-NEXT: s_endpgm 4554; 4555; GFX10-WGP-LABEL: global_agent_acquire_acquire_cmpxchg: 4556; GFX10-WGP: ; %bb.0: ; %entry 4557; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 4558; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4559; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 4560; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 4561; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4562; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 4563; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 4564; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4565; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 4566; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 4567; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4568; GFX10-WGP-NEXT: buffer_gl1_inv 4569; GFX10-WGP-NEXT: buffer_gl0_inv 4570; GFX10-WGP-NEXT: s_endpgm 4571; 4572; GFX10-CU-LABEL: global_agent_acquire_acquire_cmpxchg: 4573; GFX10-CU: ; %bb.0: ; %entry 4574; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 4575; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4576; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 4577; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 4578; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4579; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 4580; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 4581; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4582; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 4583; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 4584; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 4585; GFX10-CU-NEXT: buffer_gl1_inv 4586; GFX10-CU-NEXT: buffer_gl0_inv 4587; GFX10-CU-NEXT: s_endpgm 4588; 4589; SKIP-CACHE-INV-LABEL: global_agent_acquire_acquire_cmpxchg: 4590; SKIP-CACHE-INV: ; %bb.0: ; %entry 4591;
SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 4592; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 4593; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 4594; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 4595; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4596; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 4597; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 4598; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 4599; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 4600; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 4601; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 4602; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 4603; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 4604; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 4605; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 4606; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 4607; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 4608; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 4609; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 4610; SKIP-CACHE-INV-NEXT: s_endpgm 4611; 4612; GFX90A-NOTTGSPLIT-LABEL: global_agent_acquire_acquire_cmpxchg: 4613; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 4614; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4615; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4616; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 4617; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 4618; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4619; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 4620; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 4621; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4622; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 4623; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 4624; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 4625; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 4626; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 4627; 4628;
GFX90A-TGSPLIT-LABEL: global_agent_acquire_acquire_cmpxchg: 4629; GFX90A-TGSPLIT: ; %bb.0: ; %entry 4630; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4631; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4632; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 4633; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 4634; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4635; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 4636; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 4637; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4638; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 4639; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 4640; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 4641; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 4642; GFX90A-TGSPLIT-NEXT: s_endpgm 4643; 4644; GFX940-NOTTGSPLIT-LABEL: global_agent_acquire_acquire_cmpxchg: 4645; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 4646; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4647; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4648; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 4649; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 4650; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4651; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 4652; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4653; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4654; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 4655; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 4656; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 4657; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 4658; GFX940-NOTTGSPLIT-NEXT: s_endpgm 4659; 4660; GFX940-TGSPLIT-LABEL: global_agent_acquire_acquire_cmpxchg: 4661; GFX940-TGSPLIT: ; %bb.0: ; %entry 4662; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4663; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4664; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 4665; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5],
0xc 4666; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4667; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 4668; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4669; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4670; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 4671; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 4672; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 4673; GFX940-TGSPLIT-NEXT: buffer_inv sc1 4674; GFX940-TGSPLIT-NEXT: s_endpgm 4675; 4676; GFX11-WGP-LABEL: global_agent_acquire_acquire_cmpxchg: 4677; GFX11-WGP: ; %bb.0: ; %entry 4678; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 4679; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4680; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 4681; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 4682; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 4683; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 4684; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 4685; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4686; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 4687; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 4688; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4689; GFX11-WGP-NEXT: buffer_gl1_inv 4690; GFX11-WGP-NEXT: buffer_gl0_inv 4691; GFX11-WGP-NEXT: s_endpgm 4692; 4693; GFX11-CU-LABEL: global_agent_acquire_acquire_cmpxchg: 4694; GFX11-CU: ; %bb.0: ; %entry 4695; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 4696; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4697; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 4698; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 4699; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 4700; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 4701; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 4702; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4703; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 4704; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 4705; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 4706; GFX11-CU-NEXT: buffer_gl1_inv 4707; GFX11-CU-NEXT:
buffer_gl0_inv 4708; GFX11-CU-NEXT: s_endpgm 4709; 4710; GFX12-WGP-LABEL: global_agent_acquire_acquire_cmpxchg: 4711; GFX12-WGP: ; %bb.0: ; %entry 4712; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 4713; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4714; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 4715; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 4716; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 4717; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 4718; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 4719; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4720; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 4721; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 4722; GFX12-WGP-NEXT: s_wait_storecnt 0x0 4723; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 4724; GFX12-WGP-NEXT: s_endpgm 4725; 4726; GFX12-CU-LABEL: global_agent_acquire_acquire_cmpxchg: 4727; GFX12-CU: ; %bb.0: ; %entry 4728; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 4729; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4730; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 4731; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 4732; GFX12-CU-NEXT: s_wait_kmcnt 0x0 4733; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 4734; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 4735; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4736; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 4737; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 4738; GFX12-CU-NEXT: s_wait_storecnt 0x0 4739; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 4740; GFX12-CU-NEXT: s_endpgm 4741 ptr addrspace(1) %out, i32 %in, i32 %old) { 4742entry: 4743 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 4744 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") acquire acquire 4745 ret void 4746} 4747 4748define amdgpu_kernel void @global_agent_release_acquire_cmpxchg( 4749; GFX6-LABEL: global_agent_release_acquire_cmpxchg: 4750; GFX6: ; %bb.0: ; %entry 4751; GFX6-NEXT: s_mov_b64 s[6:7],
; -----------------------------------------------------------------------------
; NOTE(review): the check lines below are autogenerated (the NOTE on the first
; line of this file names utils/update_llc_test_checks.py); prefer regenerating
; them over editing them by hand.
;
; This span holds the remaining check lines and the IR body of
; @global_agent_release_acquire_cmpxchg (its `define` line sits just before
; this span), followed by the `define` line and the first GFX6 check lines of
; @global_agent_acq_rel_acquire_cmpxchg.
;
; IR under test for release_acquire: signature
; (ptr addrspace(1) %out, i32 %in, i32 %old); %gep = %out + 4 x i32; a volatile
; cmpxchg on %gep with operands %old, %in at syncscope("agent") with
; release / acquire orderings. The result %val is never used.
;
; What the expected output shows for this kernel:
;   * the 4 x i32 index appears as `offset:16` on the atomic for every prefix
;     except GFX7, which adds 16 to the pointer with s_add_u32 / s_addc_u32;
;   * immediately before the atomic each prefix expects:
;       - s_waitcnt vmcnt(0) lgkmcnt(0) on GFX6, GFX7, SKIP-CACHE-INV, GFX90A;
;       - s_waitcnt vmcnt(0) lgkmcnt(0) then s_waitcnt_vscnt null, 0x0 on
;         GFX10 and GFX11;
;       - buffer_wbl2 sc1 then s_waitcnt vmcnt(0) lgkmcnt(0) on GFX940;
;       - s_wait_bvhcnt 0x0, s_wait_samplecnt 0x0, s_wait_storecnt 0x0 and
;         s_wait_loadcnt_dscnt 0x0 on GFX12;
;   * after the atomic each prefix expects one wait (s_waitcnt vmcnt(0);
;     s_waitcnt_vscnt null, 0x0 on GFX10/GFX11; s_wait_storecnt 0x0 on GFX12)
;     and then buffer_wbinvl1 (GFX6), buffer_wbinvl1_vol (GFX7, GFX90A),
;     buffer_gl1_inv + buffer_gl0_inv (GFX10, GFX11), buffer_inv sc1 (GFX940)
;     or global_inv scope:SCOPE_DEV (GFX12);
;   * SKIP-CACHE-INV (run with -amdgcn-skip-cache-invalidations) expects both
;     waits but none of those trailing instructions.
; -----------------------------------------------------------------------------
s[8:9] 4752; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 4753; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 4754; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 4755; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4756; GFX6-NEXT: s_mov_b32 s12, s5 4757; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 4758; GFX6-NEXT: s_mov_b32 s10, 0x100f000 4759; GFX6-NEXT: s_mov_b32 s11, -1 4760; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 4761; GFX6-NEXT: s_mov_b32 s5, s12 4762; GFX6-NEXT: s_mov_b32 s6, s11 4763; GFX6-NEXT: s_mov_b32 s7, s10 4764; GFX6-NEXT: v_mov_b32_e32 v0, s9 4765; GFX6-NEXT: v_mov_b32_e32 v2, s8 4766; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 4767; GFX6-NEXT: v_mov_b32_e32 v1, v2 4768; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4769; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 4770; GFX6-NEXT: s_waitcnt vmcnt(0) 4771; GFX6-NEXT: buffer_wbinvl1 4772; GFX6-NEXT: s_endpgm 4773; 4774; GFX7-LABEL: global_agent_release_acquire_cmpxchg: 4775; GFX7: ; %bb.0: ; %entry 4776; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 4777; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 4778; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 4779; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 4780; GFX7-NEXT: s_mov_b64 s[10:11], 16 4781; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4782; GFX7-NEXT: s_mov_b32 s4, s8 4783; GFX7-NEXT: s_mov_b32 s5, s9 4784; GFX7-NEXT: s_mov_b32 s9, s10 4785; GFX7-NEXT: s_mov_b32 s8, s11 4786; GFX7-NEXT: s_add_u32 s4, s4, s9 4787; GFX7-NEXT: s_addc_u32 s8, s5, s8 4788; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 4789; GFX7-NEXT: s_mov_b32 s5, s8 4790; GFX7-NEXT: v_mov_b32_e32 v2, s7 4791; GFX7-NEXT: v_mov_b32_e32 v0, s6 4792; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4793; GFX7-NEXT: v_mov_b32_e32 v3, v0 4794; GFX7-NEXT: v_mov_b32_e32 v0, s4 4795; GFX7-NEXT: v_mov_b32_e32 v1, s5 4796; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4797; GFX7-NEXT: flat_atomic_cmpswap v[0:1],
v[2:3] 4798; GFX7-NEXT: s_waitcnt vmcnt(0) 4799; GFX7-NEXT: buffer_wbinvl1_vol 4800; GFX7-NEXT: s_endpgm 4801; 4802; GFX10-WGP-LABEL: global_agent_release_acquire_cmpxchg: 4803; GFX10-WGP: ; %bb.0: ; %entry 4804; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 4805; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4806; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 4807; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 4808; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4809; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 4810; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 4811; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4812; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 4813; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4814; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4815; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 4816; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4817; GFX10-WGP-NEXT: buffer_gl1_inv 4818; GFX10-WGP-NEXT: buffer_gl0_inv 4819; GFX10-WGP-NEXT: s_endpgm 4820; 4821; GFX10-CU-LABEL: global_agent_release_acquire_cmpxchg: 4822; GFX10-CU: ; %bb.0: ; %entry 4823; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 4824; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4825; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 4826; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 4827; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4828; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 4829; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 4830; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4831; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 4832; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4833; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 4834; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 4835; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 4836; GFX10-CU-NEXT: buffer_gl1_inv 4837; GFX10-CU-NEXT: buffer_gl0_inv 4838; GFX10-CU-NEXT: s_endpgm 4839; 4840; SKIP-CACHE-INV-LABEL: global_agent_release_acquire_cmpxchg: 4841; SKIP-CACHE-INV: ; %bb.0: ; %entry 4842; SKIP-CACHE-INV-NEXT:
s_mov_b64 s[2:3], s[4:5] 4843; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 4844; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 4845; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 4846; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4847; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 4848; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 4849; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 4850; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 4851; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 4852; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 4853; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 4854; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 4855; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 4856; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 4857; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 4858; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 4859; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4860; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 4861; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 4862; SKIP-CACHE-INV-NEXT: s_endpgm 4863; 4864; GFX90A-NOTTGSPLIT-LABEL: global_agent_release_acquire_cmpxchg: 4865; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 4866; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4867; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4868; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 4869; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 4870; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4871; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 4872; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 4873; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4874; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 4875; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4876; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 4877; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 4878;
GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 4879; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 4880; 4881; GFX90A-TGSPLIT-LABEL: global_agent_release_acquire_cmpxchg: 4882; GFX90A-TGSPLIT: ; %bb.0: ; %entry 4883; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4884; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4885; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 4886; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 4887; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4888; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 4889; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 4890; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4891; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 4892; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4893; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 4894; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 4895; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 4896; GFX90A-TGSPLIT-NEXT: s_endpgm 4897; 4898; GFX940-NOTTGSPLIT-LABEL: global_agent_release_acquire_cmpxchg: 4899; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 4900; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4901; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4902; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 4903; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 4904; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4905; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 4906; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4907; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4908; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 4909; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 4910; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4911; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 4912; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 4913; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 4914; GFX940-NOTTGSPLIT-NEXT: s_endpgm 4915; 4916; GFX940-TGSPLIT-LABEL: global_agent_release_acquire_cmpxchg:
4917; GFX940-TGSPLIT: ; %bb.0: ; %entry 4918; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4919; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4920; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 4921; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 4922; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4923; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 4924; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4925; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4926; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 4927; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 4928; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4929; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 4930; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 4931; GFX940-TGSPLIT-NEXT: buffer_inv sc1 4932; GFX940-TGSPLIT-NEXT: s_endpgm 4933; 4934; GFX11-WGP-LABEL: global_agent_release_acquire_cmpxchg: 4935; GFX11-WGP: ; %bb.0: ; %entry 4936; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 4937; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4938; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 4939; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 4940; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 4941; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 4942; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 4943; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4944; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 4945; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4946; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4947; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 4948; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4949; GFX11-WGP-NEXT: buffer_gl1_inv 4950; GFX11-WGP-NEXT: buffer_gl0_inv 4951; GFX11-WGP-NEXT: s_endpgm 4952; 4953; GFX11-CU-LABEL: global_agent_release_acquire_cmpxchg: 4954; GFX11-CU: ; %bb.0: ; %entry 4955; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 4956; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4957; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 4958; GFX11-CU-NEXT: s_load_b32
s2, s[4:5], 0xc 4959; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 4960; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 4961; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 4962; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4963; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 4964; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4965; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 4966; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 4967; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 4968; GFX11-CU-NEXT: buffer_gl1_inv 4969; GFX11-CU-NEXT: buffer_gl0_inv 4970; GFX11-CU-NEXT: s_endpgm 4971; 4972; GFX12-WGP-LABEL: global_agent_release_acquire_cmpxchg: 4973; GFX12-WGP: ; %bb.0: ; %entry 4974; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 4975; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4976; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 4977; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 4978; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 4979; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 4980; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 4981; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4982; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 4983; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 4984; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 4985; GFX12-WGP-NEXT: s_wait_storecnt 0x0 4986; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 4987; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 4988; GFX12-WGP-NEXT: s_wait_storecnt 0x0 4989; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 4990; GFX12-WGP-NEXT: s_endpgm 4991; 4992; GFX12-CU-LABEL: global_agent_release_acquire_cmpxchg: 4993; GFX12-CU: ; %bb.0: ; %entry 4994; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 4995; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4996; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 4997; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 4998; GFX12-CU-NEXT: s_wait_kmcnt 0x0 4999; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 5000; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 5001; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def
$vgpr1_vgpr2 killed $exec 5002; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 5003; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 5004; GFX12-CU-NEXT: s_wait_samplecnt 0x0 5005; GFX12-CU-NEXT: s_wait_storecnt 0x0 5006; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 5007; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 5008; GFX12-CU-NEXT: s_wait_storecnt 0x0 5009; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 5010; GFX12-CU-NEXT: s_endpgm 5011 ptr addrspace(1) %out, i32 %in, i32 %old) { 5012entry: 5013 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 5014 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") release acquire 5015 ret void 5016} 5017 5018define amdgpu_kernel void @global_agent_acq_rel_acquire_cmpxchg( 5019; GFX6-LABEL: global_agent_acq_rel_acquire_cmpxchg: 5020; GFX6: ; %bb.0: ; %entry 5021; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 5022; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 5023; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 5024; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 5025; GFX6-NEXT: s_waitcnt lgkmcnt(0) 5026; GFX6-NEXT: s_mov_b32 s12, s5 5027; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 5028; GFX6-NEXT: s_mov_b32 s10, 0x100f000 5029; GFX6-NEXT: s_mov_b32 s11, -1 5030; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 5031; GFX6-NEXT: s_mov_b32 s5, s12 5032; GFX6-NEXT: s_mov_b32 s6, s11 5033; GFX6-NEXT: s_mov_b32 s7, s10 5034; GFX6-NEXT: v_mov_b32_e32 v0, s9 5035; GFX6-NEXT: v_mov_b32_e32 v2, s8 5036; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 5037; GFX6-NEXT: v_mov_b32_e32 v1, v2 5038; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5039; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 5040; GFX6-NEXT: s_waitcnt vmcnt(0) 5041; GFX6-NEXT: buffer_wbinvl1 5042; GFX6-NEXT: s_endpgm 5043; 5044; GFX7-LABEL: global_agent_acq_rel_acquire_cmpxchg: 5045; GFX7: ; %bb.0: ; %entry 5046; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 5047; GFX7-NEXT:
s_load_dwordx2 s[8:9], s[4:5], 0x0 5048; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 5049; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 5050; GFX7-NEXT: s_mov_b64 s[10:11], 16 5051; GFX7-NEXT: s_waitcnt lgkmcnt(0) 5052; GFX7-NEXT: s_mov_b32 s4, s8 5053; GFX7-NEXT: s_mov_b32 s5, s9 5054; GFX7-NEXT: s_mov_b32 s9, s10 5055; GFX7-NEXT: s_mov_b32 s8, s11 5056; GFX7-NEXT: s_add_u32 s4, s4, s9 5057; GFX7-NEXT: s_addc_u32 s8, s5, s8 5058; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 5059; GFX7-NEXT: s_mov_b32 s5, s8 5060; GFX7-NEXT: v_mov_b32_e32 v2, s7 5061; GFX7-NEXT: v_mov_b32_e32 v0, s6 5062; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5063; GFX7-NEXT: v_mov_b32_e32 v3, v0 5064; GFX7-NEXT: v_mov_b32_e32 v0, s4 5065; GFX7-NEXT: v_mov_b32_e32 v1, s5 5066; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5067; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 5068; GFX7-NEXT: s_waitcnt vmcnt(0) 5069; GFX7-NEXT: buffer_wbinvl1_vol 5070; GFX7-NEXT: s_endpgm 5071; 5072; GFX10-WGP-LABEL: global_agent_acq_rel_acquire_cmpxchg: 5073; GFX10-WGP: ; %bb.0: ; %entry 5074; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 5075; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5076; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 5077; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 5078; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 5079; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 5080; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 5081; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5082; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 5083; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5084; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5085; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 5086; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5087; GFX10-WGP-NEXT: buffer_gl1_inv 5088; GFX10-WGP-NEXT: buffer_gl0_inv 5089; GFX10-WGP-NEXT: s_endpgm 5090; 5091; GFX10-CU-LABEL: global_agent_acq_rel_acquire_cmpxchg: 5092; GFX10-CU: ; %bb.0: ; %entry 5093; GFX10-CU-NEXT: 
v_mov_b32_e32 v0, 0 5094; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5095; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 5096; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 5097; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 5098; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 5099; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 5100; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5101; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 5102; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5103; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 5104; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 5105; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 5106; GFX10-CU-NEXT: buffer_gl1_inv 5107; GFX10-CU-NEXT: buffer_gl0_inv 5108; GFX10-CU-NEXT: s_endpgm 5109; 5110; SKIP-CACHE-INV-LABEL: global_agent_acq_rel_acquire_cmpxchg: 5111; SKIP-CACHE-INV: ; %bb.0: ; %entry 5112; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 5113; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 5114; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 5115; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 5116; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 5117; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 5118; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 5119; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 5120; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 5121; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 5122; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 5123; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 5124; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 5125; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 5126; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 5127; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 5128; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 5129; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5130; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 5131; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 5132; 
SKIP-CACHE-INV-NEXT: s_endpgm 5133; 5134; GFX90A-NOTTGSPLIT-LABEL: global_agent_acq_rel_acquire_cmpxchg: 5135; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 5136; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 5137; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5138; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 5139; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 5140; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5141; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 5142; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 5143; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5144; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 5145; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5146; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 5147; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 5148; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 5149; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 5150; 5151; GFX90A-TGSPLIT-LABEL: global_agent_acq_rel_acquire_cmpxchg: 5152; GFX90A-TGSPLIT: ; %bb.0: ; %entry 5153; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 5154; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5155; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 5156; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 5157; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5158; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 5159; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 5160; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5161; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 5162; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5163; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 5164; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 5165; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 5166; GFX90A-TGSPLIT-NEXT: s_endpgm 5167; 5168; GFX940-NOTTGSPLIT-LABEL: global_agent_acq_rel_acquire_cmpxchg: 5169; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 5170; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 
0 5171; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5172; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 5173; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 5174; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5175; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 5176; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 5177; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5178; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 5179; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 5180; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5181; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 5182; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 5183; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 5184; GFX940-NOTTGSPLIT-NEXT: s_endpgm 5185; 5186; GFX940-TGSPLIT-LABEL: global_agent_acq_rel_acquire_cmpxchg: 5187; GFX940-TGSPLIT: ; %bb.0: ; %entry 5188; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 5189; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5190; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 5191; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 5192; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5193; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 5194; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 5195; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5196; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 5197; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 5198; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5199; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 5200; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 5201; GFX940-TGSPLIT-NEXT: buffer_inv sc1 5202; GFX940-TGSPLIT-NEXT: s_endpgm 5203; 5204; GFX11-WGP-LABEL: global_agent_acq_rel_acquire_cmpxchg: 5205; GFX11-WGP: ; %bb.0: ; %entry 5206; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 5207; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5208; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 5209; GFX11-WGP-NEXT: s_load_b32 
s2, s[4:5], 0xc 5210; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 5211; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 5212; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 5213; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5214; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 5215; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5216; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5217; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 5218; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5219; GFX11-WGP-NEXT: buffer_gl1_inv 5220; GFX11-WGP-NEXT: buffer_gl0_inv 5221; GFX11-WGP-NEXT: s_endpgm 5222; 5223; GFX11-CU-LABEL: global_agent_acq_rel_acquire_cmpxchg: 5224; GFX11-CU: ; %bb.0: ; %entry 5225; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 5226; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5227; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 5228; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 5229; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 5230; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 5231; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 5232; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5233; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 5234; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5235; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 5236; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 5237; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 5238; GFX11-CU-NEXT: buffer_gl1_inv 5239; GFX11-CU-NEXT: buffer_gl0_inv 5240; GFX11-CU-NEXT: s_endpgm 5241; 5242; GFX12-WGP-LABEL: global_agent_acq_rel_acquire_cmpxchg: 5243; GFX12-WGP: ; %bb.0: ; %entry 5244; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 5245; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5246; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 5247; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 5248; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 5249; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 5250; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 5251; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5252; GFX12-WGP-NEXT: 
; Review note for @global_agent_seq_cst_acquire_cmpxchg, whose `define` sits
; further along the collapsed line below.
; The kernel performs one volatile i32 cmpxchg on a global (addrspace(1))
; pointer at syncscope("agent"), with success ordering seq_cst and failure
; ordering acquire; the result is discarded and the kernel returns void.
; The pointer is %out advanced by four i32 elements, so the expected code
; addresses the atomic 16 bytes past the base (as an instruction offset, or as
; an explicit scalar add of 16 on the flat-instruction path).
; The assertion lines are autogenerated (utils/update_llc_test_checks.py, per
; the file header); regenerate them with that script instead of hand-editing.
v_mov_b32_e32 v2, v3 5253; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 5254; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 5255; GFX12-WGP-NEXT: s_wait_storecnt 0x0 5256; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 5257; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 5258; GFX12-WGP-NEXT: s_wait_storecnt 0x0 5259; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 5260; GFX12-WGP-NEXT: s_endpgm 5261; 5262; GFX12-CU-LABEL: global_agent_acq_rel_acquire_cmpxchg: 5263; GFX12-CU: ; %bb.0: ; %entry 5264; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 5265; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5266; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 5267; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 5268; GFX12-CU-NEXT: s_wait_kmcnt 0x0 5269; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 5270; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 5271; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5272; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 5273; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 5274; GFX12-CU-NEXT: s_wait_samplecnt 0x0 5275; GFX12-CU-NEXT: s_wait_storecnt 0x0 5276; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 5277; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 5278; GFX12-CU-NEXT: s_wait_storecnt 0x0 5279; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 5280; GFX12-CU-NEXT: s_endpgm 5281 ptr addrspace(1) %out, i32 %in, i32 %old) { 5282entry: 5283 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 5284 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") acq_rel acquire 5285 ret void 5286} 5287 5288define amdgpu_kernel void @global_agent_seq_cst_acquire_cmpxchg( 5289; GFX6-LABEL: global_agent_seq_cst_acquire_cmpxchg: 5290; GFX6: ; %bb.0: ; %entry 5291; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 5292; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 5293; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 5294; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 5295; GFX6-NEXT: s_waitcnt lgkmcnt(0) 5296; GFX6-NEXT: s_mov_b32 s12, s5 5297;
GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 5298; GFX6-NEXT: s_mov_b32 s10, 0x100f000 5299; GFX6-NEXT: s_mov_b32 s11, -1 5300; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 5301; GFX6-NEXT: s_mov_b32 s5, s12 5302; GFX6-NEXT: s_mov_b32 s6, s11 5303; GFX6-NEXT: s_mov_b32 s7, s10 5304; GFX6-NEXT: v_mov_b32_e32 v0, s9 5305; GFX6-NEXT: v_mov_b32_e32 v2, s8 5306; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 5307; GFX6-NEXT: v_mov_b32_e32 v1, v2 5308; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5309; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 5310; GFX6-NEXT: s_waitcnt vmcnt(0) 5311; GFX6-NEXT: buffer_wbinvl1 5312; GFX6-NEXT: s_endpgm 5313; 5314; GFX7-LABEL: global_agent_seq_cst_acquire_cmpxchg: 5315; GFX7: ; %bb.0: ; %entry 5316; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 5317; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 5318; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 5319; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 5320; GFX7-NEXT: s_mov_b64 s[10:11], 16 5321; GFX7-NEXT: s_waitcnt lgkmcnt(0) 5322; GFX7-NEXT: s_mov_b32 s4, s8 5323; GFX7-NEXT: s_mov_b32 s5, s9 5324; GFX7-NEXT: s_mov_b32 s9, s10 5325; GFX7-NEXT: s_mov_b32 s8, s11 5326; GFX7-NEXT: s_add_u32 s4, s4, s9 5327; GFX7-NEXT: s_addc_u32 s8, s5, s8 5328; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 5329; GFX7-NEXT: s_mov_b32 s5, s8 5330; GFX7-NEXT: v_mov_b32_e32 v2, s7 5331; GFX7-NEXT: v_mov_b32_e32 v0, s6 5332; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5333; GFX7-NEXT: v_mov_b32_e32 v3, v0 5334; GFX7-NEXT: v_mov_b32_e32 v0, s4 5335; GFX7-NEXT: v_mov_b32_e32 v1, s5 5336; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5337; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 5338; GFX7-NEXT: s_waitcnt vmcnt(0) 5339; GFX7-NEXT: buffer_wbinvl1_vol 5340; GFX7-NEXT: s_endpgm 5341; 5342; GFX10-WGP-LABEL: global_agent_seq_cst_acquire_cmpxchg: 5343; GFX10-WGP: ; %bb.0: ; %entry 5344; GFX10-WGP-NEXT: 
v_mov_b32_e32 v0, 0 5345; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5346; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 5347; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 5348; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 5349; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 5350; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 5351; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5352; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 5353; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5354; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5355; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 5356; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5357; GFX10-WGP-NEXT: buffer_gl1_inv 5358; GFX10-WGP-NEXT: buffer_gl0_inv 5359; GFX10-WGP-NEXT: s_endpgm 5360; 5361; GFX10-CU-LABEL: global_agent_seq_cst_acquire_cmpxchg: 5362; GFX10-CU: ; %bb.0: ; %entry 5363; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 5364; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5365; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 5366; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 5367; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 5368; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 5369; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 5370; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5371; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 5372; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5373; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 5374; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 5375; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 5376; GFX10-CU-NEXT: buffer_gl1_inv 5377; GFX10-CU-NEXT: buffer_gl0_inv 5378; GFX10-CU-NEXT: s_endpgm 5379; 5380; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_acquire_cmpxchg: 5381; SKIP-CACHE-INV: ; %bb.0: ; %entry 5382; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 5383; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 5384; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 5385; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 5386; SKIP-CACHE-INV-NEXT: 
s_waitcnt lgkmcnt(0) 5387; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 5388; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 5389; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 5390; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 5391; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 5392; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 5393; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 5394; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 5395; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 5396; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 5397; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 5398; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 5399; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5400; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 5401; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 5402; SKIP-CACHE-INV-NEXT: s_endpgm 5403; 5404; GFX90A-NOTTGSPLIT-LABEL: global_agent_seq_cst_acquire_cmpxchg: 5405; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 5406; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 5407; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5408; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 5409; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 5410; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5411; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 5412; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 5413; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5414; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 5415; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5416; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 5417; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 5418; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 5419; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 5420; 5421; GFX90A-TGSPLIT-LABEL: global_agent_seq_cst_acquire_cmpxchg: 5422; GFX90A-TGSPLIT: ; %bb.0: ; %entry 5423; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 
5424; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5425; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 5426; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 5427; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5428; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 5429; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 5430; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5431; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 5432; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5433; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 5434; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 5435; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 5436; GFX90A-TGSPLIT-NEXT: s_endpgm 5437; 5438; GFX940-NOTTGSPLIT-LABEL: global_agent_seq_cst_acquire_cmpxchg: 5439; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 5440; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 5441; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5442; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 5443; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 5444; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5445; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 5446; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 5447; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5448; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 5449; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 5450; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5451; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 5452; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 5453; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 5454; GFX940-NOTTGSPLIT-NEXT: s_endpgm 5455; 5456; GFX940-TGSPLIT-LABEL: global_agent_seq_cst_acquire_cmpxchg: 5457; GFX940-TGSPLIT: ; %bb.0: ; %entry 5458; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 5459; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5460; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 5461; GFX940-TGSPLIT-NEXT: 
s_load_dword s2, s[4:5], 0xc 5462; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5463; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 5464; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 5465; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5466; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 5467; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 5468; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5469; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 5470; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 5471; GFX940-TGSPLIT-NEXT: buffer_inv sc1 5472; GFX940-TGSPLIT-NEXT: s_endpgm 5473; 5474; GFX11-WGP-LABEL: global_agent_seq_cst_acquire_cmpxchg: 5475; GFX11-WGP: ; %bb.0: ; %entry 5476; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 5477; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5478; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 5479; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 5480; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 5481; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 5482; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 5483; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5484; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 5485; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5486; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5487; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 5488; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5489; GFX11-WGP-NEXT: buffer_gl1_inv 5490; GFX11-WGP-NEXT: buffer_gl0_inv 5491; GFX11-WGP-NEXT: s_endpgm 5492; 5493; GFX11-CU-LABEL: global_agent_seq_cst_acquire_cmpxchg: 5494; GFX11-CU: ; %bb.0: ; %entry 5495; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 5496; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5497; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 5498; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 5499; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 5500; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 5501; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 5502; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5503; 
GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 5504; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5505; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 5506; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 5507; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 5508; GFX11-CU-NEXT: buffer_gl1_inv 5509; GFX11-CU-NEXT: buffer_gl0_inv 5510; GFX11-CU-NEXT: s_endpgm 5511; 5512; GFX12-WGP-LABEL: global_agent_seq_cst_acquire_cmpxchg: 5513; GFX12-WGP: ; %bb.0: ; %entry 5514; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 5515; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5516; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 5517; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 5518; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 5519; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 5520; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 5521; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5522; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 5523; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 5524; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 5525; GFX12-WGP-NEXT: s_wait_storecnt 0x0 5526; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 5527; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 5528; GFX12-WGP-NEXT: s_wait_storecnt 0x0 5529; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 5530; GFX12-WGP-NEXT: s_endpgm 5531; 5532; GFX12-CU-LABEL: global_agent_seq_cst_acquire_cmpxchg: 5533; GFX12-CU: ; %bb.0: ; %entry 5534; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 5535; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5536; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 5537; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 5538; GFX12-CU-NEXT: s_wait_kmcnt 0x0 5539; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 5540; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 5541; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5542; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 5543; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 5544; GFX12-CU-NEXT: s_wait_samplecnt 0x0 5545; GFX12-CU-NEXT: s_wait_storecnt 0x0 5546; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 
; Review note for @global_agent_monotonic_seq_cst_cmpxchg, whose `define` sits
; further along the collapsed line below.
; The kernel performs one volatile i32 cmpxchg on %out advanced by four i32
; elements (16 bytes) in addrspace(1) at syncscope("agent"), with success
; ordering monotonic and failure ordering seq_cst; the result is discarded.
; The first target's expectations still show s_waitcnt vmcnt(0) lgkmcnt(0)
; before buffer_atomic_cmpswap and s_waitcnt vmcnt(0) plus buffer_wbinvl1
; after it, even though the success ordering is only monotonic.
; NOTE(review) presumably the stronger failure ordering drives that sequence --
; confirm against the AMDGPU memory model before changing expectations.
; The assertion lines are autogenerated (utils/update_llc_test_checks.py, per
; the file header); regenerate them with that script instead of hand-editing.
5547; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 5548; GFX12-CU-NEXT: s_wait_storecnt 0x0 5549; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 5550; GFX12-CU-NEXT: s_endpgm 5551 ptr addrspace(1) %out, i32 %in, i32 %old) { 5552entry: 5553 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 5554 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") seq_cst acquire 5555 ret void 5556} 5557 5558define amdgpu_kernel void @global_agent_monotonic_seq_cst_cmpxchg( 5559; GFX6-LABEL: global_agent_monotonic_seq_cst_cmpxchg: 5560; GFX6: ; %bb.0: ; %entry 5561; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 5562; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 5563; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 5564; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 5565; GFX6-NEXT: s_waitcnt lgkmcnt(0) 5566; GFX6-NEXT: s_mov_b32 s12, s5 5567; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 5568; GFX6-NEXT: s_mov_b32 s10, 0x100f000 5569; GFX6-NEXT: s_mov_b32 s11, -1 5570; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 5571; GFX6-NEXT: s_mov_b32 s5, s12 5572; GFX6-NEXT: s_mov_b32 s6, s11 5573; GFX6-NEXT: s_mov_b32 s7, s10 5574; GFX6-NEXT: v_mov_b32_e32 v0, s9 5575; GFX6-NEXT: v_mov_b32_e32 v2, s8 5576; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 5577; GFX6-NEXT: v_mov_b32_e32 v1, v2 5578; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5579; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 5580; GFX6-NEXT: s_waitcnt vmcnt(0) 5581; GFX6-NEXT: buffer_wbinvl1 5582; GFX6-NEXT: s_endpgm 5583; 5584; GFX7-LABEL: global_agent_monotonic_seq_cst_cmpxchg: 5585; GFX7: ; %bb.0: ; %entry 5586; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 5587; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 5588; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 5589; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 5590; GFX7-NEXT: s_mov_b64 s[10:11], 16 5591; GFX7-NEXT: s_waitcnt lgkmcnt(0) 5592; GFX7-NEXT:
s_mov_b32 s4, s8 5593; GFX7-NEXT: s_mov_b32 s5, s9 5594; GFX7-NEXT: s_mov_b32 s9, s10 5595; GFX7-NEXT: s_mov_b32 s8, s11 5596; GFX7-NEXT: s_add_u32 s4, s4, s9 5597; GFX7-NEXT: s_addc_u32 s8, s5, s8 5598; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 5599; GFX7-NEXT: s_mov_b32 s5, s8 5600; GFX7-NEXT: v_mov_b32_e32 v2, s7 5601; GFX7-NEXT: v_mov_b32_e32 v0, s6 5602; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5603; GFX7-NEXT: v_mov_b32_e32 v3, v0 5604; GFX7-NEXT: v_mov_b32_e32 v0, s4 5605; GFX7-NEXT: v_mov_b32_e32 v1, s5 5606; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5607; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 5608; GFX7-NEXT: s_waitcnt vmcnt(0) 5609; GFX7-NEXT: buffer_wbinvl1_vol 5610; GFX7-NEXT: s_endpgm 5611; 5612; GFX10-WGP-LABEL: global_agent_monotonic_seq_cst_cmpxchg: 5613; GFX10-WGP: ; %bb.0: ; %entry 5614; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 5615; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5616; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 5617; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 5618; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 5619; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 5620; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 5621; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5622; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 5623; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5624; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5625; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 5626; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5627; GFX10-WGP-NEXT: buffer_gl1_inv 5628; GFX10-WGP-NEXT: buffer_gl0_inv 5629; GFX10-WGP-NEXT: s_endpgm 5630; 5631; GFX10-CU-LABEL: global_agent_monotonic_seq_cst_cmpxchg: 5632; GFX10-CU: ; %bb.0: ; %entry 5633; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 5634; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5635; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 5636; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 5637; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 
5638; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 5639; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 5640; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5641; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 5642; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5643; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 5644; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 5645; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 5646; GFX10-CU-NEXT: buffer_gl1_inv 5647; GFX10-CU-NEXT: buffer_gl0_inv 5648; GFX10-CU-NEXT: s_endpgm 5649; 5650; SKIP-CACHE-INV-LABEL: global_agent_monotonic_seq_cst_cmpxchg: 5651; SKIP-CACHE-INV: ; %bb.0: ; %entry 5652; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 5653; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 5654; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 5655; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 5656; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 5657; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 5658; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 5659; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 5660; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 5661; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 5662; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 5663; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 5664; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 5665; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 5666; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 5667; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 5668; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 5669; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5670; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 5671; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 5672; SKIP-CACHE-INV-NEXT: s_endpgm 5673; 5674; GFX90A-NOTTGSPLIT-LABEL: global_agent_monotonic_seq_cst_cmpxchg: 5675; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 5676; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 5677; 
GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5678; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 5679; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 5680; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5681; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 5682; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 5683; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5684; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 5685; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5686; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 5687; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 5688; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 5689; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 5690; 5691; GFX90A-TGSPLIT-LABEL: global_agent_monotonic_seq_cst_cmpxchg: 5692; GFX90A-TGSPLIT: ; %bb.0: ; %entry 5693; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 5694; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5695; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 5696; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 5697; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5698; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 5699; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 5700; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5701; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 5702; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5703; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 5704; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 5705; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 5706; GFX90A-TGSPLIT-NEXT: s_endpgm 5707; 5708; GFX940-NOTTGSPLIT-LABEL: global_agent_monotonic_seq_cst_cmpxchg: 5709; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 5710; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 5711; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5712; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 5713; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 5714; 
GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5715; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 5716; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 5717; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5718; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 5719; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 5720; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5721; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 5722; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 5723; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 5724; GFX940-NOTTGSPLIT-NEXT: s_endpgm 5725; 5726; GFX940-TGSPLIT-LABEL: global_agent_monotonic_seq_cst_cmpxchg: 5727; GFX940-TGSPLIT: ; %bb.0: ; %entry 5728; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 5729; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5730; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 5731; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 5732; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5733; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 5734; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 5735; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5736; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 5737; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 5738; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5739; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 5740; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 5741; GFX940-TGSPLIT-NEXT: buffer_inv sc1 5742; GFX940-TGSPLIT-NEXT: s_endpgm 5743; 5744; GFX11-WGP-LABEL: global_agent_monotonic_seq_cst_cmpxchg: 5745; GFX11-WGP: ; %bb.0: ; %entry 5746; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 5747; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5748; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 5749; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 5750; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 5751; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 5752; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 5753; GFX11-WGP-NEXT: ; kill: def $vgpr1 
killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5754; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 5755; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5756; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5757; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 5758; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5759; GFX11-WGP-NEXT: buffer_gl1_inv 5760; GFX11-WGP-NEXT: buffer_gl0_inv 5761; GFX11-WGP-NEXT: s_endpgm 5762; 5763; GFX11-CU-LABEL: global_agent_monotonic_seq_cst_cmpxchg: 5764; GFX11-CU: ; %bb.0: ; %entry 5765; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 5766; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5767; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 5768; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 5769; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 5770; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 5771; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 5772; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5773; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 5774; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5775; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 5776; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 5777; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 5778; GFX11-CU-NEXT: buffer_gl1_inv 5779; GFX11-CU-NEXT: buffer_gl0_inv 5780; GFX11-CU-NEXT: s_endpgm 5781; 5782; GFX12-WGP-LABEL: global_agent_monotonic_seq_cst_cmpxchg: 5783; GFX12-WGP: ; %bb.0: ; %entry 5784; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 5785; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5786; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 5787; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 5788; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 5789; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 5790; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 5791; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5792; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 5793; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 5794; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 5795; GFX12-WGP-NEXT: s_wait_storecnt 0x0 5796; GFX12-WGP-NEXT: 
s_wait_loadcnt_dscnt 0x0 5797; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 5798; GFX12-WGP-NEXT: s_wait_storecnt 0x0 5799; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 5800; GFX12-WGP-NEXT: s_endpgm 5801; 5802; GFX12-CU-LABEL: global_agent_monotonic_seq_cst_cmpxchg: 5803; GFX12-CU: ; %bb.0: ; %entry 5804; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 5805; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5806; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 5807; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 5808; GFX12-CU-NEXT: s_wait_kmcnt 0x0 5809; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 5810; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 5811; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5812; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 5813; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 5814; GFX12-CU-NEXT: s_wait_samplecnt 0x0 5815; GFX12-CU-NEXT: s_wait_storecnt 0x0 5816; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 5817; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 5818; GFX12-CU-NEXT: s_wait_storecnt 0x0 5819; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 5820; GFX12-CU-NEXT: s_endpgm 5821 ptr addrspace(1) %out, i32 %in, i32 %old) { 5822entry: 5823 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 5824 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") monotonic seq_cst 5825 ret void 5826} 5827 5828define amdgpu_kernel void @global_agent_acquire_seq_cst_cmpxchg( 5829; GFX6-LABEL: global_agent_acquire_seq_cst_cmpxchg: 5830; GFX6: ; %bb.0: ; %entry 5831; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 5832; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 5833; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 5834; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 5835; GFX6-NEXT: s_waitcnt lgkmcnt(0) 5836; GFX6-NEXT: s_mov_b32 s12, s5 5837; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 5838; GFX6-NEXT: s_mov_b32 s10, 0x100f000 5839; GFX6-NEXT: s_mov_b32 s11, -1 5840; GFX6-NEXT: ; kill: def 
$sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 5841; GFX6-NEXT: s_mov_b32 s5, s12 5842; GFX6-NEXT: s_mov_b32 s6, s11 5843; GFX6-NEXT: s_mov_b32 s7, s10 5844; GFX6-NEXT: v_mov_b32_e32 v0, s9 5845; GFX6-NEXT: v_mov_b32_e32 v2, s8 5846; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 5847; GFX6-NEXT: v_mov_b32_e32 v1, v2 5848; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5849; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 5850; GFX6-NEXT: s_waitcnt vmcnt(0) 5851; GFX6-NEXT: buffer_wbinvl1 5852; GFX6-NEXT: s_endpgm 5853; 5854; GFX7-LABEL: global_agent_acquire_seq_cst_cmpxchg: 5855; GFX7: ; %bb.0: ; %entry 5856; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 5857; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 5858; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 5859; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 5860; GFX7-NEXT: s_mov_b64 s[10:11], 16 5861; GFX7-NEXT: s_waitcnt lgkmcnt(0) 5862; GFX7-NEXT: s_mov_b32 s4, s8 5863; GFX7-NEXT: s_mov_b32 s5, s9 5864; GFX7-NEXT: s_mov_b32 s9, s10 5865; GFX7-NEXT: s_mov_b32 s8, s11 5866; GFX7-NEXT: s_add_u32 s4, s4, s9 5867; GFX7-NEXT: s_addc_u32 s8, s5, s8 5868; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 5869; GFX7-NEXT: s_mov_b32 s5, s8 5870; GFX7-NEXT: v_mov_b32_e32 v2, s7 5871; GFX7-NEXT: v_mov_b32_e32 v0, s6 5872; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5873; GFX7-NEXT: v_mov_b32_e32 v3, v0 5874; GFX7-NEXT: v_mov_b32_e32 v0, s4 5875; GFX7-NEXT: v_mov_b32_e32 v1, s5 5876; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5877; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 5878; GFX7-NEXT: s_waitcnt vmcnt(0) 5879; GFX7-NEXT: buffer_wbinvl1_vol 5880; GFX7-NEXT: s_endpgm 5881; 5882; GFX10-WGP-LABEL: global_agent_acquire_seq_cst_cmpxchg: 5883; GFX10-WGP: ; %bb.0: ; %entry 5884; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 5885; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5886; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 5887; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 
0xc 5888; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 5889; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 5890; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 5891; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5892; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 5893; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5894; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5895; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 5896; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5897; GFX10-WGP-NEXT: buffer_gl1_inv 5898; GFX10-WGP-NEXT: buffer_gl0_inv 5899; GFX10-WGP-NEXT: s_endpgm 5900; 5901; GFX10-CU-LABEL: global_agent_acquire_seq_cst_cmpxchg: 5902; GFX10-CU: ; %bb.0: ; %entry 5903; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 5904; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5905; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 5906; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 5907; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 5908; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 5909; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 5910; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5911; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 5912; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5913; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 5914; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 5915; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 5916; GFX10-CU-NEXT: buffer_gl1_inv 5917; GFX10-CU-NEXT: buffer_gl0_inv 5918; GFX10-CU-NEXT: s_endpgm 5919; 5920; SKIP-CACHE-INV-LABEL: global_agent_acquire_seq_cst_cmpxchg: 5921; SKIP-CACHE-INV: ; %bb.0: ; %entry 5922; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 5923; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 5924; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 5925; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 5926; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 5927; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 5928; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 5929; SKIP-CACHE-INV-NEXT: 
s_mov_b32 s6, 0xf000 5930; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 5931; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 5932; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 5933; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 5934; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 5935; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 5936; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 5937; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 5938; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 5939; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5940; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 5941; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 5942; SKIP-CACHE-INV-NEXT: s_endpgm 5943; 5944; GFX90A-NOTTGSPLIT-LABEL: global_agent_acquire_seq_cst_cmpxchg: 5945; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 5946; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 5947; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5948; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 5949; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 5950; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5951; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 5952; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 5953; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5954; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 5955; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5956; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 5957; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 5958; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 5959; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 5960; 5961; GFX90A-TGSPLIT-LABEL: global_agent_acquire_seq_cst_cmpxchg: 5962; GFX90A-TGSPLIT: ; %bb.0: ; %entry 5963; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 5964; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5965; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 5966; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 
5967; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5968; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 5969; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 5970; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5971; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 5972; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5973; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 5974; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 5975; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 5976; GFX90A-TGSPLIT-NEXT: s_endpgm 5977; 5978; GFX940-NOTTGSPLIT-LABEL: global_agent_acquire_seq_cst_cmpxchg: 5979; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 5980; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 5981; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5982; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 5983; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 5984; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5985; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 5986; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 5987; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5988; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 5989; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 5990; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5991; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 5992; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 5993; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 5994; GFX940-NOTTGSPLIT-NEXT: s_endpgm 5995; 5996; GFX940-TGSPLIT-LABEL: global_agent_acquire_seq_cst_cmpxchg: 5997; GFX940-TGSPLIT: ; %bb.0: ; %entry 5998; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 5999; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6000; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 6001; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 6002; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6003; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 6004; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 6005; 
GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6006; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 6007; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 6008; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6009; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 6010; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6011; GFX940-TGSPLIT-NEXT: buffer_inv sc1 6012; GFX940-TGSPLIT-NEXT: s_endpgm 6013; 6014; GFX11-WGP-LABEL: global_agent_acquire_seq_cst_cmpxchg: 6015; GFX11-WGP: ; %bb.0: ; %entry 6016; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 6017; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6018; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 6019; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 6020; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 6021; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 6022; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 6023; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6024; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 6025; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6026; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 6027; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 6028; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 6029; GFX11-WGP-NEXT: buffer_gl1_inv 6030; GFX11-WGP-NEXT: buffer_gl0_inv 6031; GFX11-WGP-NEXT: s_endpgm 6032; 6033; GFX11-CU-LABEL: global_agent_acquire_seq_cst_cmpxchg: 6034; GFX11-CU: ; %bb.0: ; %entry 6035; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 6036; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6037; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 6038; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 6039; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 6040; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 6041; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 6042; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6043; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 6044; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6045; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 6046; GFX11-CU-NEXT: 
global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 6047; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 6048; GFX11-CU-NEXT: buffer_gl1_inv 6049; GFX11-CU-NEXT: buffer_gl0_inv 6050; GFX11-CU-NEXT: s_endpgm 6051; 6052; GFX12-WGP-LABEL: global_agent_acquire_seq_cst_cmpxchg: 6053; GFX12-WGP: ; %bb.0: ; %entry 6054; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 6055; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6056; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 6057; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 6058; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 6059; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 6060; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 6061; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6062; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 6063; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 6064; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 6065; GFX12-WGP-NEXT: s_wait_storecnt 0x0 6066; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 6067; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 6068; GFX12-WGP-NEXT: s_wait_storecnt 0x0 6069; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 6070; GFX12-WGP-NEXT: s_endpgm 6071; 6072; GFX12-CU-LABEL: global_agent_acquire_seq_cst_cmpxchg: 6073; GFX12-CU: ; %bb.0: ; %entry 6074; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 6075; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6076; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 6077; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 6078; GFX12-CU-NEXT: s_wait_kmcnt 0x0 6079; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 6080; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 6081; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6082; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 6083; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 6084; GFX12-CU-NEXT: s_wait_samplecnt 0x0 6085; GFX12-CU-NEXT: s_wait_storecnt 0x0 6086; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 6087; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 6088; GFX12-CU-NEXT: s_wait_storecnt 0x0 6089; GFX12-CU-NEXT: 
global_inv scope:SCOPE_DEV 6090; GFX12-CU-NEXT: s_endpgm 6091 ptr addrspace(1) %out, i32 %in, i32 %old) { 6092entry: 6093 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 6094 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") acquire seq_cst 6095 ret void 6096} 6097; Agent-scope cmpxchg on a global (addrspace 1) pointer with release success ordering and seq_cst failure ordering; the result is unused. The check lines that follow are autogenerated (see the NOTE on the first line of this file), so regenerate them with utils/update_llc_test_checks.py rather than editing them by hand. 6098define amdgpu_kernel void @global_agent_release_seq_cst_cmpxchg( 6099; GFX6-LABEL: global_agent_release_seq_cst_cmpxchg: 6100; GFX6: ; %bb.0: ; %entry 6101; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 6102; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 6103; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 6104; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 6105; GFX6-NEXT: s_waitcnt lgkmcnt(0) 6106; GFX6-NEXT: s_mov_b32 s12, s5 6107; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 6108; GFX6-NEXT: s_mov_b32 s10, 0x100f000 6109; GFX6-NEXT: s_mov_b32 s11, -1 6110; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 6111; GFX6-NEXT: s_mov_b32 s5, s12 6112; GFX6-NEXT: s_mov_b32 s6, s11 6113; GFX6-NEXT: s_mov_b32 s7, s10 6114; GFX6-NEXT: v_mov_b32_e32 v0, s9 6115; GFX6-NEXT: v_mov_b32_e32 v2, s8 6116; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 6117; GFX6-NEXT: v_mov_b32_e32 v1, v2 6118; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6119; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 6120; GFX6-NEXT: s_waitcnt vmcnt(0) 6121; GFX6-NEXT: buffer_wbinvl1 6122; GFX6-NEXT: s_endpgm 6123; 6124; GFX7-LABEL: global_agent_release_seq_cst_cmpxchg: 6125; GFX7: ; %bb.0: ; %entry 6126; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 6127; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 6128; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 6129; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 6130; GFX7-NEXT: s_mov_b64 s[10:11], 16 6131; GFX7-NEXT: s_waitcnt lgkmcnt(0) 6132; GFX7-NEXT: s_mov_b32 s4, s8 6133; GFX7-NEXT: s_mov_b32 s5, s9 6134; GFX7-NEXT: s_mov_b32 s9, s10 6135; GFX7-NEXT: s_mov_b32 s8, s11 6136; GFX7-NEXT: s_add_u32 s4, s4, s9 6137; 
GFX7-NEXT: s_addc_u32 s8, s5, s8 6138; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 6139; GFX7-NEXT: s_mov_b32 s5, s8 6140; GFX7-NEXT: v_mov_b32_e32 v2, s7 6141; GFX7-NEXT: v_mov_b32_e32 v0, s6 6142; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6143; GFX7-NEXT: v_mov_b32_e32 v3, v0 6144; GFX7-NEXT: v_mov_b32_e32 v0, s4 6145; GFX7-NEXT: v_mov_b32_e32 v1, s5 6146; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6147; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 6148; GFX7-NEXT: s_waitcnt vmcnt(0) 6149; GFX7-NEXT: buffer_wbinvl1_vol 6150; GFX7-NEXT: s_endpgm 6151; 6152; GFX10-WGP-LABEL: global_agent_release_seq_cst_cmpxchg: 6153; GFX10-WGP: ; %bb.0: ; %entry 6154; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 6155; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 6156; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 6157; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 6158; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 6159; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 6160; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 6161; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6162; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 6163; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6164; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 6165; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 6166; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 6167; GFX10-WGP-NEXT: buffer_gl1_inv 6168; GFX10-WGP-NEXT: buffer_gl0_inv 6169; GFX10-WGP-NEXT: s_endpgm 6170; 6171; GFX10-CU-LABEL: global_agent_release_seq_cst_cmpxchg: 6172; GFX10-CU: ; %bb.0: ; %entry 6173; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 6174; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 6175; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 6176; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 6177; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 6178; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 6179; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 6180; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6181; 
GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 6182; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6183; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 6184; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 6185; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 6186; GFX10-CU-NEXT: buffer_gl1_inv 6187; GFX10-CU-NEXT: buffer_gl0_inv 6188; GFX10-CU-NEXT: s_endpgm 6189; 6190; SKIP-CACHE-INV-LABEL: global_agent_release_seq_cst_cmpxchg: 6191; SKIP-CACHE-INV: ; %bb.0: ; %entry 6192; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 6193; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 6194; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 6195; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 6196; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 6197; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 6198; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 6199; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 6200; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 6201; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 6202; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 6203; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 6204; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 6205; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 6206; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 6207; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 6208; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 6209; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6210; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 6211; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 6212; SKIP-CACHE-INV-NEXT: s_endpgm 6213; 6214; GFX90A-NOTTGSPLIT-LABEL: global_agent_release_seq_cst_cmpxchg: 6215; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 6216; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 6217; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 6218; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 6219; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 6220; 
GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6221; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 6222; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 6223; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6224; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 6225; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6226; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 6227; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 6228; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 6229; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 6230; 6231; GFX90A-TGSPLIT-LABEL: global_agent_release_seq_cst_cmpxchg: 6232; GFX90A-TGSPLIT: ; %bb.0: ; %entry 6233; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 6234; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 6235; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 6236; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 6237; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6238; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 6239; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 6240; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6241; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 6242; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6243; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 6244; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6245; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 6246; GFX90A-TGSPLIT-NEXT: s_endpgm 6247; 6248; GFX940-NOTTGSPLIT-LABEL: global_agent_release_seq_cst_cmpxchg: 6249; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 6250; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 6251; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6252; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 6253; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 6254; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6255; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 6256; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 6257; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 
killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6258; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 6259; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 6260; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6261; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 6262; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 6263; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 6264; GFX940-NOTTGSPLIT-NEXT: s_endpgm 6265; 6266; GFX940-TGSPLIT-LABEL: global_agent_release_seq_cst_cmpxchg: 6267; GFX940-TGSPLIT: ; %bb.0: ; %entry 6268; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 6269; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6270; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 6271; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 6272; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6273; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 6274; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 6275; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6276; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 6277; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 6278; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6279; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 6280; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6281; GFX940-TGSPLIT-NEXT: buffer_inv sc1 6282; GFX940-TGSPLIT-NEXT: s_endpgm 6283; 6284; GFX11-WGP-LABEL: global_agent_release_seq_cst_cmpxchg: 6285; GFX11-WGP: ; %bb.0: ; %entry 6286; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 6287; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6288; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 6289; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 6290; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 6291; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 6292; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 6293; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6294; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 6295; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6296; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 6297; 
GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 6298; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 6299; GFX11-WGP-NEXT: buffer_gl1_inv 6300; GFX11-WGP-NEXT: buffer_gl0_inv 6301; GFX11-WGP-NEXT: s_endpgm 6302; 6303; GFX11-CU-LABEL: global_agent_release_seq_cst_cmpxchg: 6304; GFX11-CU: ; %bb.0: ; %entry 6305; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 6306; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6307; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 6308; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 6309; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 6310; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 6311; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 6312; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6313; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 6314; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6315; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 6316; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 6317; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 6318; GFX11-CU-NEXT: buffer_gl1_inv 6319; GFX11-CU-NEXT: buffer_gl0_inv 6320; GFX11-CU-NEXT: s_endpgm 6321; 6322; GFX12-WGP-LABEL: global_agent_release_seq_cst_cmpxchg: 6323; GFX12-WGP: ; %bb.0: ; %entry 6324; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 6325; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6326; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 6327; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 6328; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 6329; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 6330; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 6331; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6332; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 6333; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 6334; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 6335; GFX12-WGP-NEXT: s_wait_storecnt 0x0 6336; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 6337; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 6338; GFX12-WGP-NEXT: s_wait_storecnt 0x0 6339; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 6340; 
GFX12-WGP-NEXT: s_endpgm 6341; 6342; GFX12-CU-LABEL: global_agent_release_seq_cst_cmpxchg: 6343; GFX12-CU: ; %bb.0: ; %entry 6344; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 6345; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6346; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 6347; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 6348; GFX12-CU-NEXT: s_wait_kmcnt 0x0 6349; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 6350; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 6351; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6352; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 6353; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 6354; GFX12-CU-NEXT: s_wait_samplecnt 0x0 6355; GFX12-CU-NEXT: s_wait_storecnt 0x0 6356; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 6357; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 6358; GFX12-CU-NEXT: s_wait_storecnt 0x0 6359; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 6360; GFX12-CU-NEXT: s_endpgm 6361 ptr addrspace(1) %out, i32 %in, i32 %old) { 6362entry: 6363 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 ; i32 element index 4, i.e. a 16-byte offset from %out 6364 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") release seq_cst ; compares against %old and stores %in on a match 6365 ret void 6366} 6367 6368define amdgpu_kernel void @global_agent_acq_rel_seq_cst_cmpxchg( 6369; GFX6-LABEL: global_agent_acq_rel_seq_cst_cmpxchg: 6370; GFX6: ; %bb.0: ; %entry 6371; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 6372; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 6373; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 6374; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 6375; GFX6-NEXT: s_waitcnt lgkmcnt(0) 6376; GFX6-NEXT: s_mov_b32 s12, s5 6377; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 6378; GFX6-NEXT: s_mov_b32 s10, 0x100f000 6379; GFX6-NEXT: s_mov_b32 s11, -1 6380; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 6381; GFX6-NEXT: s_mov_b32 s5, s12 6382; GFX6-NEXT: s_mov_b32 s6, s11 6383; GFX6-NEXT: s_mov_b32 s7, s10 6384; GFX6-NEXT: v_mov_b32_e32 v0, s9 6385; GFX6-NEXT: 
v_mov_b32_e32 v2, s8 6386; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 6387; GFX6-NEXT: v_mov_b32_e32 v1, v2 6388; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6389; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 6390; GFX6-NEXT: s_waitcnt vmcnt(0) 6391; GFX6-NEXT: buffer_wbinvl1 6392; GFX6-NEXT: s_endpgm 6393; 6394; GFX7-LABEL: global_agent_acq_rel_seq_cst_cmpxchg: 6395; GFX7: ; %bb.0: ; %entry 6396; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 6397; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 6398; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 6399; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 6400; GFX7-NEXT: s_mov_b64 s[10:11], 16 6401; GFX7-NEXT: s_waitcnt lgkmcnt(0) 6402; GFX7-NEXT: s_mov_b32 s4, s8 6403; GFX7-NEXT: s_mov_b32 s5, s9 6404; GFX7-NEXT: s_mov_b32 s9, s10 6405; GFX7-NEXT: s_mov_b32 s8, s11 6406; GFX7-NEXT: s_add_u32 s4, s4, s9 6407; GFX7-NEXT: s_addc_u32 s8, s5, s8 6408; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 6409; GFX7-NEXT: s_mov_b32 s5, s8 6410; GFX7-NEXT: v_mov_b32_e32 v2, s7 6411; GFX7-NEXT: v_mov_b32_e32 v0, s6 6412; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6413; GFX7-NEXT: v_mov_b32_e32 v3, v0 6414; GFX7-NEXT: v_mov_b32_e32 v0, s4 6415; GFX7-NEXT: v_mov_b32_e32 v1, s5 6416; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6417; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 6418; GFX7-NEXT: s_waitcnt vmcnt(0) 6419; GFX7-NEXT: buffer_wbinvl1_vol 6420; GFX7-NEXT: s_endpgm 6421; 6422; GFX10-WGP-LABEL: global_agent_acq_rel_seq_cst_cmpxchg: 6423; GFX10-WGP: ; %bb.0: ; %entry 6424; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 6425; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 6426; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 6427; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 6428; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 6429; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 6430; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 6431; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 
killed $exec 6432; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 6433; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6434; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 6435; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 6436; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 6437; GFX10-WGP-NEXT: buffer_gl1_inv 6438; GFX10-WGP-NEXT: buffer_gl0_inv 6439; GFX10-WGP-NEXT: s_endpgm 6440; 6441; GFX10-CU-LABEL: global_agent_acq_rel_seq_cst_cmpxchg: 6442; GFX10-CU: ; %bb.0: ; %entry 6443; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 6444; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 6445; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 6446; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 6447; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 6448; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 6449; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 6450; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6451; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 6452; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6453; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 6454; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 6455; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 6456; GFX10-CU-NEXT: buffer_gl1_inv 6457; GFX10-CU-NEXT: buffer_gl0_inv 6458; GFX10-CU-NEXT: s_endpgm 6459; 6460; SKIP-CACHE-INV-LABEL: global_agent_acq_rel_seq_cst_cmpxchg: 6461; SKIP-CACHE-INV: ; %bb.0: ; %entry 6462; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 6463; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 6464; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 6465; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 6466; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 6467; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 6468; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 6469; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 6470; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 6471; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 6472; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 6473; 
SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 6474; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 6475; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 6476; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 6477; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 6478; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 6479; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6480; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 6481; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 6482; SKIP-CACHE-INV-NEXT: s_endpgm 6483; 6484; GFX90A-NOTTGSPLIT-LABEL: global_agent_acq_rel_seq_cst_cmpxchg: 6485; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 6486; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 6487; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 6488; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 6489; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 6490; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6491; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 6492; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 6493; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6494; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 6495; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6496; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 6497; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 6498; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 6499; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 6500; 6501; GFX90A-TGSPLIT-LABEL: global_agent_acq_rel_seq_cst_cmpxchg: 6502; GFX90A-TGSPLIT: ; %bb.0: ; %entry 6503; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 6504; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 6505; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 6506; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 6507; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6508; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 6509; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 6510; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 
def $vgpr2_vgpr3 killed $exec 6511; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 6512; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6513; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 6514; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6515; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 6516; GFX90A-TGSPLIT-NEXT: s_endpgm 6517; 6518; GFX940-NOTTGSPLIT-LABEL: global_agent_acq_rel_seq_cst_cmpxchg: 6519; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 6520; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 6521; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6522; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 6523; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 6524; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6525; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 6526; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 6527; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6528; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 6529; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 6530; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6531; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 6532; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 6533; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 6534; GFX940-NOTTGSPLIT-NEXT: s_endpgm 6535; 6536; GFX940-TGSPLIT-LABEL: global_agent_acq_rel_seq_cst_cmpxchg: 6537; GFX940-TGSPLIT: ; %bb.0: ; %entry 6538; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 6539; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6540; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 6541; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 6542; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6543; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 6544; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 6545; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6546; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 6547; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 6548; GFX940-TGSPLIT-NEXT: 
s_waitcnt vmcnt(0) lgkmcnt(0) 6549; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 6550; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6551; GFX940-TGSPLIT-NEXT: buffer_inv sc1 6552; GFX940-TGSPLIT-NEXT: s_endpgm 6553; 6554; GFX11-WGP-LABEL: global_agent_acq_rel_seq_cst_cmpxchg: 6555; GFX11-WGP: ; %bb.0: ; %entry 6556; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 6557; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6558; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 6559; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 6560; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 6561; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 6562; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 6563; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6564; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 6565; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6566; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 6567; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 6568; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 6569; GFX11-WGP-NEXT: buffer_gl1_inv 6570; GFX11-WGP-NEXT: buffer_gl0_inv 6571; GFX11-WGP-NEXT: s_endpgm 6572; 6573; GFX11-CU-LABEL: global_agent_acq_rel_seq_cst_cmpxchg: 6574; GFX11-CU: ; %bb.0: ; %entry 6575; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 6576; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6577; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 6578; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 6579; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 6580; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 6581; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 6582; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6583; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 6584; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6585; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 6586; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 6587; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 6588; GFX11-CU-NEXT: buffer_gl1_inv 6589; GFX11-CU-NEXT: buffer_gl0_inv 6590; GFX11-CU-NEXT: s_endpgm 6591; 6592; 
GFX12-WGP-LABEL: global_agent_acq_rel_seq_cst_cmpxchg: 6593; GFX12-WGP: ; %bb.0: ; %entry 6594; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 6595; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6596; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 6597; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 6598; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 6599; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 6600; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 6601; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6602; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 6603; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 6604; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 6605; GFX12-WGP-NEXT: s_wait_storecnt 0x0 6606; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 6607; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 6608; GFX12-WGP-NEXT: s_wait_storecnt 0x0 6609; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 6610; GFX12-WGP-NEXT: s_endpgm 6611; 6612; GFX12-CU-LABEL: global_agent_acq_rel_seq_cst_cmpxchg: 6613; GFX12-CU: ; %bb.0: ; %entry 6614; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 6615; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6616; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 6617; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 6618; GFX12-CU-NEXT: s_wait_kmcnt 0x0 6619; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 6620; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 6621; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6622; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 6623; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 6624; GFX12-CU-NEXT: s_wait_samplecnt 0x0 6625; GFX12-CU-NEXT: s_wait_storecnt 0x0 6626; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 6627; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 6628; GFX12-CU-NEXT: s_wait_storecnt 0x0 6629; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 6630; GFX12-CU-NEXT: s_endpgm 6631 ptr addrspace(1) %out, i32 %in, i32 %old) { 6632entry: 6633 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 6634 %val = cmpxchg volatile ptr 
addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") acq_rel seq_cst 6635 ret void 6636} 6637 6638define amdgpu_kernel void @global_agent_seq_cst_seq_cst_cmpxchg( 6639; GFX6-LABEL: global_agent_seq_cst_seq_cst_cmpxchg: 6640; GFX6: ; %bb.0: ; %entry 6641; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 6642; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 6643; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 6644; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 6645; GFX6-NEXT: s_waitcnt lgkmcnt(0) 6646; GFX6-NEXT: s_mov_b32 s12, s5 6647; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 6648; GFX6-NEXT: s_mov_b32 s10, 0x100f000 6649; GFX6-NEXT: s_mov_b32 s11, -1 6650; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 6651; GFX6-NEXT: s_mov_b32 s5, s12 6652; GFX6-NEXT: s_mov_b32 s6, s11 6653; GFX6-NEXT: s_mov_b32 s7, s10 6654; GFX6-NEXT: v_mov_b32_e32 v0, s9 6655; GFX6-NEXT: v_mov_b32_e32 v2, s8 6656; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 6657; GFX6-NEXT: v_mov_b32_e32 v1, v2 6658; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6659; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 6660; GFX6-NEXT: s_waitcnt vmcnt(0) 6661; GFX6-NEXT: buffer_wbinvl1 6662; GFX6-NEXT: s_endpgm 6663; 6664; GFX7-LABEL: global_agent_seq_cst_seq_cst_cmpxchg: 6665; GFX7: ; %bb.0: ; %entry 6666; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 6667; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 6668; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 6669; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 6670; GFX7-NEXT: s_mov_b64 s[10:11], 16 6671; GFX7-NEXT: s_waitcnt lgkmcnt(0) 6672; GFX7-NEXT: s_mov_b32 s4, s8 6673; GFX7-NEXT: s_mov_b32 s5, s9 6674; GFX7-NEXT: s_mov_b32 s9, s10 6675; GFX7-NEXT: s_mov_b32 s8, s11 6676; GFX7-NEXT: s_add_u32 s4, s4, s9 6677; GFX7-NEXT: s_addc_u32 s8, s5, s8 6678; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 6679; GFX7-NEXT: s_mov_b32 s5, s8 6680; GFX7-NEXT: v_mov_b32_e32 v2, s7 6681; GFX7-NEXT: v_mov_b32_e32 v0, s6 
6682; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6683; GFX7-NEXT: v_mov_b32_e32 v3, v0 6684; GFX7-NEXT: v_mov_b32_e32 v0, s4 6685; GFX7-NEXT: v_mov_b32_e32 v1, s5 6686; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6687; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 6688; GFX7-NEXT: s_waitcnt vmcnt(0) 6689; GFX7-NEXT: buffer_wbinvl1_vol 6690; GFX7-NEXT: s_endpgm 6691; 6692; GFX10-WGP-LABEL: global_agent_seq_cst_seq_cst_cmpxchg: 6693; GFX10-WGP: ; %bb.0: ; %entry 6694; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 6695; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 6696; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 6697; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 6698; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 6699; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 6700; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 6701; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6702; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 6703; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6704; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 6705; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 6706; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 6707; GFX10-WGP-NEXT: buffer_gl1_inv 6708; GFX10-WGP-NEXT: buffer_gl0_inv 6709; GFX10-WGP-NEXT: s_endpgm 6710; 6711; GFX10-CU-LABEL: global_agent_seq_cst_seq_cst_cmpxchg: 6712; GFX10-CU: ; %bb.0: ; %entry 6713; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 6714; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 6715; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 6716; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 6717; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 6718; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 6719; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 6720; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6721; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 6722; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6723; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 6724; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 
6725; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 6726; GFX10-CU-NEXT: buffer_gl1_inv 6727; GFX10-CU-NEXT: buffer_gl0_inv 6728; GFX10-CU-NEXT: s_endpgm 6729; 6730; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_seq_cst_cmpxchg: 6731; SKIP-CACHE-INV: ; %bb.0: ; %entry 6732; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 6733; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 6734; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 6735; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 6736; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 6737; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 6738; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 6739; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 6740; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 6741; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 6742; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 6743; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 6744; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 6745; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 6746; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 6747; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 6748; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 6749; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6750; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 6751; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 6752; SKIP-CACHE-INV-NEXT: s_endpgm 6753; 6754; GFX90A-NOTTGSPLIT-LABEL: global_agent_seq_cst_seq_cst_cmpxchg: 6755; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 6756; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 6757; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 6758; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 6759; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 6760; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6761; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 6762; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 6763; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def 
$vgpr2_vgpr3 killed $exec 6764; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 6765; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6766; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 6767; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 6768; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 6769; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 6770; 6771; GFX90A-TGSPLIT-LABEL: global_agent_seq_cst_seq_cst_cmpxchg: 6772; GFX90A-TGSPLIT: ; %bb.0: ; %entry 6773; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 6774; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 6775; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 6776; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 6777; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6778; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 6779; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 6780; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6781; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 6782; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6783; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 6784; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6785; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 6786; GFX90A-TGSPLIT-NEXT: s_endpgm 6787; 6788; GFX940-NOTTGSPLIT-LABEL: global_agent_seq_cst_seq_cst_cmpxchg: 6789; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 6790; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 6791; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6792; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 6793; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 6794; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6795; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 6796; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 6797; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6798; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 6799; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 6800; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6801; 
GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 6802; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 6803; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 6804; GFX940-NOTTGSPLIT-NEXT: s_endpgm 6805; 6806; GFX940-TGSPLIT-LABEL: global_agent_seq_cst_seq_cst_cmpxchg: 6807; GFX940-TGSPLIT: ; %bb.0: ; %entry 6808; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 6809; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6810; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 6811; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 6812; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6813; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 6814; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 6815; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6816; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 6817; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 6818; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6819; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 6820; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6821; GFX940-TGSPLIT-NEXT: buffer_inv sc1 6822; GFX940-TGSPLIT-NEXT: s_endpgm 6823; 6824; GFX11-WGP-LABEL: global_agent_seq_cst_seq_cst_cmpxchg: 6825; GFX11-WGP: ; %bb.0: ; %entry 6826; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 6827; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6828; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 6829; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 6830; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 6831; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 6832; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 6833; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6834; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 6835; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6836; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 6837; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 6838; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 6839; GFX11-WGP-NEXT: buffer_gl1_inv 6840; GFX11-WGP-NEXT: buffer_gl0_inv 6841; GFX11-WGP-NEXT: 
s_endpgm 6842; 6843; GFX11-CU-LABEL: global_agent_seq_cst_seq_cst_cmpxchg: 6844; GFX11-CU: ; %bb.0: ; %entry 6845; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 6846; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6847; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 6848; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 6849; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 6850; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 6851; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 6852; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6853; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 6854; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6855; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 6856; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 6857; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 6858; GFX11-CU-NEXT: buffer_gl1_inv 6859; GFX11-CU-NEXT: buffer_gl0_inv 6860; GFX11-CU-NEXT: s_endpgm 6861; 6862; GFX12-WGP-LABEL: global_agent_seq_cst_seq_cst_cmpxchg: 6863; GFX12-WGP: ; %bb.0: ; %entry 6864; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 6865; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6866; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 6867; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 6868; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 6869; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 6870; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 6871; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6872; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 6873; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 6874; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 6875; GFX12-WGP-NEXT: s_wait_storecnt 0x0 6876; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 6877; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 6878; GFX12-WGP-NEXT: s_wait_storecnt 0x0 6879; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 6880; GFX12-WGP-NEXT: s_endpgm 6881; 6882; GFX12-CU-LABEL: global_agent_seq_cst_seq_cst_cmpxchg: 6883; GFX12-CU: ; %bb.0: ; %entry 6884; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 6885; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 
0x0 6886; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 6887; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 6888; GFX12-CU-NEXT: s_wait_kmcnt 0x0 6889; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 6890; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 6891; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6892; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 6893; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 6894; GFX12-CU-NEXT: s_wait_samplecnt 0x0 6895; GFX12-CU-NEXT: s_wait_storecnt 0x0 6896; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 6897; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 6898; GFX12-CU-NEXT: s_wait_storecnt 0x0 6899; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 6900; GFX12-CU-NEXT: s_endpgm 6901 ptr addrspace(1) %out, i32 %in, i32 %old) { 6902entry: 6903 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 6904 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst 6905 ret void 6906} 6907 6908define amdgpu_kernel void @global_agent_monotonic_monotonic_ret_cmpxchg( 6909; GFX6-LABEL: global_agent_monotonic_monotonic_ret_cmpxchg: 6910; GFX6: ; %bb.0: ; %entry 6911; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 6912; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 6913; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 6914; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 6915; GFX6-NEXT: s_waitcnt lgkmcnt(0) 6916; GFX6-NEXT: s_mov_b32 s12, s5 6917; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 6918; GFX6-NEXT: s_mov_b32 s10, 0x100f000 6919; GFX6-NEXT: s_mov_b32 s11, -1 6920; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 6921; GFX6-NEXT: s_mov_b32 s5, s12 6922; GFX6-NEXT: s_mov_b32 s6, s11 6923; GFX6-NEXT: s_mov_b32 s7, s10 6924; GFX6-NEXT: v_mov_b32_e32 v0, s9 6925; GFX6-NEXT: v_mov_b32_e32 v2, s8 6926; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 6927; GFX6-NEXT: v_mov_b32_e32 v1, v2 6928; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 
offset:16 glc 6929; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 6930; GFX6-NEXT: s_waitcnt vmcnt(0) 6931; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 6932; GFX6-NEXT: s_endpgm 6933; 6934; GFX7-LABEL: global_agent_monotonic_monotonic_ret_cmpxchg: 6935; GFX7: ; %bb.0: ; %entry 6936; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 6937; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 6938; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 6939; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 6940; GFX7-NEXT: s_mov_b64 s[12:13], 16 6941; GFX7-NEXT: s_waitcnt lgkmcnt(0) 6942; GFX7-NEXT: s_mov_b32 s6, s4 6943; GFX7-NEXT: s_mov_b32 s7, s5 6944; GFX7-NEXT: s_mov_b32 s11, s12 6945; GFX7-NEXT: s_mov_b32 s10, s13 6946; GFX7-NEXT: s_add_u32 s6, s6, s11 6947; GFX7-NEXT: s_addc_u32 s10, s7, s10 6948; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 6949; GFX7-NEXT: s_mov_b32 s7, s10 6950; GFX7-NEXT: v_mov_b32_e32 v2, s9 6951; GFX7-NEXT: v_mov_b32_e32 v0, s8 6952; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6953; GFX7-NEXT: v_mov_b32_e32 v3, v0 6954; GFX7-NEXT: v_mov_b32_e32 v0, s6 6955; GFX7-NEXT: v_mov_b32_e32 v1, s7 6956; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 6957; GFX7-NEXT: v_mov_b32_e32 v0, s4 6958; GFX7-NEXT: v_mov_b32_e32 v1, s5 6959; GFX7-NEXT: s_waitcnt vmcnt(0) 6960; GFX7-NEXT: flat_store_dword v[0:1], v2 6961; GFX7-NEXT: s_endpgm 6962; 6963; GFX10-WGP-LABEL: global_agent_monotonic_monotonic_ret_cmpxchg: 6964; GFX10-WGP: ; %bb.0: ; %entry 6965; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 6966; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 6967; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 6968; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 6969; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 6970; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 6971; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 6972; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6973; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 6974; 
GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 6975; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 6976; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 6977; GFX10-WGP-NEXT: s_endpgm 6978; 6979; GFX10-CU-LABEL: global_agent_monotonic_monotonic_ret_cmpxchg: 6980; GFX10-CU: ; %bb.0: ; %entry 6981; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 6982; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 6983; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 6984; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 6985; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 6986; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 6987; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 6988; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6989; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 6990; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 6991; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 6992; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 6993; GFX10-CU-NEXT: s_endpgm 6994; 6995; SKIP-CACHE-INV-LABEL: global_agent_monotonic_monotonic_ret_cmpxchg: 6996; SKIP-CACHE-INV: ; %bb.0: ; %entry 6997; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 6998; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 6999; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 7000; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 7001; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 7002; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 7003; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 7004; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 7005; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 7006; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 7007; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 7008; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 7009; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 7010; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 7011; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 7012; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 7013; 
SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 7014; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 7015; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 7016; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 7017; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 7018; SKIP-CACHE-INV-NEXT: s_endpgm 7019; 7020; GFX90A-NOTTGSPLIT-LABEL: global_agent_monotonic_monotonic_ret_cmpxchg: 7021; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 7022; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 7023; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 7024; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 7025; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 7026; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7027; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 7028; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 7029; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7030; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 7031; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 7032; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 7033; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 7034; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 7035; 7036; GFX90A-TGSPLIT-LABEL: global_agent_monotonic_monotonic_ret_cmpxchg: 7037; GFX90A-TGSPLIT: ; %bb.0: ; %entry 7038; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 7039; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 7040; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 7041; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 7042; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7043; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 7044; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 7045; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7046; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 7047; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 7048; 
GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 7049; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 7050; GFX90A-TGSPLIT-NEXT: s_endpgm 7051; 7052; GFX940-NOTTGSPLIT-LABEL: global_agent_monotonic_monotonic_ret_cmpxchg: 7053; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 7054; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 7055; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 7056; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 7057; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 7058; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7059; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 7060; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 7061; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7062; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 7063; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 7064; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 7065; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 7066; GFX940-NOTTGSPLIT-NEXT: s_endpgm 7067; 7068; GFX940-TGSPLIT-LABEL: global_agent_monotonic_monotonic_ret_cmpxchg: 7069; GFX940-TGSPLIT: ; %bb.0: ; %entry 7070; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 7071; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 7072; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 7073; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 7074; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7075; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 7076; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 7077; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7078; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 7079; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 7080; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 7081; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 7082; GFX940-TGSPLIT-NEXT: s_endpgm 7083; 7084; GFX11-WGP-LABEL: global_agent_monotonic_monotonic_ret_cmpxchg: 7085; GFX11-WGP: ; 
%bb.0: ; %entry 7086; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 7087; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7088; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 7089; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 7090; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 7091; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 7092; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 7093; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7094; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 7095; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 7096; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 7097; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 7098; GFX11-WGP-NEXT: s_endpgm 7099; 7100; GFX11-CU-LABEL: global_agent_monotonic_monotonic_ret_cmpxchg: 7101; GFX11-CU: ; %bb.0: ; %entry 7102; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 7103; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7104; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 7105; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 7106; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 7107; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 7108; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 7109; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7110; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 7111; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 7112; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 7113; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 7114; GFX11-CU-NEXT: s_endpgm 7115; 7116; GFX12-WGP-LABEL: global_agent_monotonic_monotonic_ret_cmpxchg: 7117; GFX12-WGP: ; %bb.0: ; %entry 7118; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 7119; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7120; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 7121; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 7122; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 7123; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 7124; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 7125; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7126; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 7127; 
GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV 7128; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 7129; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 7130; GFX12-WGP-NEXT: s_endpgm 7131; 7132; GFX12-CU-LABEL: global_agent_monotonic_monotonic_ret_cmpxchg: 7133; GFX12-CU: ; %bb.0: ; %entry 7134; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 7135; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7136; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 7137; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 7138; GFX12-CU-NEXT: s_wait_kmcnt 0x0 7139; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 7140; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 7141; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7142; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 7143; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV 7144; GFX12-CU-NEXT: s_wait_loadcnt 0x0 7145; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 7146; GFX12-CU-NEXT: s_endpgm 7147 ptr addrspace(1) %out, i32 %in, i32 %old) { 7148entry: 7149 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 7150 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") monotonic monotonic 7151 %val0 = extractvalue { i32, i1 } %val, 0 7152 store i32 %val0, ptr addrspace(1) %out, align 4 7153 ret void 7154} 7155 7156define amdgpu_kernel void @global_agent_acquire_monotonic_ret_cmpxchg( 7157; GFX6-LABEL: global_agent_acquire_monotonic_ret_cmpxchg: 7158; GFX6: ; %bb.0: ; %entry 7159; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 7160; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 7161; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 7162; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 7163; GFX6-NEXT: s_waitcnt lgkmcnt(0) 7164; GFX6-NEXT: s_mov_b32 s12, s5 7165; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 7166; GFX6-NEXT: s_mov_b32 s10, 0x100f000 7167; GFX6-NEXT: s_mov_b32 s11, -1 7168; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 
def $sgpr4_sgpr5_sgpr6_sgpr7 7169; GFX6-NEXT: s_mov_b32 s5, s12 7170; GFX6-NEXT: s_mov_b32 s6, s11 7171; GFX6-NEXT: s_mov_b32 s7, s10 7172; GFX6-NEXT: v_mov_b32_e32 v0, s9 7173; GFX6-NEXT: v_mov_b32_e32 v2, s8 7174; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 7175; GFX6-NEXT: v_mov_b32_e32 v1, v2 7176; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 7177; GFX6-NEXT: s_waitcnt vmcnt(0) 7178; GFX6-NEXT: buffer_wbinvl1 7179; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 7180; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 7181; GFX6-NEXT: s_endpgm 7182; 7183; GFX7-LABEL: global_agent_acquire_monotonic_ret_cmpxchg: 7184; GFX7: ; %bb.0: ; %entry 7185; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 7186; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 7187; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 7188; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 7189; GFX7-NEXT: s_mov_b64 s[12:13], 16 7190; GFX7-NEXT: s_waitcnt lgkmcnt(0) 7191; GFX7-NEXT: s_mov_b32 s6, s4 7192; GFX7-NEXT: s_mov_b32 s7, s5 7193; GFX7-NEXT: s_mov_b32 s11, s12 7194; GFX7-NEXT: s_mov_b32 s10, s13 7195; GFX7-NEXT: s_add_u32 s6, s6, s11 7196; GFX7-NEXT: s_addc_u32 s10, s7, s10 7197; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 7198; GFX7-NEXT: s_mov_b32 s7, s10 7199; GFX7-NEXT: v_mov_b32_e32 v2, s9 7200; GFX7-NEXT: v_mov_b32_e32 v0, s8 7201; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7202; GFX7-NEXT: v_mov_b32_e32 v3, v0 7203; GFX7-NEXT: v_mov_b32_e32 v0, s6 7204; GFX7-NEXT: v_mov_b32_e32 v1, s7 7205; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 7206; GFX7-NEXT: s_waitcnt vmcnt(0) 7207; GFX7-NEXT: buffer_wbinvl1_vol 7208; GFX7-NEXT: v_mov_b32_e32 v0, s4 7209; GFX7-NEXT: v_mov_b32_e32 v1, s5 7210; GFX7-NEXT: flat_store_dword v[0:1], v2 7211; GFX7-NEXT: s_endpgm 7212; 7213; GFX10-WGP-LABEL: global_agent_acquire_monotonic_ret_cmpxchg: 7214; GFX10-WGP: ; %bb.0: ; %entry 7215; GFX10-WGP-NEXT: 
v_mov_b32_e32 v0, 0 7216; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 7217; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 7218; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 7219; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 7220; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 7221; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 7222; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7223; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 7224; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 7225; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 7226; GFX10-WGP-NEXT: buffer_gl1_inv 7227; GFX10-WGP-NEXT: buffer_gl0_inv 7228; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 7229; GFX10-WGP-NEXT: s_endpgm 7230; 7231; GFX10-CU-LABEL: global_agent_acquire_monotonic_ret_cmpxchg: 7232; GFX10-CU: ; %bb.0: ; %entry 7233; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 7234; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 7235; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 7236; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 7237; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 7238; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 7239; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 7240; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7241; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 7242; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 7243; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 7244; GFX10-CU-NEXT: buffer_gl1_inv 7245; GFX10-CU-NEXT: buffer_gl0_inv 7246; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 7247; GFX10-CU-NEXT: s_endpgm 7248; 7249; SKIP-CACHE-INV-LABEL: global_agent_acquire_monotonic_ret_cmpxchg: 7250; SKIP-CACHE-INV: ; %bb.0: ; %entry 7251; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 7252; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 7253; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 7254; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 7255; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 7256; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 7257; 
SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 7258; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 7259; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 7260; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 7261; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 7262; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 7263; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 7264; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 7265; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 7266; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 7267; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 7268; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 7269; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 7270; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 7271; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 7272; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 7273; SKIP-CACHE-INV-NEXT: s_endpgm 7274; 7275; GFX90A-NOTTGSPLIT-LABEL: global_agent_acquire_monotonic_ret_cmpxchg: 7276; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 7277; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 7278; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 7279; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 7280; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 7281; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7282; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 7283; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 7284; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7285; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 7286; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 7287; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 7288; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 7289; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 7290; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 7291; 7292; GFX90A-TGSPLIT-LABEL: 
global_agent_acquire_monotonic_ret_cmpxchg: 7293; GFX90A-TGSPLIT: ; %bb.0: ; %entry 7294; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 7295; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 7296; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 7297; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 7298; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7299; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 7300; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 7301; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7302; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 7303; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 7304; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 7305; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 7306; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 7307; GFX90A-TGSPLIT-NEXT: s_endpgm 7308; 7309; GFX940-NOTTGSPLIT-LABEL: global_agent_acquire_monotonic_ret_cmpxchg: 7310; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 7311; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 7312; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 7313; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 7314; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 7315; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7316; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 7317; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 7318; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7319; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 7320; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 7321; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 7322; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 7323; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 7324; GFX940-NOTTGSPLIT-NEXT: s_endpgm 7325; 7326; GFX940-TGSPLIT-LABEL: global_agent_acquire_monotonic_ret_cmpxchg: 7327; GFX940-TGSPLIT: ; %bb.0: ; %entry 7328; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 7329; 
GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 7330; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 7331; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 7332; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7333; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 7334; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 7335; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7336; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 7337; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 7338; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 7339; GFX940-TGSPLIT-NEXT: buffer_inv sc1 7340; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 7341; GFX940-TGSPLIT-NEXT: s_endpgm 7342; 7343; GFX11-WGP-LABEL: global_agent_acquire_monotonic_ret_cmpxchg: 7344; GFX11-WGP: ; %bb.0: ; %entry 7345; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 7346; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7347; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 7348; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 7349; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 7350; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 7351; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 7352; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7353; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 7354; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 7355; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 7356; GFX11-WGP-NEXT: buffer_gl1_inv 7357; GFX11-WGP-NEXT: buffer_gl0_inv 7358; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 7359; GFX11-WGP-NEXT: s_endpgm 7360; 7361; GFX11-CU-LABEL: global_agent_acquire_monotonic_ret_cmpxchg: 7362; GFX11-CU: ; %bb.0: ; %entry 7363; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 7364; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7365; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 7366; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 7367; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 7368; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 7369; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 7370; 
GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7371; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 7372; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 7373; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 7374; GFX11-CU-NEXT: buffer_gl1_inv 7375; GFX11-CU-NEXT: buffer_gl0_inv 7376; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 7377; GFX11-CU-NEXT: s_endpgm 7378; 7379; GFX12-WGP-LABEL: global_agent_acquire_monotonic_ret_cmpxchg: 7380; GFX12-WGP: ; %bb.0: ; %entry 7381; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 7382; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7383; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 7384; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 7385; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 7386; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 7387; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 7388; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7389; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 7390; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV 7391; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 7392; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 7393; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 7394; GFX12-WGP-NEXT: s_endpgm 7395; 7396; GFX12-CU-LABEL: global_agent_acquire_monotonic_ret_cmpxchg: 7397; GFX12-CU: ; %bb.0: ; %entry 7398; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 7399; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7400; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 7401; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 7402; GFX12-CU-NEXT: s_wait_kmcnt 0x0 7403; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 7404; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 7405; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7406; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 7407; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV 7408; GFX12-CU-NEXT: s_wait_loadcnt 0x0 7409; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 7410; 
GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 7411; GFX12-CU-NEXT: s_endpgm 7412 ptr addrspace(1) %out, i32 %in, i32 %old) { 7413entry: 7414 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 7415 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") acquire monotonic 7416 %val0 = extractvalue { i32, i1 } %val, 0 7417 store i32 %val0, ptr addrspace(1) %out, align 4 7418 ret void 7419} 7420 7421define amdgpu_kernel void @global_agent_release_monotonic_ret_cmpxchg( 7422; GFX6-LABEL: global_agent_release_monotonic_ret_cmpxchg: 7423; GFX6: ; %bb.0: ; %entry 7424; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 7425; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 7426; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 7427; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 7428; GFX6-NEXT: s_waitcnt lgkmcnt(0) 7429; GFX6-NEXT: s_mov_b32 s12, s5 7430; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 7431; GFX6-NEXT: s_mov_b32 s10, 0x100f000 7432; GFX6-NEXT: s_mov_b32 s11, -1 7433; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 7434; GFX6-NEXT: s_mov_b32 s5, s12 7435; GFX6-NEXT: s_mov_b32 s6, s11 7436; GFX6-NEXT: s_mov_b32 s7, s10 7437; GFX6-NEXT: v_mov_b32_e32 v0, s9 7438; GFX6-NEXT: v_mov_b32_e32 v2, s8 7439; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 7440; GFX6-NEXT: v_mov_b32_e32 v1, v2 7441; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7442; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 7443; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 7444; GFX6-NEXT: s_waitcnt vmcnt(0) 7445; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 7446; GFX6-NEXT: s_endpgm 7447; 7448; GFX7-LABEL: global_agent_release_monotonic_ret_cmpxchg: 7449; GFX7: ; %bb.0: ; %entry 7450; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 7451; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 7452; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 7453; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 7454; 
GFX7-NEXT: s_mov_b64 s[12:13], 16 7455; GFX7-NEXT: s_waitcnt lgkmcnt(0) 7456; GFX7-NEXT: s_mov_b32 s6, s4 7457; GFX7-NEXT: s_mov_b32 s7, s5 7458; GFX7-NEXT: s_mov_b32 s11, s12 7459; GFX7-NEXT: s_mov_b32 s10, s13 7460; GFX7-NEXT: s_add_u32 s6, s6, s11 7461; GFX7-NEXT: s_addc_u32 s10, s7, s10 7462; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 7463; GFX7-NEXT: s_mov_b32 s7, s10 7464; GFX7-NEXT: v_mov_b32_e32 v2, s9 7465; GFX7-NEXT: v_mov_b32_e32 v0, s8 7466; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7467; GFX7-NEXT: v_mov_b32_e32 v3, v0 7468; GFX7-NEXT: v_mov_b32_e32 v0, s6 7469; GFX7-NEXT: v_mov_b32_e32 v1, s7 7470; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7471; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 7472; GFX7-NEXT: v_mov_b32_e32 v0, s4 7473; GFX7-NEXT: v_mov_b32_e32 v1, s5 7474; GFX7-NEXT: s_waitcnt vmcnt(0) 7475; GFX7-NEXT: flat_store_dword v[0:1], v2 7476; GFX7-NEXT: s_endpgm 7477; 7478; GFX10-WGP-LABEL: global_agent_release_monotonic_ret_cmpxchg: 7479; GFX10-WGP: ; %bb.0: ; %entry 7480; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 7481; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 7482; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 7483; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 7484; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 7485; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 7486; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 7487; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7488; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 7489; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7490; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 7491; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 7492; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 7493; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 7494; GFX10-WGP-NEXT: s_endpgm 7495; 7496; GFX10-CU-LABEL: global_agent_release_monotonic_ret_cmpxchg: 7497; GFX10-CU: ; %bb.0: ; %entry 7498; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 7499; GFX10-CU-NEXT: 
s_load_dwordx2 s[4:5], s[8:9], 0x0 7500; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 7501; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 7502; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 7503; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 7504; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 7505; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7506; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 7507; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7508; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 7509; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 7510; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 7511; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 7512; GFX10-CU-NEXT: s_endpgm 7513; 7514; SKIP-CACHE-INV-LABEL: global_agent_release_monotonic_ret_cmpxchg: 7515; SKIP-CACHE-INV: ; %bb.0: ; %entry 7516; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 7517; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 7518; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 7519; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 7520; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 7521; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 7522; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 7523; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 7524; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 7525; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 7526; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 7527; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 7528; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 7529; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 7530; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 7531; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 7532; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 7533; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7534; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 7535; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 7536; 
SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 7537; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 7538; SKIP-CACHE-INV-NEXT: s_endpgm 7539; 7540; GFX90A-NOTTGSPLIT-LABEL: global_agent_release_monotonic_ret_cmpxchg: 7541; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 7542; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 7543; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 7544; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 7545; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 7546; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7547; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 7548; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 7549; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7550; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 7551; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7552; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 7553; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 7554; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 7555; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 7556; 7557; GFX90A-TGSPLIT-LABEL: global_agent_release_monotonic_ret_cmpxchg: 7558; GFX90A-TGSPLIT: ; %bb.0: ; %entry 7559; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 7560; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 7561; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 7562; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 7563; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7564; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 7565; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 7566; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7567; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 7568; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7569; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 7570; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 7571; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 7572; GFX90A-TGSPLIT-NEXT: 
s_endpgm 7573; 7574; GFX940-NOTTGSPLIT-LABEL: global_agent_release_monotonic_ret_cmpxchg: 7575; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 7576; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 7577; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 7578; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 7579; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 7580; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7581; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 7582; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 7583; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7584; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 7585; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 7586; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7587; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 7588; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 7589; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 7590; GFX940-NOTTGSPLIT-NEXT: s_endpgm 7591; 7592; GFX940-TGSPLIT-LABEL: global_agent_release_monotonic_ret_cmpxchg: 7593; GFX940-TGSPLIT: ; %bb.0: ; %entry 7594; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 7595; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 7596; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 7597; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 7598; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7599; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 7600; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 7601; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7602; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 7603; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 7604; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7605; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 7606; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 7607; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 7608; GFX940-TGSPLIT-NEXT: s_endpgm 7609; 7610; 
GFX11-WGP-LABEL: global_agent_release_monotonic_ret_cmpxchg: 7611; GFX11-WGP: ; %bb.0: ; %entry 7612; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 7613; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7614; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 7615; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 7616; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 7617; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 7618; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 7619; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7620; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 7621; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7622; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 7623; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 7624; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 7625; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 7626; GFX11-WGP-NEXT: s_endpgm 7627; 7628; GFX11-CU-LABEL: global_agent_release_monotonic_ret_cmpxchg: 7629; GFX11-CU: ; %bb.0: ; %entry 7630; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 7631; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7632; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 7633; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 7634; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 7635; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 7636; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 7637; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7638; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 7639; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7640; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 7641; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 7642; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 7643; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 7644; GFX11-CU-NEXT: s_endpgm 7645; 7646; GFX12-WGP-LABEL: global_agent_release_monotonic_ret_cmpxchg: 7647; GFX12-WGP: ; %bb.0: ; %entry 7648; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 7649; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7650; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 7651; GFX12-WGP-NEXT: s_load_b32 s2, 
s[4:5], 0xc 7652; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 7653; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 7654; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 7655; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7656; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 7657; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 7658; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 7659; GFX12-WGP-NEXT: s_wait_storecnt 0x0 7660; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 7661; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV 7662; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 7663; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 7664; GFX12-WGP-NEXT: s_endpgm 7665; 7666; GFX12-CU-LABEL: global_agent_release_monotonic_ret_cmpxchg: 7667; GFX12-CU: ; %bb.0: ; %entry 7668; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 7669; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7670; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 7671; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 7672; GFX12-CU-NEXT: s_wait_kmcnt 0x0 7673; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 7674; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 7675; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7676; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 7677; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 7678; GFX12-CU-NEXT: s_wait_samplecnt 0x0 7679; GFX12-CU-NEXT: s_wait_storecnt 0x0 7680; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 7681; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV 7682; GFX12-CU-NEXT: s_wait_loadcnt 0x0 7683; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 7684; GFX12-CU-NEXT: s_endpgm 7685 ptr addrspace(1) %out, i32 %in, i32 %old) { 7686entry: 7687 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 7688 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") release monotonic 7689 %val0 = extractvalue { i32, i1 } %val, 0 7690 store i32 %val0, ptr addrspace(1) %out, align 4 7691 ret void 7692} 7693 7694define 
amdgpu_kernel void @global_agent_acq_rel_monotonic_ret_cmpxchg( 7695; GFX6-LABEL: global_agent_acq_rel_monotonic_ret_cmpxchg: 7696; GFX6: ; %bb.0: ; %entry 7697; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 7698; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 7699; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 7700; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 7701; GFX6-NEXT: s_waitcnt lgkmcnt(0) 7702; GFX6-NEXT: s_mov_b32 s12, s5 7703; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 7704; GFX6-NEXT: s_mov_b32 s10, 0x100f000 7705; GFX6-NEXT: s_mov_b32 s11, -1 7706; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 7707; GFX6-NEXT: s_mov_b32 s5, s12 7708; GFX6-NEXT: s_mov_b32 s6, s11 7709; GFX6-NEXT: s_mov_b32 s7, s10 7710; GFX6-NEXT: v_mov_b32_e32 v0, s9 7711; GFX6-NEXT: v_mov_b32_e32 v2, s8 7712; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 7713; GFX6-NEXT: v_mov_b32_e32 v1, v2 7714; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7715; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 7716; GFX6-NEXT: s_waitcnt vmcnt(0) 7717; GFX6-NEXT: buffer_wbinvl1 7718; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 7719; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 7720; GFX6-NEXT: s_endpgm 7721; 7722; GFX7-LABEL: global_agent_acq_rel_monotonic_ret_cmpxchg: 7723; GFX7: ; %bb.0: ; %entry 7724; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 7725; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 7726; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 7727; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 7728; GFX7-NEXT: s_mov_b64 s[12:13], 16 7729; GFX7-NEXT: s_waitcnt lgkmcnt(0) 7730; GFX7-NEXT: s_mov_b32 s6, s4 7731; GFX7-NEXT: s_mov_b32 s7, s5 7732; GFX7-NEXT: s_mov_b32 s11, s12 7733; GFX7-NEXT: s_mov_b32 s10, s13 7734; GFX7-NEXT: s_add_u32 s6, s6, s11 7735; GFX7-NEXT: s_addc_u32 s10, s7, s10 7736; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 7737; GFX7-NEXT: s_mov_b32 s7, s10 7738; GFX7-NEXT: 
v_mov_b32_e32 v2, s9 7739; GFX7-NEXT: v_mov_b32_e32 v0, s8 7740; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7741; GFX7-NEXT: v_mov_b32_e32 v3, v0 7742; GFX7-NEXT: v_mov_b32_e32 v0, s6 7743; GFX7-NEXT: v_mov_b32_e32 v1, s7 7744; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7745; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 7746; GFX7-NEXT: s_waitcnt vmcnt(0) 7747; GFX7-NEXT: buffer_wbinvl1_vol 7748; GFX7-NEXT: v_mov_b32_e32 v0, s4 7749; GFX7-NEXT: v_mov_b32_e32 v1, s5 7750; GFX7-NEXT: flat_store_dword v[0:1], v2 7751; GFX7-NEXT: s_endpgm 7752; 7753; GFX10-WGP-LABEL: global_agent_acq_rel_monotonic_ret_cmpxchg: 7754; GFX10-WGP: ; %bb.0: ; %entry 7755; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 7756; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 7757; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 7758; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 7759; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 7760; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 7761; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 7762; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7763; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 7764; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7765; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 7766; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 7767; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 7768; GFX10-WGP-NEXT: buffer_gl1_inv 7769; GFX10-WGP-NEXT: buffer_gl0_inv 7770; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 7771; GFX10-WGP-NEXT: s_endpgm 7772; 7773; GFX10-CU-LABEL: global_agent_acq_rel_monotonic_ret_cmpxchg: 7774; GFX10-CU: ; %bb.0: ; %entry 7775; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 7776; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 7777; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 7778; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 7779; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 7780; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 7781; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 7782; GFX10-CU-NEXT: ; kill: def $vgpr1 
killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7783; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 7784; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7785; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 7786; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 7787; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 7788; GFX10-CU-NEXT: buffer_gl1_inv 7789; GFX10-CU-NEXT: buffer_gl0_inv 7790; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 7791; GFX10-CU-NEXT: s_endpgm 7792; 7793; SKIP-CACHE-INV-LABEL: global_agent_acq_rel_monotonic_ret_cmpxchg: 7794; SKIP-CACHE-INV: ; %bb.0: ; %entry 7795; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 7796; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 7797; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 7798; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 7799; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 7800; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 7801; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 7802; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 7803; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 7804; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 7805; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 7806; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 7807; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 7808; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 7809; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 7810; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 7811; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 7812; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7813; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 7814; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 7815; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 7816; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 7817; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 7818; SKIP-CACHE-INV-NEXT: s_endpgm 7819; 7820; GFX90A-NOTTGSPLIT-LABEL: 
global_agent_acq_rel_monotonic_ret_cmpxchg: 7821; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 7822; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 7823; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 7824; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 7825; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 7826; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7827; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 7828; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 7829; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7830; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 7831; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7832; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 7833; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 7834; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 7835; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 7836; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 7837; 7838; GFX90A-TGSPLIT-LABEL: global_agent_acq_rel_monotonic_ret_cmpxchg: 7839; GFX90A-TGSPLIT: ; %bb.0: ; %entry 7840; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 7841; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 7842; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 7843; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 7844; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7845; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 7846; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 7847; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7848; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 7849; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7850; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 7851; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 7852; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 7853; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 7854; GFX90A-TGSPLIT-NEXT: s_endpgm 7855; 7856; GFX940-NOTTGSPLIT-LABEL: 
global_agent_acq_rel_monotonic_ret_cmpxchg: 7857; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 7858; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 7859; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 7860; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 7861; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 7862; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7863; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 7864; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 7865; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7866; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 7867; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 7868; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7869; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 7870; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 7871; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 7872; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 7873; GFX940-NOTTGSPLIT-NEXT: s_endpgm 7874; 7875; GFX940-TGSPLIT-LABEL: global_agent_acq_rel_monotonic_ret_cmpxchg: 7876; GFX940-TGSPLIT: ; %bb.0: ; %entry 7877; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 7878; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 7879; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 7880; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 7881; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7882; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 7883; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 7884; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7885; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 7886; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 7887; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7888; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 7889; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 7890; GFX940-TGSPLIT-NEXT: buffer_inv sc1 7891; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 7892; 
GFX940-TGSPLIT-NEXT: s_endpgm 7893; 7894; GFX11-WGP-LABEL: global_agent_acq_rel_monotonic_ret_cmpxchg: 7895; GFX11-WGP: ; %bb.0: ; %entry 7896; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 7897; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7898; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 7899; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 7900; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 7901; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 7902; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 7903; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7904; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 7905; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7906; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 7907; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 7908; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 7909; GFX11-WGP-NEXT: buffer_gl1_inv 7910; GFX11-WGP-NEXT: buffer_gl0_inv 7911; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 7912; GFX11-WGP-NEXT: s_endpgm 7913; 7914; GFX11-CU-LABEL: global_agent_acq_rel_monotonic_ret_cmpxchg: 7915; GFX11-CU: ; %bb.0: ; %entry 7916; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 7917; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7918; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 7919; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 7920; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 7921; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 7922; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 7923; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7924; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 7925; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7926; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 7927; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 7928; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 7929; GFX11-CU-NEXT: buffer_gl1_inv 7930; GFX11-CU-NEXT: buffer_gl0_inv 7931; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 7932; GFX11-CU-NEXT: s_endpgm 7933; 7934; GFX12-WGP-LABEL: global_agent_acq_rel_monotonic_ret_cmpxchg: 7935; GFX12-WGP: ; %bb.0: ; 
%entry 7936; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 7937; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7938; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 7939; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 7940; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 7941; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 7942; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 7943; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7944; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 7945; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 7946; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 7947; GFX12-WGP-NEXT: s_wait_storecnt 0x0 7948; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 7949; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV 7950; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 7951; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 7952; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 7953; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 7954; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 7955; GFX12-WGP-NEXT: s_endpgm 7956; 7957; GFX12-CU-LABEL: global_agent_acq_rel_monotonic_ret_cmpxchg: 7958; GFX12-CU: ; %bb.0: ; %entry 7959; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 7960; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7961; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 7962; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 7963; GFX12-CU-NEXT: s_wait_kmcnt 0x0 7964; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 7965; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 7966; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7967; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 7968; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 7969; GFX12-CU-NEXT: s_wait_samplecnt 0x0 7970; GFX12-CU-NEXT: s_wait_storecnt 0x0 7971; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 7972; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV 7973; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 7974; GFX12-CU-NEXT: s_wait_samplecnt 0x0 7975; GFX12-CU-NEXT: s_wait_loadcnt 0x0 7976; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 
7977; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 7978; GFX12-CU-NEXT: s_endpgm 7979 ptr addrspace(1) %out, i32 %in, i32 %old) { 7980entry: 7981 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 7982 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") acq_rel monotonic 7983 %val0 = extractvalue { i32, i1 } %val, 0 7984 store i32 %val0, ptr addrspace(1) %out, align 4 7985 ret void 7986} 7987 7988define amdgpu_kernel void @global_agent_seq_cst_monotonic_ret_cmpxchg( 7989; GFX6-LABEL: global_agent_seq_cst_monotonic_ret_cmpxchg: 7990; GFX6: ; %bb.0: ; %entry 7991; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 7992; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 7993; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 7994; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 7995; GFX6-NEXT: s_waitcnt lgkmcnt(0) 7996; GFX6-NEXT: s_mov_b32 s12, s5 7997; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 7998; GFX6-NEXT: s_mov_b32 s10, 0x100f000 7999; GFX6-NEXT: s_mov_b32 s11, -1 8000; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 8001; GFX6-NEXT: s_mov_b32 s5, s12 8002; GFX6-NEXT: s_mov_b32 s6, s11 8003; GFX6-NEXT: s_mov_b32 s7, s10 8004; GFX6-NEXT: v_mov_b32_e32 v0, s9 8005; GFX6-NEXT: v_mov_b32_e32 v2, s8 8006; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 8007; GFX6-NEXT: v_mov_b32_e32 v1, v2 8008; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8009; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 8010; GFX6-NEXT: s_waitcnt vmcnt(0) 8011; GFX6-NEXT: buffer_wbinvl1 8012; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 8013; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 8014; GFX6-NEXT: s_endpgm 8015; 8016; GFX7-LABEL: global_agent_seq_cst_monotonic_ret_cmpxchg: 8017; GFX7: ; %bb.0: ; %entry 8018; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 8019; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 8020; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 8021; GFX7-NEXT: 
s_load_dword s8, s[6:7], 0x3 8022; GFX7-NEXT: s_mov_b64 s[12:13], 16 8023; GFX7-NEXT: s_waitcnt lgkmcnt(0) 8024; GFX7-NEXT: s_mov_b32 s6, s4 8025; GFX7-NEXT: s_mov_b32 s7, s5 8026; GFX7-NEXT: s_mov_b32 s11, s12 8027; GFX7-NEXT: s_mov_b32 s10, s13 8028; GFX7-NEXT: s_add_u32 s6, s6, s11 8029; GFX7-NEXT: s_addc_u32 s10, s7, s10 8030; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 8031; GFX7-NEXT: s_mov_b32 s7, s10 8032; GFX7-NEXT: v_mov_b32_e32 v2, s9 8033; GFX7-NEXT: v_mov_b32_e32 v0, s8 8034; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8035; GFX7-NEXT: v_mov_b32_e32 v3, v0 8036; GFX7-NEXT: v_mov_b32_e32 v0, s6 8037; GFX7-NEXT: v_mov_b32_e32 v1, s7 8038; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8039; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 8040; GFX7-NEXT: s_waitcnt vmcnt(0) 8041; GFX7-NEXT: buffer_wbinvl1_vol 8042; GFX7-NEXT: v_mov_b32_e32 v0, s4 8043; GFX7-NEXT: v_mov_b32_e32 v1, s5 8044; GFX7-NEXT: flat_store_dword v[0:1], v2 8045; GFX7-NEXT: s_endpgm 8046; 8047; GFX10-WGP-LABEL: global_agent_seq_cst_monotonic_ret_cmpxchg: 8048; GFX10-WGP: ; %bb.0: ; %entry 8049; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 8050; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 8051; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 8052; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 8053; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 8054; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 8055; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 8056; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8057; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 8058; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8059; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 8060; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 8061; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 8062; GFX10-WGP-NEXT: buffer_gl1_inv 8063; GFX10-WGP-NEXT: buffer_gl0_inv 8064; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 8065; GFX10-WGP-NEXT: s_endpgm 8066; 8067; 
GFX10-CU-LABEL: global_agent_seq_cst_monotonic_ret_cmpxchg: 8068; GFX10-CU: ; %bb.0: ; %entry 8069; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 8070; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 8071; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 8072; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 8073; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 8074; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 8075; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 8076; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8077; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 8078; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8079; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 8080; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 8081; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 8082; GFX10-CU-NEXT: buffer_gl1_inv 8083; GFX10-CU-NEXT: buffer_gl0_inv 8084; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 8085; GFX10-CU-NEXT: s_endpgm 8086; 8087; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_monotonic_ret_cmpxchg: 8088; SKIP-CACHE-INV: ; %bb.0: ; %entry 8089; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 8090; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 8091; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 8092; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 8093; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 8094; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 8095; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 8096; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 8097; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 8098; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 8099; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 8100; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 8101; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 8102; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 8103; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 8104; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 8105; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 8106; 
SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8107; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 8108; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 8109; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 8110; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 8111; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 8112; SKIP-CACHE-INV-NEXT: s_endpgm 8113; 8114; GFX90A-NOTTGSPLIT-LABEL: global_agent_seq_cst_monotonic_ret_cmpxchg: 8115; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 8116; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 8117; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 8118; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 8119; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 8120; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 8121; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 8122; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 8123; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8124; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 8125; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8126; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 8127; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 8128; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 8129; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 8130; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 8131; 8132; GFX90A-TGSPLIT-LABEL: global_agent_seq_cst_monotonic_ret_cmpxchg: 8133; GFX90A-TGSPLIT: ; %bb.0: ; %entry 8134; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 8135; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 8136; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 8137; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 8138; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 8139; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 8140; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 8141; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 
8142; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 8143; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8144; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 8145; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 8146; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 8147; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 8148; GFX90A-TGSPLIT-NEXT: s_endpgm 8149; 8150; GFX940-NOTTGSPLIT-LABEL: global_agent_seq_cst_monotonic_ret_cmpxchg: 8151; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 8152; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 8153; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 8154; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 8155; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 8156; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 8157; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 8158; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 8159; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8160; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 8161; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 8162; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8163; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 8164; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 8165; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 8166; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 8167; GFX940-NOTTGSPLIT-NEXT: s_endpgm 8168; 8169; GFX940-TGSPLIT-LABEL: global_agent_seq_cst_monotonic_ret_cmpxchg: 8170; GFX940-TGSPLIT: ; %bb.0: ; %entry 8171; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 8172; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 8173; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 8174; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 8175; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 8176; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 8177; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 8178; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 
killed $exec 8179; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 8180; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 8181; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8182; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 8183; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 8184; GFX940-TGSPLIT-NEXT: buffer_inv sc1 8185; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 8186; GFX940-TGSPLIT-NEXT: s_endpgm 8187; 8188; GFX11-WGP-LABEL: global_agent_seq_cst_monotonic_ret_cmpxchg: 8189; GFX11-WGP: ; %bb.0: ; %entry 8190; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 8191; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 8192; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 8193; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 8194; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 8195; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 8196; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 8197; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8198; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 8199; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8200; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 8201; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 8202; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 8203; GFX11-WGP-NEXT: buffer_gl1_inv 8204; GFX11-WGP-NEXT: buffer_gl0_inv 8205; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 8206; GFX11-WGP-NEXT: s_endpgm 8207; 8208; GFX11-CU-LABEL: global_agent_seq_cst_monotonic_ret_cmpxchg: 8209; GFX11-CU: ; %bb.0: ; %entry 8210; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 8211; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 8212; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 8213; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 8214; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 8215; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 8216; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 8217; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8218; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 8219; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8220; 
GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 8221; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 8222; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 8223; GFX11-CU-NEXT: buffer_gl1_inv 8224; GFX11-CU-NEXT: buffer_gl0_inv 8225; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 8226; GFX11-CU-NEXT: s_endpgm 8227; 8228; GFX12-WGP-LABEL: global_agent_seq_cst_monotonic_ret_cmpxchg: 8229; GFX12-WGP: ; %bb.0: ; %entry 8230; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 8231; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 8232; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 8233; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 8234; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 8235; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 8236; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 8237; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8238; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 8239; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 8240; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 8241; GFX12-WGP-NEXT: s_wait_storecnt 0x0 8242; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 8243; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV 8244; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 8245; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 8246; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 8247; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 8248; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 8249; GFX12-WGP-NEXT: s_endpgm 8250; 8251; GFX12-CU-LABEL: global_agent_seq_cst_monotonic_ret_cmpxchg: 8252; GFX12-CU: ; %bb.0: ; %entry 8253; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 8254; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 8255; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 8256; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 8257; GFX12-CU-NEXT: s_wait_kmcnt 0x0 8258; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 8259; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 8260; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8261; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 8262; GFX12-CU-NEXT: s_wait_bvhcnt 
0x0 8263; GFX12-CU-NEXT: s_wait_samplecnt 0x0 8264; GFX12-CU-NEXT: s_wait_storecnt 0x0 8265; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 8266; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV 8267; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 8268; GFX12-CU-NEXT: s_wait_samplecnt 0x0 8269; GFX12-CU-NEXT: s_wait_loadcnt 0x0 8270; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 8271; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 8272; GFX12-CU-NEXT: s_endpgm 8273 ptr addrspace(1) %out, i32 %in, i32 %old) { 8274entry: 8275 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 8276 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") seq_cst monotonic 8277 %val0 = extractvalue { i32, i1 } %val, 0 8278 store i32 %val0, ptr addrspace(1) %out, align 4 8279 ret void 8280} 8281 8282define amdgpu_kernel void @global_agent_monotonic_acquire_ret_cmpxchg( 8283; GFX6-LABEL: global_agent_monotonic_acquire_ret_cmpxchg: 8284; GFX6: ; %bb.0: ; %entry 8285; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 8286; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 8287; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 8288; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 8289; GFX6-NEXT: s_waitcnt lgkmcnt(0) 8290; GFX6-NEXT: s_mov_b32 s12, s5 8291; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 8292; GFX6-NEXT: s_mov_b32 s10, 0x100f000 8293; GFX6-NEXT: s_mov_b32 s11, -1 8294; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 8295; GFX6-NEXT: s_mov_b32 s5, s12 8296; GFX6-NEXT: s_mov_b32 s6, s11 8297; GFX6-NEXT: s_mov_b32 s7, s10 8298; GFX6-NEXT: v_mov_b32_e32 v0, s9 8299; GFX6-NEXT: v_mov_b32_e32 v2, s8 8300; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 8301; GFX6-NEXT: v_mov_b32_e32 v1, v2 8302; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 8303; GFX6-NEXT: s_waitcnt vmcnt(0) 8304; GFX6-NEXT: buffer_wbinvl1 8305; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 
killed $vgpr0_vgpr1 killed $exec 8306; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 8307; GFX6-NEXT: s_endpgm 8308; 8309; GFX7-LABEL: global_agent_monotonic_acquire_ret_cmpxchg: 8310; GFX7: ; %bb.0: ; %entry 8311; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 8312; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 8313; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 8314; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 8315; GFX7-NEXT: s_mov_b64 s[12:13], 16 8316; GFX7-NEXT: s_waitcnt lgkmcnt(0) 8317; GFX7-NEXT: s_mov_b32 s6, s4 8318; GFX7-NEXT: s_mov_b32 s7, s5 8319; GFX7-NEXT: s_mov_b32 s11, s12 8320; GFX7-NEXT: s_mov_b32 s10, s13 8321; GFX7-NEXT: s_add_u32 s6, s6, s11 8322; GFX7-NEXT: s_addc_u32 s10, s7, s10 8323; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 8324; GFX7-NEXT: s_mov_b32 s7, s10 8325; GFX7-NEXT: v_mov_b32_e32 v2, s9 8326; GFX7-NEXT: v_mov_b32_e32 v0, s8 8327; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8328; GFX7-NEXT: v_mov_b32_e32 v3, v0 8329; GFX7-NEXT: v_mov_b32_e32 v0, s6 8330; GFX7-NEXT: v_mov_b32_e32 v1, s7 8331; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 8332; GFX7-NEXT: s_waitcnt vmcnt(0) 8333; GFX7-NEXT: buffer_wbinvl1_vol 8334; GFX7-NEXT: v_mov_b32_e32 v0, s4 8335; GFX7-NEXT: v_mov_b32_e32 v1, s5 8336; GFX7-NEXT: flat_store_dword v[0:1], v2 8337; GFX7-NEXT: s_endpgm 8338; 8339; GFX10-WGP-LABEL: global_agent_monotonic_acquire_ret_cmpxchg: 8340; GFX10-WGP: ; %bb.0: ; %entry 8341; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 8342; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 8343; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 8344; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 8345; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 8346; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 8347; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 8348; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8349; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 8350; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 8351; 
GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 8352; GFX10-WGP-NEXT: buffer_gl1_inv 8353; GFX10-WGP-NEXT: buffer_gl0_inv 8354; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 8355; GFX10-WGP-NEXT: s_endpgm 8356; 8357; GFX10-CU-LABEL: global_agent_monotonic_acquire_ret_cmpxchg: 8358; GFX10-CU: ; %bb.0: ; %entry 8359; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 8360; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 8361; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 8362; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 8363; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 8364; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 8365; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 8366; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8367; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 8368; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 8369; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 8370; GFX10-CU-NEXT: buffer_gl1_inv 8371; GFX10-CU-NEXT: buffer_gl0_inv 8372; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 8373; GFX10-CU-NEXT: s_endpgm 8374; 8375; SKIP-CACHE-INV-LABEL: global_agent_monotonic_acquire_ret_cmpxchg: 8376; SKIP-CACHE-INV: ; %bb.0: ; %entry 8377; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 8378; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 8379; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 8380; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 8381; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 8382; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 8383; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 8384; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 8385; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 8386; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 8387; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 8388; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 8389; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 8390; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 8391; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 8392; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 
killed $vgpr0 def $vgpr0_vgpr1 killed $exec 8393; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 8394; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 8395; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 8396; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 8397; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 8398; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 8399; SKIP-CACHE-INV-NEXT: s_endpgm 8400; 8401; GFX90A-NOTTGSPLIT-LABEL: global_agent_monotonic_acquire_ret_cmpxchg: 8402; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 8403; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 8404; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 8405; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 8406; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 8407; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 8408; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 8409; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 8410; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8411; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 8412; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 8413; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 8414; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 8415; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 8416; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 8417; 8418; GFX90A-TGSPLIT-LABEL: global_agent_monotonic_acquire_ret_cmpxchg: 8419; GFX90A-TGSPLIT: ; %bb.0: ; %entry 8420; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 8421; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 8422; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 8423; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 8424; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 8425; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 8426; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 8427; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8428; 
GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 8429; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 8430; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 8431; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 8432; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 8433; GFX90A-TGSPLIT-NEXT: s_endpgm 8434; 8435; GFX940-NOTTGSPLIT-LABEL: global_agent_monotonic_acquire_ret_cmpxchg: 8436; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 8437; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 8438; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 8439; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 8440; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 8441; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 8442; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 8443; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 8444; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8445; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 8446; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 8447; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 8448; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 8449; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 8450; GFX940-NOTTGSPLIT-NEXT: s_endpgm 8451; 8452; GFX940-TGSPLIT-LABEL: global_agent_monotonic_acquire_ret_cmpxchg: 8453; GFX940-TGSPLIT: ; %bb.0: ; %entry 8454; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 8455; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 8456; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 8457; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 8458; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 8459; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 8460; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 8461; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8462; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 8463; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 8464; GFX940-TGSPLIT-NEXT: 
s_waitcnt vmcnt(0) 8465; GFX940-TGSPLIT-NEXT: buffer_inv sc1 8466; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 8467; GFX940-TGSPLIT-NEXT: s_endpgm 8468; 8469; GFX11-WGP-LABEL: global_agent_monotonic_acquire_ret_cmpxchg: 8470; GFX11-WGP: ; %bb.0: ; %entry 8471; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 8472; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 8473; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 8474; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 8475; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 8476; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 8477; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 8478; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8479; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 8480; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 8481; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 8482; GFX11-WGP-NEXT: buffer_gl1_inv 8483; GFX11-WGP-NEXT: buffer_gl0_inv 8484; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 8485; GFX11-WGP-NEXT: s_endpgm 8486; 8487; GFX11-CU-LABEL: global_agent_monotonic_acquire_ret_cmpxchg: 8488; GFX11-CU: ; %bb.0: ; %entry 8489; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 8490; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 8491; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 8492; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 8493; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 8494; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 8495; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 8496; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8497; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 8498; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 8499; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 8500; GFX11-CU-NEXT: buffer_gl1_inv 8501; GFX11-CU-NEXT: buffer_gl0_inv 8502; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 8503; GFX11-CU-NEXT: s_endpgm 8504; 8505; GFX12-WGP-LABEL: global_agent_monotonic_acquire_ret_cmpxchg: 8506; GFX12-WGP: ; %bb.0: ; %entry 8507; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 8508; 
GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 8509; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 8510; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 8511; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 8512; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 8513; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 8514; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8515; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 8516; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV 8517; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 8518; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 8519; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 8520; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 8521; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 8522; GFX12-WGP-NEXT: s_endpgm 8523; 8524; GFX12-CU-LABEL: global_agent_monotonic_acquire_ret_cmpxchg: 8525; GFX12-CU: ; %bb.0: ; %entry 8526; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 8527; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 8528; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 8529; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 8530; GFX12-CU-NEXT: s_wait_kmcnt 0x0 8531; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 8532; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 8533; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8534; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 8535; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV 8536; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 8537; GFX12-CU-NEXT: s_wait_samplecnt 0x0 8538; GFX12-CU-NEXT: s_wait_loadcnt 0x0 8539; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 8540; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 8541; GFX12-CU-NEXT: s_endpgm 8542 ptr addrspace(1) %out, i32 %in, i32 %old) { 8543entry: 8544 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 8545 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") monotonic acquire 8546 %val0 = extractvalue { i32, i1 } %val, 0 8547 store i32 %val0, ptr addrspace(1) 
%out, align 4
  ret void
}

; Returning cmpxchg on a global (addrspace(1)) pointer at syncscope("agent"),
; success ordering acquire / failure ordering acquire; the old value is
; written back to %out.
; The assertions between the `define` line and the parameter list are
; autogenerated (see the NOTE on the first line of this file); regenerate them
; with utils/update_llc_test_checks.py rather than editing them by hand.
; Expected shape for every run line: the returning cmpswap, a wait on the
; load counter (s_waitcnt vmcnt(0), or s_wait_loadcnt 0x0 on the gfx1200
; runs), then an invalidate (buffer_wbinvl1[_vol], buffer_gl1_inv +
; buffer_gl0_inv, buffer_inv sc1 or global_inv). The run that passes
; -amdgcn-skip-cache-invalidations expects no invalidate.
define amdgpu_kernel void @global_agent_acquire_acquire_ret_cmpxchg(
; GFX6-LABEL: global_agent_acquire_acquire_ret_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9]
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s12, s5
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
; GFX6-NEXT: s_mov_b32 s10, 0x100f000
; GFX6-NEXT: s_mov_b32 s11, -1
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
; GFX6-NEXT: s_mov_b32 s5, s12
; GFX6-NEXT: s_mov_b32 s6, s11
; GFX6-NEXT: s_mov_b32 s7, s10
; GFX6-NEXT: v_mov_b32_e32 v0, s9
; GFX6-NEXT: v_mov_b32_e32 v2, s8
; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX6-NEXT: v_mov_b32_e32 v1, v2
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_agent_acquire_acquire_ret_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9]
; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2
; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3
; GFX7-NEXT: s_mov_b64 s[12:13], 16
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, s4
; GFX7-NEXT: s_mov_b32 s7, s5
; GFX7-NEXT: s_mov_b32 s11, s12
; GFX7-NEXT: s_mov_b32 s10, s13
; GFX7-NEXT: s_add_u32 s6, s6, s11
; GFX7-NEXT: s_addc_u32 s10, s7, s10
; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
; GFX7-NEXT: s_mov_b32 s7, s10
; GFX7-NEXT: v_mov_b32_e32 v2, s9
; GFX7-NEXT: v_mov_b32_e32 v0, s8
; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX7-NEXT: v_mov_b32_e32 v3, v0
; GFX7-NEXT: v_mov_b32_e32 v0, s6
; GFX7-NEXT: v_mov_b32_e32 v1, s7
; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: flat_store_dword v[0:1], v2
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: global_agent_acquire_acquire_ret_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7
; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6
; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3
; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5]
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: global_agent_acquire_acquire_ret_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7
; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6
; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5]
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_agent_acquire_acquire_ret_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5]
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1
; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1
; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8
; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7
; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_agent_acquire_acquire_ret_cmpxchg:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6
; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1
; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5]
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_agent_acquire_acquire_ret_cmpxchg:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6
; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1
; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5]
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX940-NOTTGSPLIT-LABEL: global_agent_acquire_acquire_ret_cmpxchg:
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2
; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1
; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1
; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
; GFX940-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX940-TGSPLIT-LABEL: global_agent_acquire_acquire_ret_cmpxchg:
; GFX940-TGSPLIT: ; %bb.0: ; %entry
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2
; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1
; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT: buffer_inv sc1
; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
; GFX940-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: global_agent_acquire_acquire_ret_cmpxchg:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3
; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2
; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: global_agent_acquire_acquire_ret_cmpxchg:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3
; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2
; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: global_agent_acquire_acquire_ret_cmpxchg:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3
; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2
; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3
; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV
; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: global_agent_acquire_acquire_ret_cmpxchg:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3
; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2
; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
    ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
  ; Element 4 of an i32 array is byte offset 16: this is the `offset:16` on the
  ; cmpswap in the assertions (the gfx700 flat form adds 16 to the address
  ; with s_add_u32/s_addc_u32 instead).
  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") acquire acquire
  ; Field 0 of the { i32, i1 } result is the value that was in memory; it is
  ; the only part of the result this test uses.
  %val0 = extractvalue { i32, i1 } %val, 0
  store i32 %val0, ptr addrspace(1) %out, align 4
  ret void
}

; Returning cmpxchg on a global (addrspace(1)) pointer at syncscope("agent"),
; success ordering release / failure ordering acquire; the old value is
; written back to %out.
; The assertions between the `define` line and the parameter list are
; autogenerated (see the NOTE on the first line of this file); regenerate them
; with utils/update_llc_test_checks.py rather than editing them by hand.
; Expected shape: waits in front of the cmpswap -- s_waitcnt vmcnt(0)
; lgkmcnt(0), plus s_waitcnt_vscnt null, 0x0 on the gfx1010/gfx1100 runs,
; preceded by buffer_wbl2 sc1 on the gfx940 runs, and s_wait_bvhcnt /
; s_wait_samplecnt / s_wait_storecnt / s_wait_loadcnt_dscnt on the gfx1200
; runs -- then the cmpswap, a wait on the load counter (on gfx1200 also on
; bvhcnt and samplecnt) and an invalidate. The run that passes
; -amdgcn-skip-cache-invalidations expects no invalidate.
define amdgpu_kernel void @global_agent_release_acquire_ret_cmpxchg(
; GFX6-LABEL: global_agent_release_acquire_ret_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9]
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s12, s5
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
; GFX6-NEXT: s_mov_b32 s10, 0x100f000
; GFX6-NEXT: s_mov_b32 s11, -1
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
; GFX6-NEXT: s_mov_b32 s5, s12
; GFX6-NEXT: s_mov_b32 s6, s11
; GFX6-NEXT: s_mov_b32 s7, s10
; GFX6-NEXT: v_mov_b32_e32 v0, s9
; GFX6-NEXT: v_mov_b32_e32 v2, s8
; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX6-NEXT: v_mov_b32_e32 v1, v2
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_agent_release_acquire_ret_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9]
; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2
; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3
; GFX7-NEXT: s_mov_b64 s[12:13], 16
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, s4
; GFX7-NEXT: s_mov_b32 s7, s5
; GFX7-NEXT: s_mov_b32 s11, s12
; GFX7-NEXT: s_mov_b32 s10, s13
; GFX7-NEXT: s_add_u32 s6, s6, s11
; GFX7-NEXT: s_addc_u32 s10, s7, s10
; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
; GFX7-NEXT: s_mov_b32 s7, s10
; GFX7-NEXT: v_mov_b32_e32 v2, s9
; GFX7-NEXT: v_mov_b32_e32 v0, s8
; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX7-NEXT: v_mov_b32_e32 v3, v0
; GFX7-NEXT: v_mov_b32_e32 v0, s6
; GFX7-NEXT: v_mov_b32_e32 v1, s7
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: flat_store_dword v[0:1], v2
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: global_agent_release_acquire_ret_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7
; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6
; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5]
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: global_agent_release_acquire_ret_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7
; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6
; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5]
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_agent_release_acquire_ret_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5]
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1
; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1
; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8
; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7
; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_agent_release_acquire_ret_cmpxchg:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6
; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5]
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_agent_release_acquire_ret_cmpxchg:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6
; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5]
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX940-NOTTGSPLIT-LABEL: global_agent_release_acquire_ret_cmpxchg:
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2
; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1
; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1
; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
; GFX940-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX940-TGSPLIT-LABEL: global_agent_release_acquire_ret_cmpxchg:
; GFX940-TGSPLIT: ; %bb.0: ; %entry
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2
; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1
; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT: buffer_inv sc1
; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
; GFX940-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: global_agent_release_acquire_ret_cmpxchg:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3
; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2
; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: global_agent_release_acquire_ret_cmpxchg:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3
; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2
; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: global_agent_release_acquire_ret_cmpxchg:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3
; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2
; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV
; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: global_agent_release_acquire_ret_cmpxchg:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3
; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2
; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
    ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
  ; Element 4 of an i32 array is byte offset 16: this is the `offset:16` on the
  ; cmpswap in the assertions (the gfx700 flat form adds 16 to the address
  ; with s_add_u32/s_addc_u32 instead).
  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") release acquire
  ; Field 0 of the { i32, i1 } result is the value that was in memory; it is
  ; the only part of the result this test uses.
  %val0 = extractvalue { i32, i1 } %val, 0
  store i32 %val0, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @global_agent_acq_rel_acquire_ret_cmpxchg(
; GFX6-LABEL: global_agent_acq_rel_acquire_ret_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9]
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s12, s5
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
; GFX6-NEXT: s_mov_b32 s10, 0x100f000
; GFX6-NEXT: s_mov_b32 s11, -1
;
GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 9123; GFX6-NEXT: s_mov_b32 s5, s12 9124; GFX6-NEXT: s_mov_b32 s6, s11 9125; GFX6-NEXT: s_mov_b32 s7, s10 9126; GFX6-NEXT: v_mov_b32_e32 v0, s9 9127; GFX6-NEXT: v_mov_b32_e32 v2, s8 9128; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 9129; GFX6-NEXT: v_mov_b32_e32 v1, v2 9130; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9131; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 9132; GFX6-NEXT: s_waitcnt vmcnt(0) 9133; GFX6-NEXT: buffer_wbinvl1 9134; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 9135; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 9136; GFX6-NEXT: s_endpgm 9137; 9138; GFX7-LABEL: global_agent_acq_rel_acquire_ret_cmpxchg: 9139; GFX7: ; %bb.0: ; %entry 9140; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 9141; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 9142; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 9143; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 9144; GFX7-NEXT: s_mov_b64 s[12:13], 16 9145; GFX7-NEXT: s_waitcnt lgkmcnt(0) 9146; GFX7-NEXT: s_mov_b32 s6, s4 9147; GFX7-NEXT: s_mov_b32 s7, s5 9148; GFX7-NEXT: s_mov_b32 s11, s12 9149; GFX7-NEXT: s_mov_b32 s10, s13 9150; GFX7-NEXT: s_add_u32 s6, s6, s11 9151; GFX7-NEXT: s_addc_u32 s10, s7, s10 9152; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 9153; GFX7-NEXT: s_mov_b32 s7, s10 9154; GFX7-NEXT: v_mov_b32_e32 v2, s9 9155; GFX7-NEXT: v_mov_b32_e32 v0, s8 9156; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9157; GFX7-NEXT: v_mov_b32_e32 v3, v0 9158; GFX7-NEXT: v_mov_b32_e32 v0, s6 9159; GFX7-NEXT: v_mov_b32_e32 v1, s7 9160; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9161; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 9162; GFX7-NEXT: s_waitcnt vmcnt(0) 9163; GFX7-NEXT: buffer_wbinvl1_vol 9164; GFX7-NEXT: v_mov_b32_e32 v0, s4 9165; GFX7-NEXT: v_mov_b32_e32 v1, s5 9166; GFX7-NEXT: flat_store_dword v[0:1], v2 9167; GFX7-NEXT: 
s_endpgm 9168; 9169; GFX10-WGP-LABEL: global_agent_acq_rel_acquire_ret_cmpxchg: 9170; GFX10-WGP: ; %bb.0: ; %entry 9171; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 9172; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 9173; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 9174; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 9175; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 9176; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 9177; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 9178; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9179; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 9180; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9181; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 9182; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 9183; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 9184; GFX10-WGP-NEXT: buffer_gl1_inv 9185; GFX10-WGP-NEXT: buffer_gl0_inv 9186; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 9187; GFX10-WGP-NEXT: s_endpgm 9188; 9189; GFX10-CU-LABEL: global_agent_acq_rel_acquire_ret_cmpxchg: 9190; GFX10-CU: ; %bb.0: ; %entry 9191; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 9192; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 9193; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 9194; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 9195; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 9196; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 9197; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 9198; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9199; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 9200; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9201; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 9202; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 9203; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 9204; GFX10-CU-NEXT: buffer_gl1_inv 9205; GFX10-CU-NEXT: buffer_gl0_inv 9206; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 9207; GFX10-CU-NEXT: s_endpgm 9208; 9209; SKIP-CACHE-INV-LABEL: global_agent_acq_rel_acquire_ret_cmpxchg: 9210; SKIP-CACHE-INV: ; %bb.0: ; %entry 
9211; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 9212; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 9213; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 9214; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 9215; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 9216; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 9217; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 9218; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 9219; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 9220; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 9221; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 9222; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 9223; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 9224; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 9225; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 9226; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 9227; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 9228; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9229; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 9230; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 9231; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 9232; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 9233; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 9234; SKIP-CACHE-INV-NEXT: s_endpgm 9235; 9236; GFX90A-NOTTGSPLIT-LABEL: global_agent_acq_rel_acquire_ret_cmpxchg: 9237; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 9238; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 9239; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 9240; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 9241; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 9242; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9243; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 9244; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 9245; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9246; 
GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 9247; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9248; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 9249; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 9250; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 9251; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 9252; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 9253; 9254; GFX90A-TGSPLIT-LABEL: global_agent_acq_rel_acquire_ret_cmpxchg: 9255; GFX90A-TGSPLIT: ; %bb.0: ; %entry 9256; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 9257; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 9258; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 9259; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 9260; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9261; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 9262; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 9263; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9264; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 9265; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9266; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 9267; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 9268; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 9269; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 9270; GFX90A-TGSPLIT-NEXT: s_endpgm 9271; 9272; GFX940-NOTTGSPLIT-LABEL: global_agent_acq_rel_acquire_ret_cmpxchg: 9273; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 9274; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 9275; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 9276; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 9277; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 9278; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9279; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 9280; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 9281; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9282; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 
9283; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 9284; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9285; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 9286; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 9287; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 9288; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 9289; GFX940-NOTTGSPLIT-NEXT: s_endpgm 9290; 9291; GFX940-TGSPLIT-LABEL: global_agent_acq_rel_acquire_ret_cmpxchg: 9292; GFX940-TGSPLIT: ; %bb.0: ; %entry 9293; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 9294; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 9295; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 9296; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 9297; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9298; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 9299; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 9300; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9301; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 9302; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 9303; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9304; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 9305; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 9306; GFX940-TGSPLIT-NEXT: buffer_inv sc1 9307; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 9308; GFX940-TGSPLIT-NEXT: s_endpgm 9309; 9310; GFX11-WGP-LABEL: global_agent_acq_rel_acquire_ret_cmpxchg: 9311; GFX11-WGP: ; %bb.0: ; %entry 9312; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 9313; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 9314; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 9315; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 9316; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 9317; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 9318; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 9319; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9320; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 9321; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 
lgkmcnt(0) 9322; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 9323; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 9324; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 9325; GFX11-WGP-NEXT: buffer_gl1_inv 9326; GFX11-WGP-NEXT: buffer_gl0_inv 9327; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 9328; GFX11-WGP-NEXT: s_endpgm 9329; 9330; GFX11-CU-LABEL: global_agent_acq_rel_acquire_ret_cmpxchg: 9331; GFX11-CU: ; %bb.0: ; %entry 9332; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 9333; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 9334; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 9335; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 9336; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 9337; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 9338; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 9339; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9340; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 9341; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9342; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 9343; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 9344; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 9345; GFX11-CU-NEXT: buffer_gl1_inv 9346; GFX11-CU-NEXT: buffer_gl0_inv 9347; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 9348; GFX11-CU-NEXT: s_endpgm 9349; 9350; GFX12-WGP-LABEL: global_agent_acq_rel_acquire_ret_cmpxchg: 9351; GFX12-WGP: ; %bb.0: ; %entry 9352; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 9353; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 9354; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 9355; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 9356; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 9357; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 9358; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 9359; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9360; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 9361; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 9362; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 9363; GFX12-WGP-NEXT: s_wait_storecnt 0x0 9364; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 9365; 
GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV 9366; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 9367; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 9368; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 9369; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 9370; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 9371; GFX12-WGP-NEXT: s_endpgm 9372; 9373; GFX12-CU-LABEL: global_agent_acq_rel_acquire_ret_cmpxchg: 9374; GFX12-CU: ; %bb.0: ; %entry 9375; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 9376; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 9377; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 9378; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 9379; GFX12-CU-NEXT: s_wait_kmcnt 0x0 9380; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 9381; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 9382; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9383; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 9384; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 9385; GFX12-CU-NEXT: s_wait_samplecnt 0x0 9386; GFX12-CU-NEXT: s_wait_storecnt 0x0 9387; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 9388; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV 9389; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 9390; GFX12-CU-NEXT: s_wait_samplecnt 0x0 9391; GFX12-CU-NEXT: s_wait_loadcnt 0x0 9392; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 9393; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 9394; GFX12-CU-NEXT: s_endpgm 9395 ptr addrspace(1) %out, i32 %in, i32 %old) { 9396entry: 9397 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 9398 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") acq_rel acquire 9399 %val0 = extractvalue { i32, i1 } %val, 0 9400 store i32 %val0, ptr addrspace(1) %out, align 4 9401 ret void 9402} 9403
; Test kernel: volatile compare-and-swap at syncscope("agent") on global
; memory, with seq_cst ordering on success and acquire ordering on failure.
;   %out - global (addrspace(1)) pointer. The cmpxchg targets i32 element 4
;          of it (the 16-byte offset visible in the expected instructions);
;          the result is stored to element 0.
;   %in  - replacement value written when the comparison succeeds.
;   %old - expected value the memory contents are compared against.
; Only element 0 of the { i32, i1 } result is used; the i1 success flag is
; ignored.
; The assertion lines embedded in the signature are autogenerated (see the
; note at the top of the file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
9404define amdgpu_kernel void @global_agent_seq_cst_acquire_ret_cmpxchg( 9405; GFX6-LABEL: global_agent_seq_cst_acquire_ret_cmpxchg: 9406; GFX6: ; %bb.0: ; %entry 9407; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 
9408; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 9409; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 9410; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 9411; GFX6-NEXT: s_waitcnt lgkmcnt(0) 9412; GFX6-NEXT: s_mov_b32 s12, s5 9413; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 9414; GFX6-NEXT: s_mov_b32 s10, 0x100f000 9415; GFX6-NEXT: s_mov_b32 s11, -1 9416; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 9417; GFX6-NEXT: s_mov_b32 s5, s12 9418; GFX6-NEXT: s_mov_b32 s6, s11 9419; GFX6-NEXT: s_mov_b32 s7, s10 9420; GFX6-NEXT: v_mov_b32_e32 v0, s9 9421; GFX6-NEXT: v_mov_b32_e32 v2, s8 9422; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 9423; GFX6-NEXT: v_mov_b32_e32 v1, v2 9424; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9425; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 9426; GFX6-NEXT: s_waitcnt vmcnt(0) 9427; GFX6-NEXT: buffer_wbinvl1 9428; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 9429; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 9430; GFX6-NEXT: s_endpgm 9431; 9432; GFX7-LABEL: global_agent_seq_cst_acquire_ret_cmpxchg: 9433; GFX7: ; %bb.0: ; %entry 9434; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 9435; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 9436; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 9437; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 9438; GFX7-NEXT: s_mov_b64 s[12:13], 16 9439; GFX7-NEXT: s_waitcnt lgkmcnt(0) 9440; GFX7-NEXT: s_mov_b32 s6, s4 9441; GFX7-NEXT: s_mov_b32 s7, s5 9442; GFX7-NEXT: s_mov_b32 s11, s12 9443; GFX7-NEXT: s_mov_b32 s10, s13 9444; GFX7-NEXT: s_add_u32 s6, s6, s11 9445; GFX7-NEXT: s_addc_u32 s10, s7, s10 9446; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 9447; GFX7-NEXT: s_mov_b32 s7, s10 9448; GFX7-NEXT: v_mov_b32_e32 v2, s9 9449; GFX7-NEXT: v_mov_b32_e32 v0, s8 9450; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9451; GFX7-NEXT: v_mov_b32_e32 v3, v0 9452; GFX7-NEXT: 
v_mov_b32_e32 v0, s6 9453; GFX7-NEXT: v_mov_b32_e32 v1, s7 9454; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9455; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 9456; GFX7-NEXT: s_waitcnt vmcnt(0) 9457; GFX7-NEXT: buffer_wbinvl1_vol 9458; GFX7-NEXT: v_mov_b32_e32 v0, s4 9459; GFX7-NEXT: v_mov_b32_e32 v1, s5 9460; GFX7-NEXT: flat_store_dword v[0:1], v2 9461; GFX7-NEXT: s_endpgm 9462; 9463; GFX10-WGP-LABEL: global_agent_seq_cst_acquire_ret_cmpxchg: 9464; GFX10-WGP: ; %bb.0: ; %entry 9465; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 9466; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 9467; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 9468; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 9469; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 9470; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 9471; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 9472; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9473; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 9474; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9475; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 9476; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 9477; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 9478; GFX10-WGP-NEXT: buffer_gl1_inv 9479; GFX10-WGP-NEXT: buffer_gl0_inv 9480; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 9481; GFX10-WGP-NEXT: s_endpgm 9482; 9483; GFX10-CU-LABEL: global_agent_seq_cst_acquire_ret_cmpxchg: 9484; GFX10-CU: ; %bb.0: ; %entry 9485; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 9486; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 9487; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 9488; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 9489; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 9490; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 9491; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 9492; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9493; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 9494; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9495; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 9496; 
GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 9497; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 9498; GFX10-CU-NEXT: buffer_gl1_inv 9499; GFX10-CU-NEXT: buffer_gl0_inv 9500; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 9501; GFX10-CU-NEXT: s_endpgm 9502; 9503; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_acquire_ret_cmpxchg: 9504; SKIP-CACHE-INV: ; %bb.0: ; %entry 9505; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 9506; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 9507; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 9508; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 9509; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 9510; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 9511; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 9512; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 9513; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 9514; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 9515; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 9516; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 9517; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 9518; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 9519; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 9520; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 9521; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 9522; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9523; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 9524; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 9525; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 9526; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 9527; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 9528; SKIP-CACHE-INV-NEXT: s_endpgm 9529; 9530; GFX90A-NOTTGSPLIT-LABEL: global_agent_seq_cst_acquire_ret_cmpxchg: 9531; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 9532; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 9533; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 
9534; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 9535; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 9536; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9537; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 9538; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 9539; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9540; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 9541; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9542; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 9543; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 9544; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 9545; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 9546; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 9547; 9548; GFX90A-TGSPLIT-LABEL: global_agent_seq_cst_acquire_ret_cmpxchg: 9549; GFX90A-TGSPLIT: ; %bb.0: ; %entry 9550; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 9551; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 9552; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 9553; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 9554; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9555; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 9556; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 9557; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9558; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 9559; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9560; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 9561; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 9562; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 9563; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 9564; GFX90A-TGSPLIT-NEXT: s_endpgm 9565; 9566; GFX940-NOTTGSPLIT-LABEL: global_agent_seq_cst_acquire_ret_cmpxchg: 9567; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 9568; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 9569; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 9570; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, 
s[4:5], 0x8 9571; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 9572; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9573; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 9574; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 9575; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9576; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 9577; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 9578; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9579; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 9580; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 9581; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 9582; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 9583; GFX940-NOTTGSPLIT-NEXT: s_endpgm 9584; 9585; GFX940-TGSPLIT-LABEL: global_agent_seq_cst_acquire_ret_cmpxchg: 9586; GFX940-TGSPLIT: ; %bb.0: ; %entry 9587; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 9588; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 9589; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 9590; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 9591; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9592; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 9593; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 9594; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9595; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 9596; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 9597; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9598; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 9599; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 9600; GFX940-TGSPLIT-NEXT: buffer_inv sc1 9601; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 9602; GFX940-TGSPLIT-NEXT: s_endpgm 9603; 9604; GFX11-WGP-LABEL: global_agent_seq_cst_acquire_ret_cmpxchg: 9605; GFX11-WGP: ; %bb.0: ; %entry 9606; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 9607; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 9608; GFX11-WGP-NEXT: s_load_b32 
s3, s[4:5], 0x8 9609; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 9610; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 9611; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 9612; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 9613; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9614; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 9615; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9616; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 9617; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 9618; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 9619; GFX11-WGP-NEXT: buffer_gl1_inv 9620; GFX11-WGP-NEXT: buffer_gl0_inv 9621; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 9622; GFX11-WGP-NEXT: s_endpgm 9623; 9624; GFX11-CU-LABEL: global_agent_seq_cst_acquire_ret_cmpxchg: 9625; GFX11-CU: ; %bb.0: ; %entry 9626; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 9627; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 9628; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 9629; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 9630; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 9631; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 9632; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 9633; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9634; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 9635; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9636; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 9637; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 9638; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 9639; GFX11-CU-NEXT: buffer_gl1_inv 9640; GFX11-CU-NEXT: buffer_gl0_inv 9641; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 9642; GFX11-CU-NEXT: s_endpgm 9643; 9644; GFX12-WGP-LABEL: global_agent_seq_cst_acquire_ret_cmpxchg: 9645; GFX12-WGP: ; %bb.0: ; %entry 9646; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 9647; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 9648; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 9649; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 9650; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 9651; GFX12-WGP-NEXT: 
v_mov_b32_e32 v1, s3 9652; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 9653; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9654; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 9655; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 9656; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 9657; GFX12-WGP-NEXT: s_wait_storecnt 0x0 9658; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 9659; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV 9660; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 9661; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 9662; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 9663; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 9664; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 9665; GFX12-WGP-NEXT: s_endpgm 9666; 9667; GFX12-CU-LABEL: global_agent_seq_cst_acquire_ret_cmpxchg: 9668; GFX12-CU: ; %bb.0: ; %entry 9669; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 9670; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 9671; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 9672; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 9673; GFX12-CU-NEXT: s_wait_kmcnt 0x0 9674; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 9675; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 9676; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9677; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 9678; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 9679; GFX12-CU-NEXT: s_wait_samplecnt 0x0 9680; GFX12-CU-NEXT: s_wait_storecnt 0x0 9681; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 9682; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV 9683; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 9684; GFX12-CU-NEXT: s_wait_samplecnt 0x0 9685; GFX12-CU-NEXT: s_wait_loadcnt 0x0 9686; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 9687; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 9688; GFX12-CU-NEXT: s_endpgm 9689 ptr addrspace(1) %out, i32 %in, i32 %old) { 9690entry: 9691 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 9692 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, 
i32 %in syncscope("agent") seq_cst acquire 9693 %val0 = extractvalue { i32, i1 } %val, 0 9694 store i32 %val0, ptr addrspace(1) %out, align 4 9695 ret void 9696} 9697
; Test kernel: volatile compare-and-swap at syncscope("agent") on global
; memory, with monotonic ordering on success and seq_cst ordering on failure.
;   %out - global (addrspace(1)) pointer. The cmpxchg targets i32 element 4
;          of it (the 16-byte offset visible in the expected instructions);
;          the result is stored to element 0.
;   %in  - replacement value written when the comparison succeeds.
;   %old - expected value the memory contents are compared against.
; Only element 0 of the { i32, i1 } result is used; the i1 success flag is
; ignored.
; The expected code still waits on outstanding memory operations before the
; atomic, and waits again (with a cache invalidate on most targets) after it.
; NOTE(review): presumably this is because the stronger failure ordering is
; honored for the whole operation - confirm against the AMDGPU memory model.
; The assertion lines embedded in the signature are autogenerated (see the
; note at the top of the file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
9698define amdgpu_kernel void @global_agent_monotonic_seq_cst_ret_cmpxchg( 9699; GFX6-LABEL: global_agent_monotonic_seq_cst_ret_cmpxchg: 9700; GFX6: ; %bb.0: ; %entry 9701; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 9702; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 9703; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 9704; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 9705; GFX6-NEXT: s_waitcnt lgkmcnt(0) 9706; GFX6-NEXT: s_mov_b32 s12, s5 9707; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 9708; GFX6-NEXT: s_mov_b32 s10, 0x100f000 9709; GFX6-NEXT: s_mov_b32 s11, -1 9710; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 9711; GFX6-NEXT: s_mov_b32 s5, s12 9712; GFX6-NEXT: s_mov_b32 s6, s11 9713; GFX6-NEXT: s_mov_b32 s7, s10 9714; GFX6-NEXT: v_mov_b32_e32 v0, s9 9715; GFX6-NEXT: v_mov_b32_e32 v2, s8 9716; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 9717; GFX6-NEXT: v_mov_b32_e32 v1, v2 9718; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9719; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 9720; GFX6-NEXT: s_waitcnt vmcnt(0) 9721; GFX6-NEXT: buffer_wbinvl1 9722; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 9723; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 9724; GFX6-NEXT: s_endpgm 9725; 9726; GFX7-LABEL: global_agent_monotonic_seq_cst_ret_cmpxchg: 9727; GFX7: ; %bb.0: ; %entry 9728; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 9729; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 9730; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 9731; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 9732; GFX7-NEXT: s_mov_b64 s[12:13], 16 9733; GFX7-NEXT: s_waitcnt lgkmcnt(0) 9734; GFX7-NEXT: s_mov_b32 s6, s4 9735; GFX7-NEXT: s_mov_b32 s7, s5 9736; GFX7-NEXT: s_mov_b32 s11, s12 9737; GFX7-NEXT: s_mov_b32 s10, s13 9738; GFX7-NEXT: 
s_add_u32 s6, s6, s11 9739; GFX7-NEXT: s_addc_u32 s10, s7, s10 9740; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 9741; GFX7-NEXT: s_mov_b32 s7, s10 9742; GFX7-NEXT: v_mov_b32_e32 v2, s9 9743; GFX7-NEXT: v_mov_b32_e32 v0, s8 9744; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9745; GFX7-NEXT: v_mov_b32_e32 v3, v0 9746; GFX7-NEXT: v_mov_b32_e32 v0, s6 9747; GFX7-NEXT: v_mov_b32_e32 v1, s7 9748; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9749; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 9750; GFX7-NEXT: s_waitcnt vmcnt(0) 9751; GFX7-NEXT: buffer_wbinvl1_vol 9752; GFX7-NEXT: v_mov_b32_e32 v0, s4 9753; GFX7-NEXT: v_mov_b32_e32 v1, s5 9754; GFX7-NEXT: flat_store_dword v[0:1], v2 9755; GFX7-NEXT: s_endpgm 9756; 9757; GFX10-WGP-LABEL: global_agent_monotonic_seq_cst_ret_cmpxchg: 9758; GFX10-WGP: ; %bb.0: ; %entry 9759; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 9760; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 9761; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 9762; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 9763; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 9764; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 9765; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 9766; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9767; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 9768; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9769; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 9770; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 9771; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 9772; GFX10-WGP-NEXT: buffer_gl1_inv 9773; GFX10-WGP-NEXT: buffer_gl0_inv 9774; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 9775; GFX10-WGP-NEXT: s_endpgm 9776; 9777; GFX10-CU-LABEL: global_agent_monotonic_seq_cst_ret_cmpxchg: 9778; GFX10-CU: ; %bb.0: ; %entry 9779; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 9780; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 9781; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 9782; GFX10-CU-NEXT: s_load_dword 
s6, s[8:9], 0xc 9783; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 9784; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 9785; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 9786; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9787; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 9788; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9789; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 9790; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 9791; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 9792; GFX10-CU-NEXT: buffer_gl1_inv 9793; GFX10-CU-NEXT: buffer_gl0_inv 9794; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 9795; GFX10-CU-NEXT: s_endpgm 9796; 9797; SKIP-CACHE-INV-LABEL: global_agent_monotonic_seq_cst_ret_cmpxchg: 9798; SKIP-CACHE-INV: ; %bb.0: ; %entry 9799; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 9800; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 9801; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 9802; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 9803; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 9804; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 9805; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 9806; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 9807; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 9808; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 9809; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 9810; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 9811; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 9812; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 9813; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 9814; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 9815; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 9816; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9817; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 9818; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 9819; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 9820; 
SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 9821; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 9822; SKIP-CACHE-INV-NEXT: s_endpgm 9823; 9824; GFX90A-NOTTGSPLIT-LABEL: global_agent_monotonic_seq_cst_ret_cmpxchg: 9825; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 9826; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 9827; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 9828; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 9829; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 9830; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9831; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 9832; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 9833; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9834; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 9835; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9836; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 9837; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 9838; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 9839; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 9840; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 9841; 9842; GFX90A-TGSPLIT-LABEL: global_agent_monotonic_seq_cst_ret_cmpxchg: 9843; GFX90A-TGSPLIT: ; %bb.0: ; %entry 9844; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 9845; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 9846; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 9847; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 9848; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9849; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 9850; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 9851; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9852; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 9853; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9854; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 9855; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 9856; GFX90A-TGSPLIT-NEXT: 
buffer_wbinvl1_vol 9857; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 9858; GFX90A-TGSPLIT-NEXT: s_endpgm 9859; 9860; GFX940-NOTTGSPLIT-LABEL: global_agent_monotonic_seq_cst_ret_cmpxchg: 9861; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 9862; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 9863; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 9864; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 9865; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 9866; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9867; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 9868; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 9869; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9870; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 9871; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 9872; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9873; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 9874; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 9875; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 9876; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 9877; GFX940-NOTTGSPLIT-NEXT: s_endpgm 9878; 9879; GFX940-TGSPLIT-LABEL: global_agent_monotonic_seq_cst_ret_cmpxchg: 9880; GFX940-TGSPLIT: ; %bb.0: ; %entry 9881; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 9882; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 9883; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 9884; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 9885; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9886; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 9887; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 9888; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9889; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 9890; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 9891; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9892; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 9893; 
GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 9894; GFX940-TGSPLIT-NEXT: buffer_inv sc1 9895; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 9896; GFX940-TGSPLIT-NEXT: s_endpgm 9897; 9898; GFX11-WGP-LABEL: global_agent_monotonic_seq_cst_ret_cmpxchg: 9899; GFX11-WGP: ; %bb.0: ; %entry 9900; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 9901; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 9902; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 9903; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 9904; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 9905; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 9906; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 9907; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9908; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 9909; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9910; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 9911; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 9912; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 9913; GFX11-WGP-NEXT: buffer_gl1_inv 9914; GFX11-WGP-NEXT: buffer_gl0_inv 9915; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 9916; GFX11-WGP-NEXT: s_endpgm 9917; 9918; GFX11-CU-LABEL: global_agent_monotonic_seq_cst_ret_cmpxchg: 9919; GFX11-CU: ; %bb.0: ; %entry 9920; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 9921; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 9922; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 9923; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 9924; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 9925; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 9926; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 9927; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9928; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 9929; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9930; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 9931; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 9932; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 9933; GFX11-CU-NEXT: buffer_gl1_inv 9934; GFX11-CU-NEXT: buffer_gl0_inv 9935; GFX11-CU-NEXT: 
global_store_b32 v0, v1, s[0:1] 9936; GFX11-CU-NEXT: s_endpgm 9937; 9938; GFX12-WGP-LABEL: global_agent_monotonic_seq_cst_ret_cmpxchg: 9939; GFX12-WGP: ; %bb.0: ; %entry 9940; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 9941; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 9942; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 9943; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 9944; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 9945; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 9946; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 9947; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9948; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 9949; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 9950; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 9951; GFX12-WGP-NEXT: s_wait_storecnt 0x0 9952; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 9953; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV 9954; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 9955; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 9956; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 9957; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 9958; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 9959; GFX12-WGP-NEXT: s_endpgm 9960; 9961; GFX12-CU-LABEL: global_agent_monotonic_seq_cst_ret_cmpxchg: 9962; GFX12-CU: ; %bb.0: ; %entry 9963; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 9964; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 9965; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 9966; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 9967; GFX12-CU-NEXT: s_wait_kmcnt 0x0 9968; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 9969; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 9970; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9971; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 9972; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 9973; GFX12-CU-NEXT: s_wait_samplecnt 0x0 9974; GFX12-CU-NEXT: s_wait_storecnt 0x0 9975; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 9976; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV 9977; 
GFX12-CU-NEXT: s_wait_bvhcnt 0x0 9978; GFX12-CU-NEXT: s_wait_samplecnt 0x0 9979; GFX12-CU-NEXT: s_wait_loadcnt 0x0 9980; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 9981; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 9982; GFX12-CU-NEXT: s_endpgm 9983 ptr addrspace(1) %out, i32 %in, i32 %old) { 9984entry: 9985 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 9986 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") monotonic seq_cst 9987 %val0 = extractvalue { i32, i1 } %val, 0 9988 store i32 %val0, ptr addrspace(1) %out, align 4 9989 ret void 9990} 9991 9992define amdgpu_kernel void @global_agent_acquire_seq_cst_ret_cmpxchg( 9993; GFX6-LABEL: global_agent_acquire_seq_cst_ret_cmpxchg: 9994; GFX6: ; %bb.0: ; %entry 9995; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 9996; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 9997; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 9998; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 9999; GFX6-NEXT: s_waitcnt lgkmcnt(0) 10000; GFX6-NEXT: s_mov_b32 s12, s5 10001; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 10002; GFX6-NEXT: s_mov_b32 s10, 0x100f000 10003; GFX6-NEXT: s_mov_b32 s11, -1 10004; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 10005; GFX6-NEXT: s_mov_b32 s5, s12 10006; GFX6-NEXT: s_mov_b32 s6, s11 10007; GFX6-NEXT: s_mov_b32 s7, s10 10008; GFX6-NEXT: v_mov_b32_e32 v0, s9 10009; GFX6-NEXT: v_mov_b32_e32 v2, s8 10010; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 10011; GFX6-NEXT: v_mov_b32_e32 v1, v2 10012; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10013; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 10014; GFX6-NEXT: s_waitcnt vmcnt(0) 10015; GFX6-NEXT: buffer_wbinvl1 10016; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 10017; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 10018; GFX6-NEXT: s_endpgm 10019; 10020; GFX7-LABEL: global_agent_acquire_seq_cst_ret_cmpxchg: 10021; GFX7: 
; %bb.0: ; %entry 10022; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 10023; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 10024; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 10025; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 10026; GFX7-NEXT: s_mov_b64 s[12:13], 16 10027; GFX7-NEXT: s_waitcnt lgkmcnt(0) 10028; GFX7-NEXT: s_mov_b32 s6, s4 10029; GFX7-NEXT: s_mov_b32 s7, s5 10030; GFX7-NEXT: s_mov_b32 s11, s12 10031; GFX7-NEXT: s_mov_b32 s10, s13 10032; GFX7-NEXT: s_add_u32 s6, s6, s11 10033; GFX7-NEXT: s_addc_u32 s10, s7, s10 10034; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 10035; GFX7-NEXT: s_mov_b32 s7, s10 10036; GFX7-NEXT: v_mov_b32_e32 v2, s9 10037; GFX7-NEXT: v_mov_b32_e32 v0, s8 10038; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 10039; GFX7-NEXT: v_mov_b32_e32 v3, v0 10040; GFX7-NEXT: v_mov_b32_e32 v0, s6 10041; GFX7-NEXT: v_mov_b32_e32 v1, s7 10042; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10043; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 10044; GFX7-NEXT: s_waitcnt vmcnt(0) 10045; GFX7-NEXT: buffer_wbinvl1_vol 10046; GFX7-NEXT: v_mov_b32_e32 v0, s4 10047; GFX7-NEXT: v_mov_b32_e32 v1, s5 10048; GFX7-NEXT: flat_store_dword v[0:1], v2 10049; GFX7-NEXT: s_endpgm 10050; 10051; GFX10-WGP-LABEL: global_agent_acquire_seq_cst_ret_cmpxchg: 10052; GFX10-WGP: ; %bb.0: ; %entry 10053; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 10054; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 10055; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 10056; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 10057; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 10058; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 10059; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 10060; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 10061; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 10062; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10063; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 10064; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 10065; 
GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 10066; GFX10-WGP-NEXT: buffer_gl1_inv 10067; GFX10-WGP-NEXT: buffer_gl0_inv 10068; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 10069; GFX10-WGP-NEXT: s_endpgm 10070; 10071; GFX10-CU-LABEL: global_agent_acquire_seq_cst_ret_cmpxchg: 10072; GFX10-CU: ; %bb.0: ; %entry 10073; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 10074; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 10075; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 10076; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 10077; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 10078; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 10079; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 10080; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 10081; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 10082; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10083; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 10084; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 10085; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 10086; GFX10-CU-NEXT: buffer_gl1_inv 10087; GFX10-CU-NEXT: buffer_gl0_inv 10088; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 10089; GFX10-CU-NEXT: s_endpgm 10090; 10091; SKIP-CACHE-INV-LABEL: global_agent_acquire_seq_cst_ret_cmpxchg: 10092; SKIP-CACHE-INV: ; %bb.0: ; %entry 10093; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 10094; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 10095; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 10096; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 10097; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 10098; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 10099; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 10100; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 10101; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 10102; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 10103; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 10104; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 10105; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 10106; 
SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 10107; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 10108; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 10109; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 10110; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10111; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 10112; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 10113; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 10114; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 10115; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 10116; SKIP-CACHE-INV-NEXT: s_endpgm 10117; 10118; GFX90A-NOTTGSPLIT-LABEL: global_agent_acquire_seq_cst_ret_cmpxchg: 10119; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 10120; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10121; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 10122; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 10123; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 10124; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10125; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 10126; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 10127; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 10128; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 10129; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10130; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 10131; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 10132; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 10133; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 10134; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 10135; 10136; GFX90A-TGSPLIT-LABEL: global_agent_acquire_seq_cst_ret_cmpxchg: 10137; GFX90A-TGSPLIT: ; %bb.0: ; %entry 10138; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10139; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 10140; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 10141; 
GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 10142; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10143; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 10144; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 10145; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 10146; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 10147; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10148; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 10149; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 10150; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 10151; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 10152; GFX90A-TGSPLIT-NEXT: s_endpgm 10153; 10154; GFX940-NOTTGSPLIT-LABEL: global_agent_acquire_seq_cst_ret_cmpxchg: 10155; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 10156; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10157; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 10158; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 10159; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 10160; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10161; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 10162; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 10163; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 10164; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 10165; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 10166; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10167; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 10168; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 10169; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 10170; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 10171; GFX940-NOTTGSPLIT-NEXT: s_endpgm 10172; 10173; GFX940-TGSPLIT-LABEL: global_agent_acquire_seq_cst_ret_cmpxchg: 10174; GFX940-TGSPLIT: ; %bb.0: ; %entry 10175; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10176; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 10177; 
GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 10178; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 10179; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10180; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 10181; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 10182; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 10183; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 10184; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 10185; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10186; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 10187; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 10188; GFX940-TGSPLIT-NEXT: buffer_inv sc1 10189; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 10190; GFX940-TGSPLIT-NEXT: s_endpgm 10191; 10192; GFX11-WGP-LABEL: global_agent_acquire_seq_cst_ret_cmpxchg: 10193; GFX11-WGP: ; %bb.0: ; %entry 10194; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 10195; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 10196; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 10197; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 10198; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 10199; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 10200; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 10201; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 10202; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 10203; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10204; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 10205; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 10206; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 10207; GFX11-WGP-NEXT: buffer_gl1_inv 10208; GFX11-WGP-NEXT: buffer_gl0_inv 10209; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 10210; GFX11-WGP-NEXT: s_endpgm 10211; 10212; GFX11-CU-LABEL: global_agent_acquire_seq_cst_ret_cmpxchg: 10213; GFX11-CU: ; %bb.0: ; %entry 10214; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 10215; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 10216; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 10217; 
GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 10218; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 10219; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 10220; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 10221; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 10222; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 10223; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10224; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 10225; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 10226; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 10227; GFX11-CU-NEXT: buffer_gl1_inv 10228; GFX11-CU-NEXT: buffer_gl0_inv 10229; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 10230; GFX11-CU-NEXT: s_endpgm 10231; 10232; GFX12-WGP-LABEL: global_agent_acquire_seq_cst_ret_cmpxchg: 10233; GFX12-WGP: ; %bb.0: ; %entry 10234; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 10235; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 10236; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 10237; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 10238; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 10239; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 10240; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 10241; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 10242; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 10243; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 10244; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 10245; GFX12-WGP-NEXT: s_wait_storecnt 0x0 10246; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 10247; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV 10248; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 10249; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 10250; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 10251; GFX12-WGP-NEXT: s_endpgm 10252; 10253; GFX12-CU-LABEL: global_agent_acquire_seq_cst_ret_cmpxchg: 10254; GFX12-CU: ; %bb.0: ; %entry 10255; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 10256; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 10257; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 10258; GFX12-CU-NEXT: 
s_load_b32 s2, s[4:5], 0xc 10259; GFX12-CU-NEXT: s_wait_kmcnt 0x0 10260; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 10261; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 10262; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 10263; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 10264; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 10265; GFX12-CU-NEXT: s_wait_samplecnt 0x0 10266; GFX12-CU-NEXT: s_wait_storecnt 0x0 10267; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 10268; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV 10269; GFX12-CU-NEXT: s_wait_loadcnt 0x0 10270; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 10271; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 10272; GFX12-CU-NEXT: s_endpgm 10273 ptr addrspace(1) %out, i32 %in, i32 %old) { 10274entry: 10275 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 10276 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") acquire seq_cst 10277 %val0 = extractvalue { i32, i1 } %val, 0 10278 store i32 %val0, ptr addrspace(1) %out, align 4 10279 ret void 10280} 10281 10282define amdgpu_kernel void @global_agent_release_seq_cst_ret_cmpxchg( 10283; GFX6-LABEL: global_agent_release_seq_cst_ret_cmpxchg: 10284; GFX6: ; %bb.0: ; %entry 10285; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 10286; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 10287; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 10288; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 10289; GFX6-NEXT: s_waitcnt lgkmcnt(0) 10290; GFX6-NEXT: s_mov_b32 s12, s5 10291; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 10292; GFX6-NEXT: s_mov_b32 s10, 0x100f000 10293; GFX6-NEXT: s_mov_b32 s11, -1 10294; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 10295; GFX6-NEXT: s_mov_b32 s5, s12 10296; GFX6-NEXT: s_mov_b32 s6, s11 10297; GFX6-NEXT: s_mov_b32 s7, s10 10298; GFX6-NEXT: v_mov_b32_e32 v0, s9 10299; GFX6-NEXT: v_mov_b32_e32 v2, s8 10300; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 
def $vgpr0_vgpr1 killed $exec 10301; GFX6-NEXT: v_mov_b32_e32 v1, v2 10302; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10303; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 10304; GFX6-NEXT: s_waitcnt vmcnt(0) 10305; GFX6-NEXT: buffer_wbinvl1 10306; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 10307; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 10308; GFX6-NEXT: s_endpgm 10309; 10310; GFX7-LABEL: global_agent_release_seq_cst_ret_cmpxchg: 10311; GFX7: ; %bb.0: ; %entry 10312; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 10313; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 10314; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 10315; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 10316; GFX7-NEXT: s_mov_b64 s[12:13], 16 10317; GFX7-NEXT: s_waitcnt lgkmcnt(0) 10318; GFX7-NEXT: s_mov_b32 s6, s4 10319; GFX7-NEXT: s_mov_b32 s7, s5 10320; GFX7-NEXT: s_mov_b32 s11, s12 10321; GFX7-NEXT: s_mov_b32 s10, s13 10322; GFX7-NEXT: s_add_u32 s6, s6, s11 10323; GFX7-NEXT: s_addc_u32 s10, s7, s10 10324; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 10325; GFX7-NEXT: s_mov_b32 s7, s10 10326; GFX7-NEXT: v_mov_b32_e32 v2, s9 10327; GFX7-NEXT: v_mov_b32_e32 v0, s8 10328; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 10329; GFX7-NEXT: v_mov_b32_e32 v3, v0 10330; GFX7-NEXT: v_mov_b32_e32 v0, s6 10331; GFX7-NEXT: v_mov_b32_e32 v1, s7 10332; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10333; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 10334; GFX7-NEXT: s_waitcnt vmcnt(0) 10335; GFX7-NEXT: buffer_wbinvl1_vol 10336; GFX7-NEXT: v_mov_b32_e32 v0, s4 10337; GFX7-NEXT: v_mov_b32_e32 v1, s5 10338; GFX7-NEXT: flat_store_dword v[0:1], v2 10339; GFX7-NEXT: s_endpgm 10340; 10341; GFX10-WGP-LABEL: global_agent_release_seq_cst_ret_cmpxchg: 10342; GFX10-WGP: ; %bb.0: ; %entry 10343; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 10344; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 10345; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 
0x8 10346; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 10347; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 10348; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 10349; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 10350; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 10351; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 10352; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10353; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 10354; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 10355; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 10356; GFX10-WGP-NEXT: buffer_gl1_inv 10357; GFX10-WGP-NEXT: buffer_gl0_inv 10358; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 10359; GFX10-WGP-NEXT: s_endpgm 10360; 10361; GFX10-CU-LABEL: global_agent_release_seq_cst_ret_cmpxchg: 10362; GFX10-CU: ; %bb.0: ; %entry 10363; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 10364; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 10365; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 10366; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 10367; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 10368; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 10369; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 10370; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 10371; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 10372; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10373; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 10374; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 10375; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 10376; GFX10-CU-NEXT: buffer_gl1_inv 10377; GFX10-CU-NEXT: buffer_gl0_inv 10378; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 10379; GFX10-CU-NEXT: s_endpgm 10380; 10381; SKIP-CACHE-INV-LABEL: global_agent_release_seq_cst_ret_cmpxchg: 10382; SKIP-CACHE-INV: ; %bb.0: ; %entry 10383; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 10384; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 10385; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 10386; SKIP-CACHE-INV-NEXT: s_load_dword s4, 
s[2:3], 0x3 10387; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 10388; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 10389; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 10390; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 10391; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 10392; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 10393; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 10394; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 10395; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 10396; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 10397; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 10398; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 10399; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 10400; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10401; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 10402; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 10403; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 10404; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 10405; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 10406; SKIP-CACHE-INV-NEXT: s_endpgm 10407; 10408; GFX90A-NOTTGSPLIT-LABEL: global_agent_release_seq_cst_ret_cmpxchg: 10409; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 10410; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10411; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 10412; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 10413; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 10414; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10415; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 10416; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 10417; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 10418; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 10419; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10420; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 
10421; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 10422; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 10423; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 10424; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 10425; 10426; GFX90A-TGSPLIT-LABEL: global_agent_release_seq_cst_ret_cmpxchg: 10427; GFX90A-TGSPLIT: ; %bb.0: ; %entry 10428; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10429; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 10430; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 10431; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 10432; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10433; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 10434; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 10435; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 10436; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 10437; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10438; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 10439; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 10440; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 10441; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 10442; GFX90A-TGSPLIT-NEXT: s_endpgm 10443; 10444; GFX940-NOTTGSPLIT-LABEL: global_agent_release_seq_cst_ret_cmpxchg: 10445; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 10446; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10447; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 10448; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 10449; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 10450; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10451; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 10452; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 10453; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 10454; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 10455; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 10456; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10457; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap 
v1, v0, v[2:3], s[0:1] offset:16 sc0 10458; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 10459; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 10460; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 10461; GFX940-NOTTGSPLIT-NEXT: s_endpgm 10462; 10463; GFX940-TGSPLIT-LABEL: global_agent_release_seq_cst_ret_cmpxchg: 10464; GFX940-TGSPLIT: ; %bb.0: ; %entry 10465; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10466; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 10467; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 10468; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 10469; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10470; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 10471; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 10472; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 10473; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 10474; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 10475; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10476; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 10477; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 10478; GFX940-TGSPLIT-NEXT: buffer_inv sc1 10479; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 10480; GFX940-TGSPLIT-NEXT: s_endpgm 10481; 10482; GFX11-WGP-LABEL: global_agent_release_seq_cst_ret_cmpxchg: 10483; GFX11-WGP: ; %bb.0: ; %entry 10484; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 10485; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 10486; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 10487; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 10488; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 10489; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 10490; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 10491; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 10492; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 10493; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10494; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 10495; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], 
s[0:1] offset:16 glc 10496; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 10497; GFX11-WGP-NEXT: buffer_gl1_inv 10498; GFX11-WGP-NEXT: buffer_gl0_inv 10499; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 10500; GFX11-WGP-NEXT: s_endpgm 10501; 10502; GFX11-CU-LABEL: global_agent_release_seq_cst_ret_cmpxchg: 10503; GFX11-CU: ; %bb.0: ; %entry 10504; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 10505; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 10506; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 10507; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 10508; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 10509; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 10510; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 10511; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 10512; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 10513; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10514; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 10515; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 10516; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 10517; GFX11-CU-NEXT: buffer_gl1_inv 10518; GFX11-CU-NEXT: buffer_gl0_inv 10519; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 10520; GFX11-CU-NEXT: s_endpgm 10521; 10522; GFX12-WGP-LABEL: global_agent_release_seq_cst_ret_cmpxchg: 10523; GFX12-WGP: ; %bb.0: ; %entry 10524; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 10525; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 10526; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 10527; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 10528; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 10529; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 10530; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 10531; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 10532; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 10533; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 10534; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 10535; GFX12-WGP-NEXT: s_wait_storecnt 0x0 10536; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 10537; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 
th:TH_ATOMIC_RETURN scope:SCOPE_DEV 10538; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 10539; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 10540; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 10541; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 10542; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 10543; GFX12-WGP-NEXT: s_endpgm 10544; 10545; GFX12-CU-LABEL: global_agent_release_seq_cst_ret_cmpxchg: 10546; GFX12-CU: ; %bb.0: ; %entry 10547; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 10548; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 10549; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 10550; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 10551; GFX12-CU-NEXT: s_wait_kmcnt 0x0 10552; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 10553; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 10554; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 10555; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 10556; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 10557; GFX12-CU-NEXT: s_wait_samplecnt 0x0 10558; GFX12-CU-NEXT: s_wait_storecnt 0x0 10559; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 10560; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV 10561; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 10562; GFX12-CU-NEXT: s_wait_samplecnt 0x0 10563; GFX12-CU-NEXT: s_wait_loadcnt 0x0 10564; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 10565; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 10566; GFX12-CU-NEXT: s_endpgm 10567 ptr addrspace(1) %out, i32 %in, i32 %old) { 10568entry: 10569 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 10570 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") release seq_cst 10571 %val0 = extractvalue { i32, i1 } %val, 0 10572 store i32 %val0, ptr addrspace(1) %out, align 4 10573 ret void 10574} 10575 10576define amdgpu_kernel void @global_agent_acq_rel_seq_cst_ret_cmpxchg( 10577; GFX6-LABEL: global_agent_acq_rel_seq_cst_ret_cmpxchg: 10578; GFX6: ; %bb.0: ; %entry 10579; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 10580; GFX6-NEXT: s_load_dwordx2 
s[4:5], s[6:7], 0x0 10581; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 10582; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 10583; GFX6-NEXT: s_waitcnt lgkmcnt(0) 10584; GFX6-NEXT: s_mov_b32 s12, s5 10585; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 10586; GFX6-NEXT: s_mov_b32 s10, 0x100f000 10587; GFX6-NEXT: s_mov_b32 s11, -1 10588; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 10589; GFX6-NEXT: s_mov_b32 s5, s12 10590; GFX6-NEXT: s_mov_b32 s6, s11 10591; GFX6-NEXT: s_mov_b32 s7, s10 10592; GFX6-NEXT: v_mov_b32_e32 v0, s9 10593; GFX6-NEXT: v_mov_b32_e32 v2, s8 10594; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 10595; GFX6-NEXT: v_mov_b32_e32 v1, v2 10596; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10597; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 10598; GFX6-NEXT: s_waitcnt vmcnt(0) 10599; GFX6-NEXT: buffer_wbinvl1 10600; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 10601; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 10602; GFX6-NEXT: s_endpgm 10603; 10604; GFX7-LABEL: global_agent_acq_rel_seq_cst_ret_cmpxchg: 10605; GFX7: ; %bb.0: ; %entry 10606; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 10607; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 10608; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 10609; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 10610; GFX7-NEXT: s_mov_b64 s[12:13], 16 10611; GFX7-NEXT: s_waitcnt lgkmcnt(0) 10612; GFX7-NEXT: s_mov_b32 s6, s4 10613; GFX7-NEXT: s_mov_b32 s7, s5 10614; GFX7-NEXT: s_mov_b32 s11, s12 10615; GFX7-NEXT: s_mov_b32 s10, s13 10616; GFX7-NEXT: s_add_u32 s6, s6, s11 10617; GFX7-NEXT: s_addc_u32 s10, s7, s10 10618; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 10619; GFX7-NEXT: s_mov_b32 s7, s10 10620; GFX7-NEXT: v_mov_b32_e32 v2, s9 10621; GFX7-NEXT: v_mov_b32_e32 v0, s8 10622; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 10623; GFX7-NEXT: v_mov_b32_e32 v3, v0 10624; 
GFX7-NEXT: v_mov_b32_e32 v0, s6 10625; GFX7-NEXT: v_mov_b32_e32 v1, s7 10626; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10627; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 10628; GFX7-NEXT: s_waitcnt vmcnt(0) 10629; GFX7-NEXT: buffer_wbinvl1_vol 10630; GFX7-NEXT: v_mov_b32_e32 v0, s4 10631; GFX7-NEXT: v_mov_b32_e32 v1, s5 10632; GFX7-NEXT: flat_store_dword v[0:1], v2 10633; GFX7-NEXT: s_endpgm 10634; 10635; GFX10-WGP-LABEL: global_agent_acq_rel_seq_cst_ret_cmpxchg: 10636; GFX10-WGP: ; %bb.0: ; %entry 10637; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 10638; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 10639; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 10640; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 10641; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 10642; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 10643; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 10644; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 10645; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 10646; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10647; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 10648; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 10649; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 10650; GFX10-WGP-NEXT: buffer_gl1_inv 10651; GFX10-WGP-NEXT: buffer_gl0_inv 10652; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 10653; GFX10-WGP-NEXT: s_endpgm 10654; 10655; GFX10-CU-LABEL: global_agent_acq_rel_seq_cst_ret_cmpxchg: 10656; GFX10-CU: ; %bb.0: ; %entry 10657; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 10658; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 10659; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 10660; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 10661; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 10662; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 10663; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 10664; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 10665; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 10666; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10667; 
GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 10668; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 10669; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 10670; GFX10-CU-NEXT: buffer_gl1_inv 10671; GFX10-CU-NEXT: buffer_gl0_inv 10672; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 10673; GFX10-CU-NEXT: s_endpgm 10674; 10675; SKIP-CACHE-INV-LABEL: global_agent_acq_rel_seq_cst_ret_cmpxchg: 10676; SKIP-CACHE-INV: ; %bb.0: ; %entry 10677; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 10678; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 10679; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 10680; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 10681; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 10682; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 10683; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 10684; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 10685; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 10686; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 10687; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 10688; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 10689; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 10690; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 10691; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 10692; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 10693; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 10694; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10695; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 10696; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 10697; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 10698; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 10699; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 10700; SKIP-CACHE-INV-NEXT: s_endpgm 10701; 10702; GFX90A-NOTTGSPLIT-LABEL: global_agent_acq_rel_seq_cst_ret_cmpxchg: 10703; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 10704; GFX90A-NOTTGSPLIT-NEXT: 
v_mov_b32_e32 v0, 0 10705; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 10706; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 10707; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 10708; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10709; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 10710; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 10711; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 10712; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 10713; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10714; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 10715; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 10716; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 10717; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 10718; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 10719; 10720; GFX90A-TGSPLIT-LABEL: global_agent_acq_rel_seq_cst_ret_cmpxchg: 10721; GFX90A-TGSPLIT: ; %bb.0: ; %entry 10722; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10723; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 10724; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 10725; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 10726; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10727; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 10728; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 10729; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 10730; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 10731; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10732; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 10733; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 10734; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 10735; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 10736; GFX90A-TGSPLIT-NEXT: s_endpgm 10737; 10738; GFX940-NOTTGSPLIT-LABEL: global_agent_acq_rel_seq_cst_ret_cmpxchg: 10739; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 10740; GFX940-NOTTGSPLIT-NEXT: 
v_mov_b32_e32 v0, 0 10741; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 10742; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 10743; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 10744; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10745; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 10746; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 10747; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 10748; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 10749; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 10750; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10751; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 10752; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 10753; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 10754; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 10755; GFX940-NOTTGSPLIT-NEXT: s_endpgm 10756; 10757; GFX940-TGSPLIT-LABEL: global_agent_acq_rel_seq_cst_ret_cmpxchg: 10758; GFX940-TGSPLIT: ; %bb.0: ; %entry 10759; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10760; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 10761; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 10762; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 10763; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10764; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 10765; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 10766; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 10767; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 10768; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 10769; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10770; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 10771; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 10772; GFX940-TGSPLIT-NEXT: buffer_inv sc1 10773; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 10774; GFX940-TGSPLIT-NEXT: s_endpgm 10775; 10776; GFX11-WGP-LABEL: 
global_agent_acq_rel_seq_cst_ret_cmpxchg: 10777; GFX11-WGP: ; %bb.0: ; %entry 10778; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 10779; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 10780; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 10781; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 10782; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 10783; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 10784; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 10785; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 10786; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 10787; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10788; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 10789; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 10790; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 10791; GFX11-WGP-NEXT: buffer_gl1_inv 10792; GFX11-WGP-NEXT: buffer_gl0_inv 10793; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 10794; GFX11-WGP-NEXT: s_endpgm 10795; 10796; GFX11-CU-LABEL: global_agent_acq_rel_seq_cst_ret_cmpxchg: 10797; GFX11-CU: ; %bb.0: ; %entry 10798; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 10799; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 10800; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 10801; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 10802; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 10803; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 10804; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 10805; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 10806; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 10807; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10808; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 10809; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 10810; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 10811; GFX11-CU-NEXT: buffer_gl1_inv 10812; GFX11-CU-NEXT: buffer_gl0_inv 10813; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 10814; GFX11-CU-NEXT: s_endpgm 10815; 10816; GFX12-WGP-LABEL: global_agent_acq_rel_seq_cst_ret_cmpxchg: 10817; GFX12-WGP: ; %bb.0: ; %entry 10818; 
GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 10819; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 10820; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 10821; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 10822; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 10823; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 10824; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 10825; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 10826; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 10827; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 10828; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 10829; GFX12-WGP-NEXT: s_wait_storecnt 0x0 10830; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 10831; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV 10832; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 10833; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 10834; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 10835; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 10836; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 10837; GFX12-WGP-NEXT: s_endpgm 10838; 10839; GFX12-CU-LABEL: global_agent_acq_rel_seq_cst_ret_cmpxchg: 10840; GFX12-CU: ; %bb.0: ; %entry 10841; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 10842; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 10843; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 10844; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 10845; GFX12-CU-NEXT: s_wait_kmcnt 0x0 10846; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 10847; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 10848; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 10849; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 10850; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 10851; GFX12-CU-NEXT: s_wait_samplecnt 0x0 10852; GFX12-CU-NEXT: s_wait_storecnt 0x0 10853; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 10854; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV 10855; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 10856; GFX12-CU-NEXT: s_wait_samplecnt 0x0 10857; GFX12-CU-NEXT: s_wait_loadcnt 0x0 10858; GFX12-CU-NEXT: 
global_inv scope:SCOPE_DEV 10859; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 10860; GFX12-CU-NEXT: s_endpgm 10861 ptr addrspace(1) %out, i32 %in, i32 %old) { 10862entry: 10863 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 10864 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") acq_rel seq_cst 10865 %val0 = extractvalue { i32, i1 } %val, 0 10866 store i32 %val0, ptr addrspace(1) %out, align 4 10867 ret void 10868} 10869 10870define amdgpu_kernel void @global_agent_seq_cst_seq_cst_ret_cmpxchg( 10871; GFX6-LABEL: global_agent_seq_cst_seq_cst_ret_cmpxchg: 10872; GFX6: ; %bb.0: ; %entry 10873; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 10874; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 10875; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 10876; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 10877; GFX6-NEXT: s_waitcnt lgkmcnt(0) 10878; GFX6-NEXT: s_mov_b32 s12, s5 10879; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 10880; GFX6-NEXT: s_mov_b32 s10, 0x100f000 10881; GFX6-NEXT: s_mov_b32 s11, -1 10882; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 10883; GFX6-NEXT: s_mov_b32 s5, s12 10884; GFX6-NEXT: s_mov_b32 s6, s11 10885; GFX6-NEXT: s_mov_b32 s7, s10 10886; GFX6-NEXT: v_mov_b32_e32 v0, s9 10887; GFX6-NEXT: v_mov_b32_e32 v2, s8 10888; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 10889; GFX6-NEXT: v_mov_b32_e32 v1, v2 10890; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10891; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 10892; GFX6-NEXT: s_waitcnt vmcnt(0) 10893; GFX6-NEXT: buffer_wbinvl1 10894; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 10895; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 10896; GFX6-NEXT: s_endpgm 10897; 10898; GFX7-LABEL: global_agent_seq_cst_seq_cst_ret_cmpxchg: 10899; GFX7: ; %bb.0: ; %entry 10900; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 10901; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 
10902; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 10903; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 10904; GFX7-NEXT: s_mov_b64 s[12:13], 16 10905; GFX7-NEXT: s_waitcnt lgkmcnt(0) 10906; GFX7-NEXT: s_mov_b32 s6, s4 10907; GFX7-NEXT: s_mov_b32 s7, s5 10908; GFX7-NEXT: s_mov_b32 s11, s12 10909; GFX7-NEXT: s_mov_b32 s10, s13 10910; GFX7-NEXT: s_add_u32 s6, s6, s11 10911; GFX7-NEXT: s_addc_u32 s10, s7, s10 10912; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 10913; GFX7-NEXT: s_mov_b32 s7, s10 10914; GFX7-NEXT: v_mov_b32_e32 v2, s9 10915; GFX7-NEXT: v_mov_b32_e32 v0, s8 10916; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 10917; GFX7-NEXT: v_mov_b32_e32 v3, v0 10918; GFX7-NEXT: v_mov_b32_e32 v0, s6 10919; GFX7-NEXT: v_mov_b32_e32 v1, s7 10920; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10921; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 10922; GFX7-NEXT: s_waitcnt vmcnt(0) 10923; GFX7-NEXT: buffer_wbinvl1_vol 10924; GFX7-NEXT: v_mov_b32_e32 v0, s4 10925; GFX7-NEXT: v_mov_b32_e32 v1, s5 10926; GFX7-NEXT: flat_store_dword v[0:1], v2 10927; GFX7-NEXT: s_endpgm 10928; 10929; GFX10-WGP-LABEL: global_agent_seq_cst_seq_cst_ret_cmpxchg: 10930; GFX10-WGP: ; %bb.0: ; %entry 10931; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 10932; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 10933; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 10934; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 10935; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 10936; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 10937; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 10938; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 10939; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 10940; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10941; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 10942; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 10943; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 10944; GFX10-WGP-NEXT: buffer_gl1_inv 10945; GFX10-WGP-NEXT: buffer_gl0_inv 10946; 
GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 10947; GFX10-WGP-NEXT: s_endpgm 10948; 10949; GFX10-CU-LABEL: global_agent_seq_cst_seq_cst_ret_cmpxchg: 10950; GFX10-CU: ; %bb.0: ; %entry 10951; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 10952; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 10953; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 10954; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 10955; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 10956; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 10957; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 10958; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 10959; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 10960; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10961; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 10962; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 10963; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 10964; GFX10-CU-NEXT: buffer_gl1_inv 10965; GFX10-CU-NEXT: buffer_gl0_inv 10966; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 10967; GFX10-CU-NEXT: s_endpgm 10968; 10969; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_seq_cst_ret_cmpxchg: 10970; SKIP-CACHE-INV: ; %bb.0: ; %entry 10971; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 10972; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 10973; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 10974; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 10975; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 10976; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 10977; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 10978; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 10979; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 10980; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 10981; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 10982; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 10983; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 10984; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 10985; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 10986; SKIP-CACHE-INV-NEXT: 
; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 10987; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 10988; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10989; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 10990; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 10991; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 10992; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 10993; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 10994; SKIP-CACHE-INV-NEXT: s_endpgm 10995; 10996; GFX90A-NOTTGSPLIT-LABEL: global_agent_seq_cst_seq_cst_ret_cmpxchg: 10997; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 10998; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10999; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11000; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 11001; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 11002; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11003; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 11004; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 11005; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 11006; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 11007; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 11008; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 11009; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 11010; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 11011; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 11012; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 11013; 11014; GFX90A-TGSPLIT-LABEL: global_agent_seq_cst_seq_cst_ret_cmpxchg: 11015; GFX90A-TGSPLIT: ; %bb.0: ; %entry 11016; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11017; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11018; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 11019; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 11020; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11021; GFX90A-TGSPLIT-NEXT: 
v_mov_b32_e32 v2, s7 11022; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 11023; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 11024; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 11025; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 11026; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 11027; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 11028; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 11029; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 11030; GFX90A-TGSPLIT-NEXT: s_endpgm 11031; 11032; GFX940-NOTTGSPLIT-LABEL: global_agent_seq_cst_seq_cst_ret_cmpxchg: 11033; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 11034; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11035; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 11036; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 11037; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 11038; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11039; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 11040; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 11041; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 11042; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 11043; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 11044; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 11045; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 11046; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 11047; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 11048; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 11049; GFX940-NOTTGSPLIT-NEXT: s_endpgm 11050; 11051; GFX940-TGSPLIT-LABEL: global_agent_seq_cst_seq_cst_ret_cmpxchg: 11052; GFX940-TGSPLIT: ; %bb.0: ; %entry 11053; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11054; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 11055; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 11056; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 11057; GFX940-TGSPLIT-NEXT: 
s_waitcnt lgkmcnt(0) 11058; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 11059; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 11060; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 11061; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 11062; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 11063; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 11064; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 11065; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 11066; GFX940-TGSPLIT-NEXT: buffer_inv sc1 11067; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 11068; GFX940-TGSPLIT-NEXT: s_endpgm 11069; 11070; GFX11-WGP-LABEL: global_agent_seq_cst_seq_cst_ret_cmpxchg: 11071; GFX11-WGP: ; %bb.0: ; %entry 11072; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 11073; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 11074; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 11075; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 11076; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 11077; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 11078; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 11079; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 11080; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 11081; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 11082; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 11083; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 11084; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 11085; GFX11-WGP-NEXT: buffer_gl1_inv 11086; GFX11-WGP-NEXT: buffer_gl0_inv 11087; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 11088; GFX11-WGP-NEXT: s_endpgm 11089; 11090; GFX11-CU-LABEL: global_agent_seq_cst_seq_cst_ret_cmpxchg: 11091; GFX11-CU: ; %bb.0: ; %entry 11092; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 11093; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 11094; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 11095; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 11096; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 11097; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 11098; 
GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 11099; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 11100; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 11101; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 11102; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 11103; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 11104; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 11105; GFX11-CU-NEXT: buffer_gl1_inv 11106; GFX11-CU-NEXT: buffer_gl0_inv 11107; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 11108; GFX11-CU-NEXT: s_endpgm 11109; 11110; GFX12-WGP-LABEL: global_agent_seq_cst_seq_cst_ret_cmpxchg: 11111; GFX12-WGP: ; %bb.0: ; %entry 11112; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 11113; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 11114; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 11115; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 11116; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 11117; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 11118; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 11119; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 11120; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 11121; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 11122; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 11123; GFX12-WGP-NEXT: s_wait_storecnt 0x0 11124; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 11125; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV 11126; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 11127; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 11128; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 11129; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 11130; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 11131; GFX12-WGP-NEXT: s_endpgm 11132; 11133; GFX12-CU-LABEL: global_agent_seq_cst_seq_cst_ret_cmpxchg: 11134; GFX12-CU: ; %bb.0: ; %entry 11135; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 11136; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 11137; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 11138; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 11139; GFX12-CU-NEXT: 
s_wait_kmcnt 0x0 11140; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 11141; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 11142; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 11143; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 11144; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 11145; GFX12-CU-NEXT: s_wait_samplecnt 0x0 11146; GFX12-CU-NEXT: s_wait_storecnt 0x0 11147; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 11148; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV 11149; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 11150; GFX12-CU-NEXT: s_wait_samplecnt 0x0 11151; GFX12-CU-NEXT: s_wait_loadcnt 0x0 11152; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 11153; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 11154; GFX12-CU-NEXT: s_endpgm 11155 ptr addrspace(1) %out, i32 %in, i32 %old) { 11156entry: 11157 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 11158 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst 11159 %val0 = extractvalue { i32, i1 } %val, 0 11160 store i32 %val0, ptr addrspace(1) %out, align 4 11161 ret void 11162} 11163 11164define amdgpu_kernel void @global_agent_one_as_unordered_load( 11165; GFX6-LABEL: global_agent_one_as_unordered_load: 11166; GFX6: ; %bb.0: ; %entry 11167; GFX6-NEXT: s_mov_b64 s[4:5], s[8:9] 11168; GFX6-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 11169; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 11170; GFX6-NEXT: s_waitcnt lgkmcnt(0) 11171; GFX6-NEXT: s_mov_b32 s6, s9 11172; GFX6-NEXT: ; kill: def $sgpr8 killed $sgpr8 killed $sgpr8_sgpr9 11173; GFX6-NEXT: s_mov_b32 s12, 0x100f000 11174; GFX6-NEXT: s_mov_b32 s13, -1 11175; GFX6-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11 11176; GFX6-NEXT: s_mov_b32 s9, s6 11177; GFX6-NEXT: s_mov_b32 s10, s13 11178; GFX6-NEXT: s_mov_b32 s11, s12 11179; GFX6-NEXT: s_mov_b32 s14, s5 11180; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 11181; GFX6-NEXT: ; kill: def $sgpr4 
killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 11182; GFX6-NEXT: s_mov_b32 s5, s14 11183; GFX6-NEXT: s_mov_b32 s6, s13 11184; GFX6-NEXT: s_mov_b32 s7, s12 11185; GFX6-NEXT: buffer_load_dword v0, off, s[8:11], 0 11186; GFX6-NEXT: s_waitcnt vmcnt(0) 11187; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 11188; GFX6-NEXT: s_endpgm 11189; 11190; GFX7-LABEL: global_agent_one_as_unordered_load: 11191; GFX7: ; %bb.0: ; %entry 11192; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 11193; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x2 11194; GFX7-NEXT: s_waitcnt lgkmcnt(0) 11195; GFX7-NEXT: v_mov_b32_e32 v0, s6 11196; GFX7-NEXT: v_mov_b32_e32 v1, s7 11197; GFX7-NEXT: flat_load_dword v2, v[0:1] 11198; GFX7-NEXT: v_mov_b32_e32 v0, s4 11199; GFX7-NEXT: v_mov_b32_e32 v1, s5 11200; GFX7-NEXT: s_waitcnt vmcnt(0) 11201; GFX7-NEXT: flat_store_dword v[0:1], v2 11202; GFX7-NEXT: s_endpgm 11203; 11204; GFX10-WGP-LABEL: global_agent_one_as_unordered_load: 11205; GFX10-WGP: ; %bb.0: ; %entry 11206; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 11207; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 11208; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 11209; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 11210; GFX10-WGP-NEXT: global_load_dword v1, v0, s[6:7] 11211; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 11212; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 11213; GFX10-WGP-NEXT: s_endpgm 11214; 11215; GFX10-CU-LABEL: global_agent_one_as_unordered_load: 11216; GFX10-CU: ; %bb.0: ; %entry 11217; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 11218; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 11219; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 11220; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 11221; GFX10-CU-NEXT: global_load_dword v1, v0, s[6:7] 11222; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 11223; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 11224; GFX10-CU-NEXT: s_endpgm 11225; 11226; SKIP-CACHE-INV-LABEL: global_agent_one_as_unordered_load: 11227; SKIP-CACHE-INV: ; %bb.0: ; %entry 11228; SKIP-CACHE-INV-NEXT: 
s_mov_b64 s[0:1], s[4:5] 11229; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 11230; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 11231; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 11232; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s5 11233; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 11234; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, 0xf000 11235; SKIP-CACHE-INV-NEXT: s_mov_b32 s9, -1 11236; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 11237; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s2 11238; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s9 11239; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s8 11240; SKIP-CACHE-INV-NEXT: s_mov_b32 s10, s1 11241; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 11242; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 11243; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s10 11244; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s9 11245; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s8 11246; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[4:7], 0 11247; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 11248; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 11249; SKIP-CACHE-INV-NEXT: s_endpgm 11250; 11251; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_unordered_load: 11252; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 11253; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11254; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 11255; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 11256; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11257; GFX90A-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[6:7] 11258; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 11259; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 11260; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 11261; 11262; GFX90A-TGSPLIT-LABEL: global_agent_one_as_unordered_load: 11263; GFX90A-TGSPLIT: ; %bb.0: ; %entry 11264; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11265; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], 
s[8:9], 0x0 11266; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 11267; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11268; GFX90A-TGSPLIT-NEXT: global_load_dword v1, v0, s[6:7] 11269; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 11270; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 11271; GFX90A-TGSPLIT-NEXT: s_endpgm 11272; 11273; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_unordered_load: 11274; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 11275; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11276; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 11277; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 11278; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11279; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] 11280; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 11281; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 11282; GFX940-NOTTGSPLIT-NEXT: s_endpgm 11283; 11284; GFX940-TGSPLIT-LABEL: global_agent_one_as_unordered_load: 11285; GFX940-TGSPLIT: ; %bb.0: ; %entry 11286; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11287; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 11288; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 11289; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11290; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] 11291; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 11292; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 11293; GFX940-TGSPLIT-NEXT: s_endpgm 11294; 11295; GFX11-WGP-LABEL: global_agent_one_as_unordered_load: 11296; GFX11-WGP: ; %bb.0: ; %entry 11297; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 11298; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 11299; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 11300; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 11301; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[2:3] 11302; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 11303; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 11304; GFX11-WGP-NEXT: s_endpgm 11305; 11306; GFX11-CU-LABEL: 
global_agent_one_as_unordered_load: 11307; GFX11-CU: ; %bb.0: ; %entry 11308; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 11309; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 11310; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 11311; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 11312; GFX11-CU-NEXT: global_load_b32 v1, v0, s[2:3] 11313; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 11314; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 11315; GFX11-CU-NEXT: s_endpgm 11316; 11317; GFX12-WGP-LABEL: global_agent_one_as_unordered_load: 11318; GFX12-WGP: ; %bb.0: ; %entry 11319; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 11320; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 11321; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 11322; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 11323; GFX12-WGP-NEXT: global_load_b32 v1, v0, s[2:3] 11324; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 11325; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 11326; GFX12-WGP-NEXT: s_endpgm 11327; 11328; GFX12-CU-LABEL: global_agent_one_as_unordered_load: 11329; GFX12-CU: ; %bb.0: ; %entry 11330; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 11331; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 11332; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 11333; GFX12-CU-NEXT: s_wait_kmcnt 0x0 11334; GFX12-CU-NEXT: global_load_b32 v1, v0, s[2:3] 11335; GFX12-CU-NEXT: s_wait_loadcnt 0x0 11336; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 11337; GFX12-CU-NEXT: s_endpgm 11338 ptr addrspace(1) %in, ptr addrspace(1) %out) { 11339entry: 11340 %val = load atomic i32, ptr addrspace(1) %in syncscope("agent-one-as") unordered, align 4 11341 store i32 %val, ptr addrspace(1) %out 11342 ret void 11343} 11344 11345define amdgpu_kernel void @global_agent_one_as_monotonic_load( 11346; GFX6-LABEL: global_agent_one_as_monotonic_load: 11347; GFX6: ; %bb.0: ; %entry 11348; GFX6-NEXT: s_mov_b64 s[4:5], s[8:9] 11349; GFX6-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 11350; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 11351; GFX6-NEXT: s_waitcnt lgkmcnt(0) 11352; GFX6-NEXT: s_mov_b32 s6, s9 
11353; GFX6-NEXT: ; kill: def $sgpr8 killed $sgpr8 killed $sgpr8_sgpr9 11354; GFX6-NEXT: s_mov_b32 s12, 0x100f000 11355; GFX6-NEXT: s_mov_b32 s13, -1 11356; GFX6-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11 11357; GFX6-NEXT: s_mov_b32 s9, s6 11358; GFX6-NEXT: s_mov_b32 s10, s13 11359; GFX6-NEXT: s_mov_b32 s11, s12 11360; GFX6-NEXT: s_mov_b32 s14, s5 11361; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 11362; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 11363; GFX6-NEXT: s_mov_b32 s5, s14 11364; GFX6-NEXT: s_mov_b32 s6, s13 11365; GFX6-NEXT: s_mov_b32 s7, s12 11366; GFX6-NEXT: buffer_load_dword v0, off, s[8:11], 0 glc 11367; GFX6-NEXT: s_waitcnt vmcnt(0) 11368; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 11369; GFX6-NEXT: s_endpgm 11370; 11371; GFX7-LABEL: global_agent_one_as_monotonic_load: 11372; GFX7: ; %bb.0: ; %entry 11373; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 11374; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x2 11375; GFX7-NEXT: s_waitcnt lgkmcnt(0) 11376; GFX7-NEXT: v_mov_b32_e32 v0, s6 11377; GFX7-NEXT: v_mov_b32_e32 v1, s7 11378; GFX7-NEXT: flat_load_dword v2, v[0:1] glc 11379; GFX7-NEXT: v_mov_b32_e32 v0, s4 11380; GFX7-NEXT: v_mov_b32_e32 v1, s5 11381; GFX7-NEXT: s_waitcnt vmcnt(0) 11382; GFX7-NEXT: flat_store_dword v[0:1], v2 11383; GFX7-NEXT: s_endpgm 11384; 11385; GFX10-WGP-LABEL: global_agent_one_as_monotonic_load: 11386; GFX10-WGP: ; %bb.0: ; %entry 11387; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 11388; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 11389; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 11390; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 11391; GFX10-WGP-NEXT: global_load_dword v1, v0, s[6:7] glc dlc 11392; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 11393; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 11394; GFX10-WGP-NEXT: s_endpgm 11395; 11396; GFX10-CU-LABEL: global_agent_one_as_monotonic_load: 11397; GFX10-CU: ; %bb.0: ; %entry 11398; GFX10-CU-NEXT: 
v_mov_b32_e32 v0, 0 11399; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 11400; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 11401; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 11402; GFX10-CU-NEXT: global_load_dword v1, v0, s[6:7] glc dlc 11403; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 11404; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 11405; GFX10-CU-NEXT: s_endpgm 11406; 11407; SKIP-CACHE-INV-LABEL: global_agent_one_as_monotonic_load: 11408; SKIP-CACHE-INV: ; %bb.0: ; %entry 11409; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 11410; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 11411; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 11412; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 11413; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s5 11414; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 11415; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, 0xf000 11416; SKIP-CACHE-INV-NEXT: s_mov_b32 s9, -1 11417; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 11418; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s2 11419; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s9 11420; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s8 11421; SKIP-CACHE-INV-NEXT: s_mov_b32 s10, s1 11422; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 11423; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 11424; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s10 11425; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s9 11426; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s8 11427; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[4:7], 0 glc 11428; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 11429; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 11430; SKIP-CACHE-INV-NEXT: s_endpgm 11431; 11432; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_load: 11433; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 11434; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11435; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 11436; GFX90A-NOTTGSPLIT-NEXT: 
s_load_dwordx2 s[4:5], s[8:9], 0x8 11437; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11438; GFX90A-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[6:7] glc 11439; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 11440; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 11441; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 11442; 11443; GFX90A-TGSPLIT-LABEL: global_agent_one_as_monotonic_load: 11444; GFX90A-TGSPLIT: ; %bb.0: ; %entry 11445; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11446; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 11447; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 11448; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11449; GFX90A-TGSPLIT-NEXT: global_load_dword v1, v0, s[6:7] glc 11450; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 11451; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 11452; GFX90A-TGSPLIT-NEXT: s_endpgm 11453; 11454; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_load: 11455; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 11456; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11457; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 11458; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 11459; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11460; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc1 11461; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 11462; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 11463; GFX940-NOTTGSPLIT-NEXT: s_endpgm 11464; 11465; GFX940-TGSPLIT-LABEL: global_agent_one_as_monotonic_load: 11466; GFX940-TGSPLIT: ; %bb.0: ; %entry 11467; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11468; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 11469; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 11470; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11471; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc1 11472; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 11473; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 11474; GFX940-TGSPLIT-NEXT: s_endpgm 
11475; 11476; GFX11-WGP-LABEL: global_agent_one_as_monotonic_load: 11477; GFX11-WGP: ; %bb.0: ; %entry 11478; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 11479; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 11480; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 11481; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 11482; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[2:3] glc 11483; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 11484; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 11485; GFX11-WGP-NEXT: s_endpgm 11486; 11487; GFX11-CU-LABEL: global_agent_one_as_monotonic_load: 11488; GFX11-CU: ; %bb.0: ; %entry 11489; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 11490; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 11491; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 11492; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 11493; GFX11-CU-NEXT: global_load_b32 v1, v0, s[2:3] glc 11494; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 11495; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 11496; GFX11-CU-NEXT: s_endpgm 11497; 11498; GFX12-WGP-LABEL: global_agent_one_as_monotonic_load: 11499; GFX12-WGP: ; %bb.0: ; %entry 11500; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 11501; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 11502; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 11503; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 11504; GFX12-WGP-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV 11505; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 11506; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 11507; GFX12-WGP-NEXT: s_endpgm 11508; 11509; GFX12-CU-LABEL: global_agent_one_as_monotonic_load: 11510; GFX12-CU: ; %bb.0: ; %entry 11511; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 11512; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 11513; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 11514; GFX12-CU-NEXT: s_wait_kmcnt 0x0 11515; GFX12-CU-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV 11516; GFX12-CU-NEXT: s_wait_loadcnt 0x0 11517; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 11518; GFX12-CU-NEXT: s_endpgm 11519 ptr addrspace(1) %in, ptr addrspace(1) %out) { 
11520entry: 11521 %val = load atomic i32, ptr addrspace(1) %in syncscope("agent-one-as") monotonic, align 4 11522 store i32 %val, ptr addrspace(1) %out 11523 ret void 11524} 11525 11526define amdgpu_kernel void @global_agent_one_as_acquire_load( 11527; GFX6-LABEL: global_agent_one_as_acquire_load: 11528; GFX6: ; %bb.0: ; %entry 11529; GFX6-NEXT: s_mov_b64 s[4:5], s[8:9] 11530; GFX6-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 11531; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 11532; GFX6-NEXT: s_waitcnt lgkmcnt(0) 11533; GFX6-NEXT: s_mov_b32 s6, s9 11534; GFX6-NEXT: ; kill: def $sgpr8 killed $sgpr8 killed $sgpr8_sgpr9 11535; GFX6-NEXT: s_mov_b32 s12, 0x100f000 11536; GFX6-NEXT: s_mov_b32 s13, -1 11537; GFX6-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11 11538; GFX6-NEXT: s_mov_b32 s9, s6 11539; GFX6-NEXT: s_mov_b32 s10, s13 11540; GFX6-NEXT: s_mov_b32 s11, s12 11541; GFX6-NEXT: s_mov_b32 s14, s5 11542; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 11543; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 11544; GFX6-NEXT: s_mov_b32 s5, s14 11545; GFX6-NEXT: s_mov_b32 s6, s13 11546; GFX6-NEXT: s_mov_b32 s7, s12 11547; GFX6-NEXT: buffer_load_dword v0, off, s[8:11], 0 glc 11548; GFX6-NEXT: s_waitcnt vmcnt(0) 11549; GFX6-NEXT: buffer_wbinvl1 11550; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 11551; GFX6-NEXT: s_endpgm 11552; 11553; GFX7-LABEL: global_agent_one_as_acquire_load: 11554; GFX7: ; %bb.0: ; %entry 11555; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 11556; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x2 11557; GFX7-NEXT: s_waitcnt lgkmcnt(0) 11558; GFX7-NEXT: v_mov_b32_e32 v0, s6 11559; GFX7-NEXT: v_mov_b32_e32 v1, s7 11560; GFX7-NEXT: flat_load_dword v2, v[0:1] glc 11561; GFX7-NEXT: s_waitcnt vmcnt(0) 11562; GFX7-NEXT: buffer_wbinvl1_vol 11563; GFX7-NEXT: v_mov_b32_e32 v0, s4 11564; GFX7-NEXT: v_mov_b32_e32 v1, s5 11565; GFX7-NEXT: flat_store_dword v[0:1], v2 11566; GFX7-NEXT: s_endpgm 11567; 
11568; GFX10-WGP-LABEL: global_agent_one_as_acquire_load: 11569; GFX10-WGP: ; %bb.0: ; %entry 11570; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 11571; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 11572; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 11573; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 11574; GFX10-WGP-NEXT: global_load_dword v1, v0, s[6:7] glc dlc 11575; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 11576; GFX10-WGP-NEXT: buffer_gl1_inv 11577; GFX10-WGP-NEXT: buffer_gl0_inv 11578; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 11579; GFX10-WGP-NEXT: s_endpgm 11580; 11581; GFX10-CU-LABEL: global_agent_one_as_acquire_load: 11582; GFX10-CU: ; %bb.0: ; %entry 11583; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 11584; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 11585; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 11586; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 11587; GFX10-CU-NEXT: global_load_dword v1, v0, s[6:7] glc dlc 11588; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 11589; GFX10-CU-NEXT: buffer_gl1_inv 11590; GFX10-CU-NEXT: buffer_gl0_inv 11591; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 11592; GFX10-CU-NEXT: s_endpgm 11593; 11594; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_load: 11595; SKIP-CACHE-INV: ; %bb.0: ; %entry 11596; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 11597; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 11598; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 11599; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 11600; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s5 11601; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 11602; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, 0xf000 11603; SKIP-CACHE-INV-NEXT: s_mov_b32 s9, -1 11604; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 11605; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s2 11606; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s9 11607; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s8 11608; SKIP-CACHE-INV-NEXT: s_mov_b32 s10, s1 11609; SKIP-CACHE-INV-NEXT: ; kill: 
def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 11610; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 11611; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s10 11612; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s9 11613; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s8 11614; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[4:7], 0 glc 11615; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 11616; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 11617; SKIP-CACHE-INV-NEXT: s_endpgm 11618; 11619; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_load: 11620; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 11621; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11622; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 11623; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 11624; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11625; GFX90A-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[6:7] glc 11626; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 11627; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 11628; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 11629; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 11630; 11631; GFX90A-TGSPLIT-LABEL: global_agent_one_as_acquire_load: 11632; GFX90A-TGSPLIT: ; %bb.0: ; %entry 11633; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11634; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 11635; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 11636; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11637; GFX90A-TGSPLIT-NEXT: global_load_dword v1, v0, s[6:7] glc 11638; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 11639; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 11640; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 11641; GFX90A-TGSPLIT-NEXT: s_endpgm 11642; 11643; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_load: 11644; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 11645; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11646; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 11647; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], 
s[4:5], 0x8 11648; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11649; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc1 11650; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 11651; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 11652; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 11653; GFX940-NOTTGSPLIT-NEXT: s_endpgm 11654; 11655; GFX940-TGSPLIT-LABEL: global_agent_one_as_acquire_load: 11656; GFX940-TGSPLIT: ; %bb.0: ; %entry 11657; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11658; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 11659; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 11660; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11661; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc1 11662; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 11663; GFX940-TGSPLIT-NEXT: buffer_inv sc1 11664; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 11665; GFX940-TGSPLIT-NEXT: s_endpgm 11666; 11667; GFX11-WGP-LABEL: global_agent_one_as_acquire_load: 11668; GFX11-WGP: ; %bb.0: ; %entry 11669; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 11670; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 11671; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 11672; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 11673; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[2:3] glc 11674; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 11675; GFX11-WGP-NEXT: buffer_gl1_inv 11676; GFX11-WGP-NEXT: buffer_gl0_inv 11677; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 11678; GFX11-WGP-NEXT: s_endpgm 11679; 11680; GFX11-CU-LABEL: global_agent_one_as_acquire_load: 11681; GFX11-CU: ; %bb.0: ; %entry 11682; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 11683; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 11684; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 11685; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 11686; GFX11-CU-NEXT: global_load_b32 v1, v0, s[2:3] glc 11687; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 11688; GFX11-CU-NEXT: buffer_gl1_inv 11689; GFX11-CU-NEXT: buffer_gl0_inv 11690; GFX11-CU-NEXT: global_store_b32 
v0, v1, s[0:1] 11691; GFX11-CU-NEXT: s_endpgm 11692; 11693; GFX12-WGP-LABEL: global_agent_one_as_acquire_load: 11694; GFX12-WGP: ; %bb.0: ; %entry 11695; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 11696; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 11697; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 11698; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 11699; GFX12-WGP-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV 11700; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 11701; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 11702; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 11703; GFX12-WGP-NEXT: s_endpgm 11704; 11705; GFX12-CU-LABEL: global_agent_one_as_acquire_load: 11706; GFX12-CU: ; %bb.0: ; %entry 11707; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 11708; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 11709; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 11710; GFX12-CU-NEXT: s_wait_kmcnt 0x0 11711; GFX12-CU-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV 11712; GFX12-CU-NEXT: s_wait_loadcnt 0x0 11713; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 11714; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 11715; GFX12-CU-NEXT: s_endpgm 11716 ptr addrspace(1) %in, ptr addrspace(1) %out) { 11717entry: 11718 %val = load atomic i32, ptr addrspace(1) %in syncscope("agent-one-as") acquire, align 4 11719 store i32 %val, ptr addrspace(1) %out 11720 ret void 11721} 11722 11723define amdgpu_kernel void @global_agent_one_as_seq_cst_load( 11724; GFX6-LABEL: global_agent_one_as_seq_cst_load: 11725; GFX6: ; %bb.0: ; %entry 11726; GFX6-NEXT: s_mov_b64 s[4:5], s[8:9] 11727; GFX6-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 11728; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 11729; GFX6-NEXT: s_waitcnt lgkmcnt(0) 11730; GFX6-NEXT: s_mov_b32 s6, s9 11731; GFX6-NEXT: ; kill: def $sgpr8 killed $sgpr8 killed $sgpr8_sgpr9 11732; GFX6-NEXT: s_mov_b32 s12, 0x100f000 11733; GFX6-NEXT: s_mov_b32 s13, -1 11734; GFX6-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11 11735; GFX6-NEXT: s_mov_b32 s9, s6 11736; 
GFX6-NEXT: s_mov_b32 s10, s13 11737; GFX6-NEXT: s_mov_b32 s11, s12 11738; GFX6-NEXT: s_mov_b32 s14, s5 11739; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 11740; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 11741; GFX6-NEXT: s_mov_b32 s5, s14 11742; GFX6-NEXT: s_mov_b32 s6, s13 11743; GFX6-NEXT: s_mov_b32 s7, s12 11744; GFX6-NEXT: s_waitcnt vmcnt(0) 11745; GFX6-NEXT: buffer_load_dword v0, off, s[8:11], 0 glc 11746; GFX6-NEXT: s_waitcnt vmcnt(0) 11747; GFX6-NEXT: buffer_wbinvl1 11748; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 11749; GFX6-NEXT: s_endpgm 11750; 11751; GFX7-LABEL: global_agent_one_as_seq_cst_load: 11752; GFX7: ; %bb.0: ; %entry 11753; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 11754; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x2 11755; GFX7-NEXT: s_waitcnt lgkmcnt(0) 11756; GFX7-NEXT: v_mov_b32_e32 v0, s6 11757; GFX7-NEXT: v_mov_b32_e32 v1, s7 11758; GFX7-NEXT: s_waitcnt vmcnt(0) 11759; GFX7-NEXT: flat_load_dword v2, v[0:1] glc 11760; GFX7-NEXT: s_waitcnt vmcnt(0) 11761; GFX7-NEXT: buffer_wbinvl1_vol 11762; GFX7-NEXT: v_mov_b32_e32 v0, s4 11763; GFX7-NEXT: v_mov_b32_e32 v1, s5 11764; GFX7-NEXT: flat_store_dword v[0:1], v2 11765; GFX7-NEXT: s_endpgm 11766; 11767; GFX10-WGP-LABEL: global_agent_one_as_seq_cst_load: 11768; GFX10-WGP: ; %bb.0: ; %entry 11769; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 11770; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 11771; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 11772; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 11773; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 11774; GFX10-WGP-NEXT: global_load_dword v1, v0, s[6:7] glc dlc 11775; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 11776; GFX10-WGP-NEXT: buffer_gl1_inv 11777; GFX10-WGP-NEXT: buffer_gl0_inv 11778; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 11779; GFX10-WGP-NEXT: s_endpgm 11780; 11781; GFX10-CU-LABEL: global_agent_one_as_seq_cst_load: 11782; GFX10-CU: ; %bb.0: ; %entry 11783; GFX10-CU-NEXT: 
v_mov_b32_e32 v0, 0 11784; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 11785; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 11786; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 11787; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 11788; GFX10-CU-NEXT: global_load_dword v1, v0, s[6:7] glc dlc 11789; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 11790; GFX10-CU-NEXT: buffer_gl1_inv 11791; GFX10-CU-NEXT: buffer_gl0_inv 11792; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 11793; GFX10-CU-NEXT: s_endpgm 11794; 11795; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_load: 11796; SKIP-CACHE-INV: ; %bb.0: ; %entry 11797; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 11798; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 11799; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 11800; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 11801; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s5 11802; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 11803; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, 0xf000 11804; SKIP-CACHE-INV-NEXT: s_mov_b32 s9, -1 11805; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 11806; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s2 11807; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s9 11808; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s8 11809; SKIP-CACHE-INV-NEXT: s_mov_b32 s10, s1 11810; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 11811; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 11812; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s10 11813; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s9 11814; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s8 11815; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 11816; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[4:7], 0 glc 11817; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 11818; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 11819; SKIP-CACHE-INV-NEXT: s_endpgm 11820; 11821; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_load: 11822; 
GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 11823; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11824; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 11825; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 11826; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 11827; GFX90A-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[6:7] glc 11828; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 11829; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 11830; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 11831; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 11832; 11833; GFX90A-TGSPLIT-LABEL: global_agent_one_as_seq_cst_load: 11834; GFX90A-TGSPLIT: ; %bb.0: ; %entry 11835; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11836; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 11837; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 11838; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 11839; GFX90A-TGSPLIT-NEXT: global_load_dword v1, v0, s[6:7] glc 11840; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 11841; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 11842; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 11843; GFX90A-TGSPLIT-NEXT: s_endpgm 11844; 11845; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_load: 11846; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 11847; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11848; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 11849; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 11850; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 11851; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc1 11852; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 11853; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 11854; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 11855; GFX940-NOTTGSPLIT-NEXT: s_endpgm 11856; 11857; GFX940-TGSPLIT-LABEL: global_agent_one_as_seq_cst_load: 11858; GFX940-TGSPLIT: ; %bb.0: ; %entry 11859; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11860; GFX940-TGSPLIT-NEXT: s_load_dwordx2 
s[2:3], s[4:5], 0x0 11861; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 11862; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 11863; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc1 11864; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 11865; GFX940-TGSPLIT-NEXT: buffer_inv sc1 11866; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 11867; GFX940-TGSPLIT-NEXT: s_endpgm 11868; 11869; GFX11-WGP-LABEL: global_agent_one_as_seq_cst_load: 11870; GFX11-WGP: ; %bb.0: ; %entry 11871; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 11872; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 11873; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 11874; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 11875; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 11876; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[2:3] glc 11877; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 11878; GFX11-WGP-NEXT: buffer_gl1_inv 11879; GFX11-WGP-NEXT: buffer_gl0_inv 11880; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 11881; GFX11-WGP-NEXT: s_endpgm 11882; 11883; GFX11-CU-LABEL: global_agent_one_as_seq_cst_load: 11884; GFX11-CU: ; %bb.0: ; %entry 11885; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 11886; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 11887; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 11888; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 11889; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 11890; GFX11-CU-NEXT: global_load_b32 v1, v0, s[2:3] glc 11891; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 11892; GFX11-CU-NEXT: buffer_gl1_inv 11893; GFX11-CU-NEXT: buffer_gl0_inv 11894; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 11895; GFX11-CU-NEXT: s_endpgm 11896; 11897; GFX12-WGP-LABEL: global_agent_one_as_seq_cst_load: 11898; GFX12-WGP: ; %bb.0: ; %entry 11899; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 11900; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 11901; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 11902; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 11903; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 11904; GFX12-WGP-NEXT: s_wait_loadcnt 
0x0 11905; GFX12-WGP-NEXT: s_wait_storecnt 0x0 11906; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 11907; GFX12-WGP-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV 11908; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 11909; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 11910; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 11911; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 11912; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 11913; GFX12-WGP-NEXT: s_endpgm 11914; 11915; GFX12-CU-LABEL: global_agent_one_as_seq_cst_load: 11916; GFX12-CU: ; %bb.0: ; %entry 11917; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 11918; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 11919; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 11920; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 11921; GFX12-CU-NEXT: s_wait_samplecnt 0x0 11922; GFX12-CU-NEXT: s_wait_loadcnt 0x0 11923; GFX12-CU-NEXT: s_wait_storecnt 0x0 11924; GFX12-CU-NEXT: s_wait_kmcnt 0x0 11925; GFX12-CU-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV 11926; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 11927; GFX12-CU-NEXT: s_wait_samplecnt 0x0 11928; GFX12-CU-NEXT: s_wait_loadcnt 0x0 11929; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 11930; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 11931; GFX12-CU-NEXT: s_endpgm 11932 ptr addrspace(1) %in, ptr addrspace(1) %out) { 11933entry: 11934 %val = load atomic i32, ptr addrspace(1) %in syncscope("agent-one-as") seq_cst, align 4 11935 store i32 %val, ptr addrspace(1) %out 11936 ret void 11937} 11938 11939define amdgpu_kernel void @global_agent_one_as_unordered_store( 11940; GFX6-LABEL: global_agent_one_as_unordered_store: 11941; GFX6: ; %bb.0: ; %entry 11942; GFX6-NEXT: s_mov_b64 s[4:5], s[8:9] 11943; GFX6-NEXT: s_load_dword s8, s[4:5], 0x0 11944; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 11945; GFX6-NEXT: s_waitcnt lgkmcnt(0) 11946; GFX6-NEXT: s_mov_b32 s11, s5 11947; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 11948; GFX6-NEXT: s_mov_b32 s9, 0x100f000 11949; GFX6-NEXT: s_mov_b32 s10, -1 11950; GFX6-NEXT: ; kill: def $sgpr4 killed 
$sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 11951; GFX6-NEXT: s_mov_b32 s5, s11 11952; GFX6-NEXT: s_mov_b32 s6, s10 11953; GFX6-NEXT: s_mov_b32 s7, s9 11954; GFX6-NEXT: v_mov_b32_e32 v0, s8 11955; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 11956; GFX6-NEXT: s_endpgm 11957; 11958; GFX7-LABEL: global_agent_one_as_unordered_store: 11959; GFX7: ; %bb.0: ; %entry 11960; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0 11961; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x2 11962; GFX7-NEXT: s_waitcnt lgkmcnt(0) 11963; GFX7-NEXT: v_mov_b32_e32 v0, s6 11964; GFX7-NEXT: v_mov_b32_e32 v1, s7 11965; GFX7-NEXT: v_mov_b32_e32 v2, s4 11966; GFX7-NEXT: flat_store_dword v[0:1], v2 11967; GFX7-NEXT: s_endpgm 11968; 11969; GFX10-WGP-LABEL: global_agent_one_as_unordered_store: 11970; GFX10-WGP: ; %bb.0: ; %entry 11971; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0 11972; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 11973; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 11974; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 11975; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 11976; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 11977; GFX10-WGP-NEXT: s_endpgm 11978; 11979; GFX10-CU-LABEL: global_agent_one_as_unordered_store: 11980; GFX10-CU: ; %bb.0: ; %entry 11981; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0 11982; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 11983; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 11984; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 11985; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 11986; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 11987; GFX10-CU-NEXT: s_endpgm 11988; 11989; SKIP-CACHE-INV-LABEL: global_agent_one_as_unordered_store: 11990; SKIP-CACHE-INV: ; %bb.0: ; %entry 11991; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 11992; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 11993; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 11994; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 11995; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 11996; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed 
$sgpr0_sgpr1 11997; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 11998; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 11999; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 12000; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 12001; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 12002; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 12003; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 12004; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 12005; SKIP-CACHE-INV-NEXT: s_endpgm 12006; 12007; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_unordered_store: 12008; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 12009; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0 12010; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 12011; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12012; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12013; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 12014; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 12015; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 12016; 12017; GFX90A-TGSPLIT-LABEL: global_agent_one_as_unordered_store: 12018; GFX90A-TGSPLIT: ; %bb.0: ; %entry 12019; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0 12020; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 12021; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12022; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12023; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 12024; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 12025; GFX90A-TGSPLIT-NEXT: s_endpgm 12026; 12027; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_unordered_store: 12028; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 12029; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 12030; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 12031; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12032; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12033; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 12034; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 12035; GFX940-NOTTGSPLIT-NEXT: s_endpgm 12036; 12037; 
GFX940-TGSPLIT-LABEL: global_agent_one_as_unordered_store: 12038; GFX940-TGSPLIT: ; %bb.0: ; %entry 12039; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 12040; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 12041; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12042; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12043; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 12044; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 12045; GFX940-TGSPLIT-NEXT: s_endpgm 12046; 12047; GFX11-WGP-LABEL: global_agent_one_as_unordered_store: 12048; GFX11-WGP: ; %bb.0: ; %entry 12049; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 12050; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 12051; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 12052; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 12053; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 12054; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 12055; GFX11-WGP-NEXT: s_endpgm 12056; 12057; GFX11-CU-LABEL: global_agent_one_as_unordered_store: 12058; GFX11-CU: ; %bb.0: ; %entry 12059; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 12060; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 12061; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 12062; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 12063; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 12064; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 12065; GFX11-CU-NEXT: s_endpgm 12066; 12067; GFX12-WGP-LABEL: global_agent_one_as_unordered_store: 12068; GFX12-WGP: ; %bb.0: ; %entry 12069; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 12070; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 12071; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 12072; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 12073; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 12074; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 12075; GFX12-WGP-NEXT: s_endpgm 12076; 12077; GFX12-CU-LABEL: global_agent_one_as_unordered_store: 12078; GFX12-CU: ; %bb.0: ; %entry 12079; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 12080; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 12081; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 12082; 
GFX12-CU-NEXT: s_wait_kmcnt 0x0 12083; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 12084; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 12085; GFX12-CU-NEXT: s_endpgm 12086 i32 %in, ptr addrspace(1) %out) { 12087entry: 12088 store atomic i32 %in, ptr addrspace(1) %out syncscope("agent-one-as") unordered, align 4 12089 ret void 12090} 12091; Monotonic atomic i32 store of %in through the addrspace(1) pointer %out at syncscope("agent-one-as"). The autogenerated checks pin the llc -O0 output per target and expect the store with no vector-memory wait in front of it. 12092define amdgpu_kernel void @global_agent_one_as_monotonic_store( 12093; GFX6-LABEL: global_agent_one_as_monotonic_store: 12094; GFX6: ; %bb.0: ; %entry 12095; GFX6-NEXT: s_mov_b64 s[4:5], s[8:9] 12096; GFX6-NEXT: s_load_dword s8, s[4:5], 0x0 12097; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 12098; GFX6-NEXT: s_waitcnt lgkmcnt(0) 12099; GFX6-NEXT: s_mov_b32 s11, s5 12100; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 12101; GFX6-NEXT: s_mov_b32 s9, 0x100f000 12102; GFX6-NEXT: s_mov_b32 s10, -1 12103; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 12104; GFX6-NEXT: s_mov_b32 s5, s11 12105; GFX6-NEXT: s_mov_b32 s6, s10 12106; GFX6-NEXT: s_mov_b32 s7, s9 12107; GFX6-NEXT: v_mov_b32_e32 v0, s8 12108; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 12109; GFX6-NEXT: s_endpgm 12110; 12111; GFX7-LABEL: global_agent_one_as_monotonic_store: 12112; GFX7: ; %bb.0: ; %entry 12113; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0 12114; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x2 12115; GFX7-NEXT: s_waitcnt lgkmcnt(0) 12116; GFX7-NEXT: v_mov_b32_e32 v0, s6 12117; GFX7-NEXT: v_mov_b32_e32 v1, s7 12118; GFX7-NEXT: v_mov_b32_e32 v2, s4 12119; GFX7-NEXT: flat_store_dword v[0:1], v2 12120; GFX7-NEXT: s_endpgm 12121; 12122; GFX10-WGP-LABEL: global_agent_one_as_monotonic_store: 12123; GFX10-WGP: ; %bb.0: ; %entry 12124; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0 12125; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 12126; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 12127; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 12128; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 12129; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 12130; 
GFX10-WGP-NEXT: s_endpgm 12131; 12132; GFX10-CU-LABEL: global_agent_one_as_monotonic_store: 12133; GFX10-CU: ; %bb.0: ; %entry 12134; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0 12135; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 12136; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 12137; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 12138; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 12139; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 12140; GFX10-CU-NEXT: s_endpgm 12141; 12142; SKIP-CACHE-INV-LABEL: global_agent_one_as_monotonic_store: 12143; SKIP-CACHE-INV: ; %bb.0: ; %entry 12144; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 12145; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 12146; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 12147; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 12148; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 12149; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 12150; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 12151; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 12152; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 12153; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 12154; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 12155; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 12156; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 12157; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 12158; SKIP-CACHE-INV-NEXT: s_endpgm 12159; 12160; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_store: 12161; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 12162; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0 12163; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 12164; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12165; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12166; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 12167; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 12168; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 12169; 12170; GFX90A-TGSPLIT-LABEL: global_agent_one_as_monotonic_store: 12171; GFX90A-TGSPLIT: ; %bb.0: ; 
%entry 12172; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0 12173; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 12174; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12175; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12176; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 12177; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 12178; GFX90A-TGSPLIT-NEXT: s_endpgm 12179; 12180; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_store: 12181; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 12182; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 12183; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 12184; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12185; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12186; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 12187; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 12188; GFX940-NOTTGSPLIT-NEXT: s_endpgm 12189; 12190; GFX940-TGSPLIT-LABEL: global_agent_one_as_monotonic_store: 12191; GFX940-TGSPLIT: ; %bb.0: ; %entry 12192; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 12193; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 12194; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12195; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12196; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 12197; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 12198; GFX940-TGSPLIT-NEXT: s_endpgm 12199; 12200; GFX11-WGP-LABEL: global_agent_one_as_monotonic_store: 12201; GFX11-WGP: ; %bb.0: ; %entry 12202; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 12203; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 12204; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 12205; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 12206; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 12207; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 12208; GFX11-WGP-NEXT: s_endpgm 12209; 12210; GFX11-CU-LABEL: global_agent_one_as_monotonic_store: 12211; GFX11-CU: ; %bb.0: ; %entry 12212; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 12213; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 
0x8 12214; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 12215; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 12216; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 12217; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 12218; GFX11-CU-NEXT: s_endpgm 12219; 12220; GFX12-WGP-LABEL: global_agent_one_as_monotonic_store: 12221; GFX12-WGP: ; %bb.0: ; %entry 12222; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 12223; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 12224; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 12225; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 12226; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 12227; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV 12228; GFX12-WGP-NEXT: s_endpgm 12229; 12230; GFX12-CU-LABEL: global_agent_one_as_monotonic_store: 12231; GFX12-CU: ; %bb.0: ; %entry 12232; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 12233; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 12234; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 12235; GFX12-CU-NEXT: s_wait_kmcnt 0x0 12236; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 12237; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV 12238; GFX12-CU-NEXT: s_endpgm 12239 i32 %in, ptr addrspace(1) %out) { 12240entry: 12241 store atomic i32 %in, ptr addrspace(1) %out syncscope("agent-one-as") monotonic, align 4 12242 ret void 12243} 12244; Release atomic i32 store of %in through the addrspace(1) pointer %out at syncscope("agent-one-as"). The autogenerated checks expect wait instructions (s_waitcnt vmcnt(0), s_waitcnt_vscnt, or the gfx1200 s_wait_*cnt group) immediately before the store on every target, and on gfx940 a buffer_wbl2 sc1 ahead of that wait. 12245define amdgpu_kernel void @global_agent_one_as_release_store( 12246; GFX6-LABEL: global_agent_one_as_release_store: 12247; GFX6: ; %bb.0: ; %entry 12248; GFX6-NEXT: s_mov_b64 s[4:5], s[8:9] 12249; GFX6-NEXT: s_load_dword s8, s[4:5], 0x0 12250; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 12251; GFX6-NEXT: s_waitcnt lgkmcnt(0) 12252; GFX6-NEXT: s_mov_b32 s11, s5 12253; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 12254; GFX6-NEXT: s_mov_b32 s9, 0x100f000 12255; GFX6-NEXT: s_mov_b32 s10, -1 12256; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 12257; GFX6-NEXT: s_mov_b32 s5, s11 12258; GFX6-NEXT: s_mov_b32 s6, s10 12259; GFX6-NEXT: s_mov_b32 s7, s9 12260; GFX6-NEXT: v_mov_b32_e32 
v0, s8 12261; GFX6-NEXT: s_waitcnt vmcnt(0) 12262; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 12263; GFX6-NEXT: s_endpgm 12264; 12265; GFX7-LABEL: global_agent_one_as_release_store: 12266; GFX7: ; %bb.0: ; %entry 12267; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0 12268; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x2 12269; GFX7-NEXT: s_waitcnt lgkmcnt(0) 12270; GFX7-NEXT: v_mov_b32_e32 v0, s6 12271; GFX7-NEXT: v_mov_b32_e32 v1, s7 12272; GFX7-NEXT: v_mov_b32_e32 v2, s4 12273; GFX7-NEXT: s_waitcnt vmcnt(0) 12274; GFX7-NEXT: flat_store_dword v[0:1], v2 12275; GFX7-NEXT: s_endpgm 12276; 12277; GFX10-WGP-LABEL: global_agent_one_as_release_store: 12278; GFX10-WGP: ; %bb.0: ; %entry 12279; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0 12280; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 12281; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 12282; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 12283; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 12284; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 12285; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 12286; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 12287; GFX10-WGP-NEXT: s_endpgm 12288; 12289; GFX10-CU-LABEL: global_agent_one_as_release_store: 12290; GFX10-CU: ; %bb.0: ; %entry 12291; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0 12292; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 12293; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 12294; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 12295; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 12296; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 12297; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 12298; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 12299; GFX10-CU-NEXT: s_endpgm 12300; 12301; SKIP-CACHE-INV-LABEL: global_agent_one_as_release_store: 12302; SKIP-CACHE-INV: ; %bb.0: ; %entry 12303; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 12304; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 12305; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 12306; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 12307; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 
12308; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 12309; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 12310; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 12311; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 12312; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 12313; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 12314; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 12315; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 12316; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 12317; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 12318; SKIP-CACHE-INV-NEXT: s_endpgm 12319; 12320; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_release_store: 12321; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 12322; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0 12323; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 12324; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12325; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12326; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 12327; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 12328; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 12329; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 12330; 12331; GFX90A-TGSPLIT-LABEL: global_agent_one_as_release_store: 12332; GFX90A-TGSPLIT: ; %bb.0: ; %entry 12333; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0 12334; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 12335; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12336; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12337; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 12338; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 12339; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 12340; GFX90A-TGSPLIT-NEXT: s_endpgm 12341; 12342; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_release_store: 12343; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 12344; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 12345; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 12346; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12347; 
GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12348; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 12349; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 12350; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 12351; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 12352; GFX940-NOTTGSPLIT-NEXT: s_endpgm 12353; 12354; GFX940-TGSPLIT-LABEL: global_agent_one_as_release_store: 12355; GFX940-TGSPLIT: ; %bb.0: ; %entry 12356; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 12357; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 12358; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12359; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12360; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 12361; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 12362; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 12363; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 12364; GFX940-TGSPLIT-NEXT: s_endpgm 12365; 12366; GFX11-WGP-LABEL: global_agent_one_as_release_store: 12367; GFX11-WGP: ; %bb.0: ; %entry 12368; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 12369; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 12370; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 12371; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 12372; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 12373; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 12374; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 12375; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 12376; GFX11-WGP-NEXT: s_endpgm 12377; 12378; GFX11-CU-LABEL: global_agent_one_as_release_store: 12379; GFX11-CU: ; %bb.0: ; %entry 12380; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 12381; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 12382; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 12383; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 12384; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 12385; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 12386; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 12387; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 12388; GFX11-CU-NEXT: s_endpgm 12389; 12390; GFX12-WGP-LABEL: global_agent_one_as_release_store: 12391; GFX12-WGP: ; %bb.0: ; 
%entry 12392; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 12393; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 12394; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 12395; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 12396; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 12397; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 12398; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 12399; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 12400; GFX12-WGP-NEXT: s_wait_storecnt 0x0 12401; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV 12402; GFX12-WGP-NEXT: s_endpgm 12403; 12404; GFX12-CU-LABEL: global_agent_one_as_release_store: 12405; GFX12-CU: ; %bb.0: ; %entry 12406; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 12407; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 12408; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 12409; GFX12-CU-NEXT: s_wait_kmcnt 0x0 12410; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 12411; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 12412; GFX12-CU-NEXT: s_wait_samplecnt 0x0 12413; GFX12-CU-NEXT: s_wait_loadcnt 0x0 12414; GFX12-CU-NEXT: s_wait_storecnt 0x0 12415; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV 12416; GFX12-CU-NEXT: s_endpgm 12417 i32 %in, ptr addrspace(1) %out) { 12418entry: 12419 store atomic i32 %in, ptr addrspace(1) %out syncscope("agent-one-as") release, align 4 12420 ret void 12421} 12422; Sequentially consistent atomic i32 store of %in through the addrspace(1) pointer %out at syncscope("agent-one-as"). The autogenerated checks expect wait instructions (s_waitcnt vmcnt(0), s_waitcnt_vscnt, or the gfx1200 s_wait_*cnt group) immediately before the store on every target, and on gfx940 a buffer_wbl2 sc1 ahead of that wait. 12423define amdgpu_kernel void @global_agent_one_as_seq_cst_store( 12424; GFX6-LABEL: global_agent_one_as_seq_cst_store: 12425; GFX6: ; %bb.0: ; %entry 12426; GFX6-NEXT: s_mov_b64 s[4:5], s[8:9] 12427; GFX6-NEXT: s_load_dword s8, s[4:5], 0x0 12428; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 12429; GFX6-NEXT: s_waitcnt lgkmcnt(0) 12430; GFX6-NEXT: s_mov_b32 s11, s5 12431; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 12432; GFX6-NEXT: s_mov_b32 s9, 0x100f000 12433; GFX6-NEXT: s_mov_b32 s10, -1 12434; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 12435; GFX6-NEXT: s_mov_b32 s5, s11 12436; GFX6-NEXT: s_mov_b32 s6, s10 12437; GFX6-NEXT: s_mov_b32 s7, s9 12438; 
GFX6-NEXT: v_mov_b32_e32 v0, s8 12439; GFX6-NEXT: s_waitcnt vmcnt(0) 12440; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 12441; GFX6-NEXT: s_endpgm 12442; 12443; GFX7-LABEL: global_agent_one_as_seq_cst_store: 12444; GFX7: ; %bb.0: ; %entry 12445; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0 12446; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x2 12447; GFX7-NEXT: s_waitcnt lgkmcnt(0) 12448; GFX7-NEXT: v_mov_b32_e32 v0, s6 12449; GFX7-NEXT: v_mov_b32_e32 v1, s7 12450; GFX7-NEXT: v_mov_b32_e32 v2, s4 12451; GFX7-NEXT: s_waitcnt vmcnt(0) 12452; GFX7-NEXT: flat_store_dword v[0:1], v2 12453; GFX7-NEXT: s_endpgm 12454; 12455; GFX10-WGP-LABEL: global_agent_one_as_seq_cst_store: 12456; GFX10-WGP: ; %bb.0: ; %entry 12457; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0 12458; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 12459; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 12460; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 12461; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 12462; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 12463; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 12464; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 12465; GFX10-WGP-NEXT: s_endpgm 12466; 12467; GFX10-CU-LABEL: global_agent_one_as_seq_cst_store: 12468; GFX10-CU: ; %bb.0: ; %entry 12469; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0 12470; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 12471; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 12472; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 12473; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 12474; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 12475; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 12476; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 12477; GFX10-CU-NEXT: s_endpgm 12478; 12479; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_store: 12480; SKIP-CACHE-INV: ; %bb.0: ; %entry 12481; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 12482; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 12483; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 12484; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 12485; 
SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 12486; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 12487; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 12488; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 12489; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 12490; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 12491; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 12492; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 12493; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 12494; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 12495; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 12496; SKIP-CACHE-INV-NEXT: s_endpgm 12497; 12498; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_store: 12499; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 12500; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0 12501; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 12502; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12503; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12504; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 12505; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 12506; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 12507; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 12508; 12509; GFX90A-TGSPLIT-LABEL: global_agent_one_as_seq_cst_store: 12510; GFX90A-TGSPLIT: ; %bb.0: ; %entry 12511; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0 12512; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 12513; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12514; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12515; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 12516; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 12517; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 12518; GFX90A-TGSPLIT-NEXT: s_endpgm 12519; 12520; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_store: 12521; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 12522; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 12523; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 12524; GFX940-NOTTGSPLIT-NEXT: 
v_mov_b32_e32 v0, 0 12525; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12526; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 12527; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 12528; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 12529; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 12530; GFX940-NOTTGSPLIT-NEXT: s_endpgm 12531; 12532; GFX940-TGSPLIT-LABEL: global_agent_one_as_seq_cst_store: 12533; GFX940-TGSPLIT: ; %bb.0: ; %entry 12534; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 12535; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 12536; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12537; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12538; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 12539; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 12540; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 12541; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 12542; GFX940-TGSPLIT-NEXT: s_endpgm 12543; 12544; GFX11-WGP-LABEL: global_agent_one_as_seq_cst_store: 12545; GFX11-WGP: ; %bb.0: ; %entry 12546; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 12547; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 12548; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 12549; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 12550; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 12551; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 12552; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 12553; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 12554; GFX11-WGP-NEXT: s_endpgm 12555; 12556; GFX11-CU-LABEL: global_agent_one_as_seq_cst_store: 12557; GFX11-CU: ; %bb.0: ; %entry 12558; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 12559; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 12560; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 12561; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 12562; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 12563; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 12564; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 12565; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 12566; GFX11-CU-NEXT: s_endpgm 12567; 12568; GFX12-WGP-LABEL: global_agent_one_as_seq_cst_store: 
12569; GFX12-WGP: ; %bb.0: ; %entry 12570; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 12571; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 12572; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 12573; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 12574; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 12575; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 12576; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 12577; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 12578; GFX12-WGP-NEXT: s_wait_storecnt 0x0 12579; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV 12580; GFX12-WGP-NEXT: s_endpgm 12581; 12582; GFX12-CU-LABEL: global_agent_one_as_seq_cst_store: 12583; GFX12-CU: ; %bb.0: ; %entry 12584; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 12585; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 12586; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 12587; GFX12-CU-NEXT: s_wait_kmcnt 0x0 12588; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 12589; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 12590; GFX12-CU-NEXT: s_wait_samplecnt 0x0 12591; GFX12-CU-NEXT: s_wait_loadcnt 0x0 12592; GFX12-CU-NEXT: s_wait_storecnt 0x0 12593; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV 12594; GFX12-CU-NEXT: s_endpgm 12595 i32 %in, ptr addrspace(1) %out) { 12596entry: 12597 store atomic i32 %in, ptr addrspace(1) %out syncscope("agent-one-as") seq_cst, align 4 12598 ret void 12599} 12600; Volatile monotonic atomicrmw xchg of %in into the addrspace(1) pointer %out at syncscope("agent-one-as"), with the result %val left unused. The autogenerated checks expect a single atomic swap instruction followed directly by s_endpgm on every target. 12601define amdgpu_kernel void @global_agent_one_as_monotonic_atomicrmw( 12602; GFX6-LABEL: global_agent_one_as_monotonic_atomicrmw: 12603; GFX6: ; %bb.0: ; %entry 12604; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12605; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2 12606; GFX6-NEXT: s_waitcnt lgkmcnt(0) 12607; GFX6-NEXT: s_mov_b32 s11, s5 12608; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 12609; GFX6-NEXT: s_mov_b32 s9, 0x100f000 12610; GFX6-NEXT: s_mov_b32 s10, -1 12611; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 12612; GFX6-NEXT: s_mov_b32 s5, s11 12613; GFX6-NEXT: s_mov_b32 s6, s10 12614; GFX6-NEXT: s_mov_b32 s7, s9 12615; GFX6-NEXT: 
v_mov_b32_e32 v0, s8 12616; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 12617; GFX6-NEXT: s_endpgm 12618; 12619; GFX7-LABEL: global_agent_one_as_monotonic_atomicrmw: 12620; GFX7: ; %bb.0: ; %entry 12621; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 12622; GFX7-NEXT: s_load_dword s4, s[8:9], 0x2 12623; GFX7-NEXT: s_waitcnt lgkmcnt(0) 12624; GFX7-NEXT: v_mov_b32_e32 v0, s6 12625; GFX7-NEXT: v_mov_b32_e32 v1, s7 12626; GFX7-NEXT: v_mov_b32_e32 v2, s4 12627; GFX7-NEXT: flat_atomic_swap v[0:1], v2 12628; GFX7-NEXT: s_endpgm 12629; 12630; GFX10-WGP-LABEL: global_agent_one_as_monotonic_atomicrmw: 12631; GFX10-WGP: ; %bb.0: ; %entry 12632; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 12633; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12634; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 12635; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 12636; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 12637; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[4:5] 12638; GFX10-WGP-NEXT: s_endpgm 12639; 12640; GFX10-CU-LABEL: global_agent_one_as_monotonic_atomicrmw: 12641; GFX10-CU: ; %bb.0: ; %entry 12642; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 12643; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12644; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 12645; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 12646; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 12647; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[4:5] 12648; GFX10-CU-NEXT: s_endpgm 12649; 12650; SKIP-CACHE-INV-LABEL: global_agent_one_as_monotonic_atomicrmw: 12651; SKIP-CACHE-INV: ; %bb.0: ; %entry 12652; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 12653; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2 12654; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 12655; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 12656; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 12657; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 12658; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 12659; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 
12660; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 12661; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 12662; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 12663; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 12664; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 12665; SKIP-CACHE-INV-NEXT: s_endpgm 12666; 12667; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_atomicrmw: 12668; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 12669; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12670; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12671; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 12672; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12673; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 12674; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 12675; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 12676; 12677; GFX90A-TGSPLIT-LABEL: global_agent_one_as_monotonic_atomicrmw: 12678; GFX90A-TGSPLIT: ; %bb.0: ; %entry 12679; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12680; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12681; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 12682; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12683; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 12684; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 12685; GFX90A-TGSPLIT-NEXT: s_endpgm 12686; 12687; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_atomicrmw: 12688; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 12689; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12690; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 12691; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 12692; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12693; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 12694; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 12695; GFX940-NOTTGSPLIT-NEXT: s_endpgm 12696; 12697; GFX940-TGSPLIT-LABEL: global_agent_one_as_monotonic_atomicrmw: 12698; GFX940-TGSPLIT: ; %bb.0: ; %entry 12699; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12700; GFX940-TGSPLIT-NEXT: s_load_dwordx2 
s[0:1], s[4:5], 0x0 12701; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 12702; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12703; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 12704; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 12705; GFX940-TGSPLIT-NEXT: s_endpgm 12706; 12707; GFX11-WGP-LABEL: global_agent_one_as_monotonic_atomicrmw: 12708; GFX11-WGP: ; %bb.0: ; %entry 12709; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 12710; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 12711; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 12712; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 12713; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 12714; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 12715; GFX11-WGP-NEXT: s_endpgm 12716; 12717; GFX11-CU-LABEL: global_agent_one_as_monotonic_atomicrmw: 12718; GFX11-CU: ; %bb.0: ; %entry 12719; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 12720; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 12721; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 12722; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 12723; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 12724; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 12725; GFX11-CU-NEXT: s_endpgm 12726; 12727; GFX12-WGP-LABEL: global_agent_one_as_monotonic_atomicrmw: 12728; GFX12-WGP: ; %bb.0: ; %entry 12729; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 12730; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 12731; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 12732; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 12733; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 12734; GFX12-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV 12735; GFX12-WGP-NEXT: s_endpgm 12736; 12737; GFX12-CU-LABEL: global_agent_one_as_monotonic_atomicrmw: 12738; GFX12-CU: ; %bb.0: ; %entry 12739; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 12740; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 12741; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 12742; GFX12-CU-NEXT: s_wait_kmcnt 0x0 12743; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 12744; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV 12745; 
GFX12-CU-NEXT: s_endpgm 12746 ptr addrspace(1) %out, i32 %in) { 12747entry: 12748 %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent-one-as") monotonic 12749 ret void 12750} 12751 12752define amdgpu_kernel void @global_agent_one_as_acquire_atomicrmw( 12753; GFX6-LABEL: global_agent_one_as_acquire_atomicrmw: 12754; GFX6: ; %bb.0: ; %entry 12755; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12756; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2 12757; GFX6-NEXT: s_waitcnt lgkmcnt(0) 12758; GFX6-NEXT: s_mov_b32 s11, s5 12759; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 12760; GFX6-NEXT: s_mov_b32 s9, 0x100f000 12761; GFX6-NEXT: s_mov_b32 s10, -1 12762; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 12763; GFX6-NEXT: s_mov_b32 s5, s11 12764; GFX6-NEXT: s_mov_b32 s6, s10 12765; GFX6-NEXT: s_mov_b32 s7, s9 12766; GFX6-NEXT: v_mov_b32_e32 v0, s8 12767; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 12768; GFX6-NEXT: s_waitcnt vmcnt(0) 12769; GFX6-NEXT: buffer_wbinvl1 12770; GFX6-NEXT: s_endpgm 12771; 12772; GFX7-LABEL: global_agent_one_as_acquire_atomicrmw: 12773; GFX7: ; %bb.0: ; %entry 12774; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 12775; GFX7-NEXT: s_load_dword s4, s[8:9], 0x2 12776; GFX7-NEXT: s_waitcnt lgkmcnt(0) 12777; GFX7-NEXT: v_mov_b32_e32 v0, s6 12778; GFX7-NEXT: v_mov_b32_e32 v1, s7 12779; GFX7-NEXT: v_mov_b32_e32 v2, s4 12780; GFX7-NEXT: flat_atomic_swap v[0:1], v2 12781; GFX7-NEXT: s_waitcnt vmcnt(0) 12782; GFX7-NEXT: buffer_wbinvl1_vol 12783; GFX7-NEXT: s_endpgm 12784; 12785; GFX10-WGP-LABEL: global_agent_one_as_acquire_atomicrmw: 12786; GFX10-WGP: ; %bb.0: ; %entry 12787; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 12788; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12789; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 12790; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 12791; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 12792; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[4:5] 12793; GFX10-WGP-NEXT: 
s_waitcnt_vscnt null, 0x0 12794; GFX10-WGP-NEXT: buffer_gl1_inv 12795; GFX10-WGP-NEXT: buffer_gl0_inv 12796; GFX10-WGP-NEXT: s_endpgm 12797; 12798; GFX10-CU-LABEL: global_agent_one_as_acquire_atomicrmw: 12799; GFX10-CU: ; %bb.0: ; %entry 12800; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 12801; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12802; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 12803; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 12804; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 12805; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[4:5] 12806; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 12807; GFX10-CU-NEXT: buffer_gl1_inv 12808; GFX10-CU-NEXT: buffer_gl0_inv 12809; GFX10-CU-NEXT: s_endpgm 12810; 12811; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_atomicrmw: 12812; SKIP-CACHE-INV: ; %bb.0: ; %entry 12813; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 12814; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2 12815; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 12816; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 12817; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 12818; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 12819; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 12820; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 12821; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 12822; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 12823; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 12824; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 12825; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 12826; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 12827; SKIP-CACHE-INV-NEXT: s_endpgm 12828; 12829; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_atomicrmw: 12830; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 12831; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12832; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12833; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 12834; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12835; GFX90A-NOTTGSPLIT-NEXT: 
v_mov_b32_e32 v1, s6 12836; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 12837; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 12838; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 12839; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 12840; 12841; GFX90A-TGSPLIT-LABEL: global_agent_one_as_acquire_atomicrmw: 12842; GFX90A-TGSPLIT: ; %bb.0: ; %entry 12843; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12844; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12845; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 12846; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12847; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 12848; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 12849; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 12850; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 12851; GFX90A-TGSPLIT-NEXT: s_endpgm 12852; 12853; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_atomicrmw: 12854; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 12855; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12856; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 12857; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 12858; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12859; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 12860; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 12861; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 12862; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 12863; GFX940-NOTTGSPLIT-NEXT: s_endpgm 12864; 12865; GFX940-TGSPLIT-LABEL: global_agent_one_as_acquire_atomicrmw: 12866; GFX940-TGSPLIT: ; %bb.0: ; %entry 12867; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12868; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 12869; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 12870; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12871; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 12872; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 12873; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 12874; GFX940-TGSPLIT-NEXT: buffer_inv sc1 12875; GFX940-TGSPLIT-NEXT: s_endpgm 12876; 12877; 
GFX11-WGP-LABEL: global_agent_one_as_acquire_atomicrmw: 12878; GFX11-WGP: ; %bb.0: ; %entry 12879; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 12880; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 12881; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 12882; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 12883; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 12884; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 12885; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 12886; GFX11-WGP-NEXT: buffer_gl1_inv 12887; GFX11-WGP-NEXT: buffer_gl0_inv 12888; GFX11-WGP-NEXT: s_endpgm 12889; 12890; GFX11-CU-LABEL: global_agent_one_as_acquire_atomicrmw: 12891; GFX11-CU: ; %bb.0: ; %entry 12892; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 12893; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 12894; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 12895; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 12896; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 12897; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 12898; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 12899; GFX11-CU-NEXT: buffer_gl1_inv 12900; GFX11-CU-NEXT: buffer_gl0_inv 12901; GFX11-CU-NEXT: s_endpgm 12902; 12903; GFX12-WGP-LABEL: global_agent_one_as_acquire_atomicrmw: 12904; GFX12-WGP: ; %bb.0: ; %entry 12905; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 12906; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 12907; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 12908; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 12909; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 12910; GFX12-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV 12911; GFX12-WGP-NEXT: s_wait_storecnt 0x0 12912; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 12913; GFX12-WGP-NEXT: s_endpgm 12914; 12915; GFX12-CU-LABEL: global_agent_one_as_acquire_atomicrmw: 12916; GFX12-CU: ; %bb.0: ; %entry 12917; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 12918; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 12919; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 12920; GFX12-CU-NEXT: s_wait_kmcnt 0x0 12921; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 12922; GFX12-CU-NEXT: 
global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV 12923; GFX12-CU-NEXT: s_wait_storecnt 0x0 12924; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 12925; GFX12-CU-NEXT: s_endpgm 12926 ptr addrspace(1) %out, i32 %in) { 12927entry: 12928 %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent-one-as") acquire 12929 ret void 12930} 12931
; No-return exchange with release ordering: the kernel below performs an
; `atomicrmw volatile xchg` of %in into the addrspace(1) pointer %out with
; syncscope("agent-one-as"); the old value (%val) is never used.
; Expected shape in every run: all ordering instructions precede the swap and
; only s_endpgm follows it. The pre-swap sequence is s_waitcnt vmcnt(0)
; (gfx600, gfx700, gfx90a and the -amdgcn-skip-cache-invalidations run),
; s_waitcnt vmcnt(0) + s_waitcnt_vscnt null, 0x0 (gfx1010, gfx1100),
; buffer_wbl2 sc1 + s_waitcnt vmcnt(0) (gfx940), or s_wait_bvhcnt,
; s_wait_samplecnt, s_wait_loadcnt and s_wait_storecnt, each with 0x0 (gfx1200).
; The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them rather than editing them by hand.
12932define amdgpu_kernel void @global_agent_one_as_release_atomicrmw( 12933; GFX6-LABEL: global_agent_one_as_release_atomicrmw: 12934; GFX6: ; %bb.0: ; %entry 12935; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12936; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2 12937; GFX6-NEXT: s_waitcnt lgkmcnt(0) 12938; GFX6-NEXT: s_mov_b32 s11, s5 12939; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 12940; GFX6-NEXT: s_mov_b32 s9, 0x100f000 12941; GFX6-NEXT: s_mov_b32 s10, -1 12942; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 12943; GFX6-NEXT: s_mov_b32 s5, s11 12944; GFX6-NEXT: s_mov_b32 s6, s10 12945; GFX6-NEXT: s_mov_b32 s7, s9 12946; GFX6-NEXT: v_mov_b32_e32 v0, s8 12947; GFX6-NEXT: s_waitcnt vmcnt(0) 12948; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 12949; GFX6-NEXT: s_endpgm 12950; 12951; GFX7-LABEL: global_agent_one_as_release_atomicrmw: 12952; GFX7: ; %bb.0: ; %entry 12953; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 12954; GFX7-NEXT: s_load_dword s4, s[8:9], 0x2 12955; GFX7-NEXT: s_waitcnt lgkmcnt(0) 12956; GFX7-NEXT: v_mov_b32_e32 v0, s6 12957; GFX7-NEXT: v_mov_b32_e32 v1, s7 12958; GFX7-NEXT: v_mov_b32_e32 v2, s4 12959; GFX7-NEXT: s_waitcnt vmcnt(0) 12960; GFX7-NEXT: flat_atomic_swap v[0:1], v2 12961; GFX7-NEXT: s_endpgm 12962; 12963; GFX10-WGP-LABEL: global_agent_one_as_release_atomicrmw: 12964; GFX10-WGP: ; %bb.0: ; %entry 12965; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 12966; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12967; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 12968; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 12969; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 
12970; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 12971; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 12972; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[4:5] 12973; GFX10-WGP-NEXT: s_endpgm 12974; 12975; GFX10-CU-LABEL: global_agent_one_as_release_atomicrmw: 12976; GFX10-CU: ; %bb.0: ; %entry 12977; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 12978; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12979; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 12980; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 12981; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 12982; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 12983; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 12984; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[4:5] 12985; GFX10-CU-NEXT: s_endpgm 12986; 12987; SKIP-CACHE-INV-LABEL: global_agent_one_as_release_atomicrmw: 12988; SKIP-CACHE-INV: ; %bb.0: ; %entry 12989; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 12990; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2 12991; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 12992; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 12993; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 12994; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 12995; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 12996; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 12997; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 12998; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 12999; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 13000; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 13001; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 13002; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 13003; SKIP-CACHE-INV-NEXT: s_endpgm 13004; 13005; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_release_atomicrmw: 13006; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 13007; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13008; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13009; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 13010; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13011; 
GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 13012; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 13013; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 13014; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 13015; 13016; GFX90A-TGSPLIT-LABEL: global_agent_one_as_release_atomicrmw: 13017; GFX90A-TGSPLIT: ; %bb.0: ; %entry 13018; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13019; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13020; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 13021; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13022; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 13023; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 13024; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 13025; GFX90A-TGSPLIT-NEXT: s_endpgm 13026; 13027; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_release_atomicrmw: 13028; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 13029; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13030; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 13031; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 13032; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13033; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 13034; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 13035; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 13036; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 13037; GFX940-NOTTGSPLIT-NEXT: s_endpgm 13038; 13039; GFX940-TGSPLIT-LABEL: global_agent_one_as_release_atomicrmw: 13040; GFX940-TGSPLIT: ; %bb.0: ; %entry 13041; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13042; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 13043; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 13044; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13045; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 13046; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 13047; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 13048; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 13049; GFX940-TGSPLIT-NEXT: s_endpgm 13050; 13051; GFX11-WGP-LABEL: global_agent_one_as_release_atomicrmw: 13052; GFX11-WGP: ; 
%bb.0: ; %entry 13053; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 13054; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13055; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 13056; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 13057; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 13058; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 13059; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 13060; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 13061; GFX11-WGP-NEXT: s_endpgm 13062; 13063; GFX11-CU-LABEL: global_agent_one_as_release_atomicrmw: 13064; GFX11-CU: ; %bb.0: ; %entry 13065; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 13066; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13067; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 13068; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 13069; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 13070; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 13071; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 13072; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 13073; GFX11-CU-NEXT: s_endpgm 13074; 13075; GFX12-WGP-LABEL: global_agent_one_as_release_atomicrmw: 13076; GFX12-WGP: ; %bb.0: ; %entry 13077; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 13078; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13079; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 13080; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 13081; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 13082; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 13083; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 13084; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 13085; GFX12-WGP-NEXT: s_wait_storecnt 0x0 13086; GFX12-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV 13087; GFX12-WGP-NEXT: s_endpgm 13088; 13089; GFX12-CU-LABEL: global_agent_one_as_release_atomicrmw: 13090; GFX12-CU: ; %bb.0: ; %entry 13091; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 13092; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13093; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 13094; GFX12-CU-NEXT: s_wait_kmcnt 0x0 13095; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 13096; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 13097; GFX12-CU-NEXT: s_wait_samplecnt 0x0 13098; GFX12-CU-NEXT: 
s_wait_loadcnt 0x0 13099; GFX12-CU-NEXT: s_wait_storecnt 0x0 13100; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV 13101; GFX12-CU-NEXT: s_endpgm 13102 ptr addrspace(1) %out, i32 %in) { 13103entry: 13104 %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent-one-as") release 13105 ret void 13106} 13107
; No-return exchange with acq_rel ordering: the kernel below performs an
; `atomicrmw volatile xchg` of %in into the addrspace(1) pointer %out with
; syncscope("agent-one-as"); the old value (%val) is never used.
; Expected shape in every run: the swap is bracketed on both sides.
;   Before the swap: s_waitcnt vmcnt(0) (gfx600, gfx700, gfx90a and the
;   -amdgcn-skip-cache-invalidations run), s_waitcnt vmcnt(0) +
;   s_waitcnt_vscnt null, 0x0 (gfx1010, gfx1100), buffer_wbl2 sc1 +
;   s_waitcnt vmcnt(0) (gfx940), or the four s_wait_*cnt 0x0 instructions
;   (gfx1200).
;   After the swap: a wait (s_waitcnt vmcnt(0), s_waitcnt_vscnt null, 0x0 or
;   s_wait_storecnt 0x0) followed by the invalidate (buffer_wbinvl1,
;   buffer_wbinvl1_vol, buffer_gl1_inv + buffer_gl0_inv, buffer_inv sc1 or
;   global_inv scope:SCOPE_DEV). The -amdgcn-skip-cache-invalidations run
;   keeps the trailing wait but expects no invalidate.
; The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them rather than editing them by hand.
13108define amdgpu_kernel void @global_agent_one_as_acq_rel_atomicrmw( 13109; GFX6-LABEL: global_agent_one_as_acq_rel_atomicrmw: 13110; GFX6: ; %bb.0: ; %entry 13111; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13112; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2 13113; GFX6-NEXT: s_waitcnt lgkmcnt(0) 13114; GFX6-NEXT: s_mov_b32 s11, s5 13115; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 13116; GFX6-NEXT: s_mov_b32 s9, 0x100f000 13117; GFX6-NEXT: s_mov_b32 s10, -1 13118; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 13119; GFX6-NEXT: s_mov_b32 s5, s11 13120; GFX6-NEXT: s_mov_b32 s6, s10 13121; GFX6-NEXT: s_mov_b32 s7, s9 13122; GFX6-NEXT: v_mov_b32_e32 v0, s8 13123; GFX6-NEXT: s_waitcnt vmcnt(0) 13124; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 13125; GFX6-NEXT: s_waitcnt vmcnt(0) 13126; GFX6-NEXT: buffer_wbinvl1 13127; GFX6-NEXT: s_endpgm 13128; 13129; GFX7-LABEL: global_agent_one_as_acq_rel_atomicrmw: 13130; GFX7: ; %bb.0: ; %entry 13131; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 13132; GFX7-NEXT: s_load_dword s4, s[8:9], 0x2 13133; GFX7-NEXT: s_waitcnt lgkmcnt(0) 13134; GFX7-NEXT: v_mov_b32_e32 v0, s6 13135; GFX7-NEXT: v_mov_b32_e32 v1, s7 13136; GFX7-NEXT: v_mov_b32_e32 v2, s4 13137; GFX7-NEXT: s_waitcnt vmcnt(0) 13138; GFX7-NEXT: flat_atomic_swap v[0:1], v2 13139; GFX7-NEXT: s_waitcnt vmcnt(0) 13140; GFX7-NEXT: buffer_wbinvl1_vol 13141; GFX7-NEXT: s_endpgm 13142; 13143; GFX10-WGP-LABEL: global_agent_one_as_acq_rel_atomicrmw: 13144; GFX10-WGP: ; %bb.0: ; %entry 13145; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 13146; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13147; 
GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 13148; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 13149; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 13150; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 13151; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 13152; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[4:5] 13153; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 13154; GFX10-WGP-NEXT: buffer_gl1_inv 13155; GFX10-WGP-NEXT: buffer_gl0_inv 13156; GFX10-WGP-NEXT: s_endpgm 13157; 13158; GFX10-CU-LABEL: global_agent_one_as_acq_rel_atomicrmw: 13159; GFX10-CU: ; %bb.0: ; %entry 13160; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 13161; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13162; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 13163; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 13164; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 13165; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 13166; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 13167; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[4:5] 13168; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 13169; GFX10-CU-NEXT: buffer_gl1_inv 13170; GFX10-CU-NEXT: buffer_gl0_inv 13171; GFX10-CU-NEXT: s_endpgm 13172; 13173; SKIP-CACHE-INV-LABEL: global_agent_one_as_acq_rel_atomicrmw: 13174; SKIP-CACHE-INV: ; %bb.0: ; %entry 13175; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 13176; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2 13177; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 13178; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 13179; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 13180; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 13181; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 13182; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 13183; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 13184; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 13185; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 13186; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 13187; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 13188; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 13189; SKIP-CACHE-INV-NEXT: 
s_waitcnt vmcnt(0) 13190; SKIP-CACHE-INV-NEXT: s_endpgm 13191; 13192; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_atomicrmw: 13193; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 13194; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13195; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13196; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 13197; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13198; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 13199; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 13200; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 13201; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 13202; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 13203; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 13204; 13205; GFX90A-TGSPLIT-LABEL: global_agent_one_as_acq_rel_atomicrmw: 13206; GFX90A-TGSPLIT: ; %bb.0: ; %entry 13207; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13208; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13209; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 13210; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13211; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 13212; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 13213; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 13214; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 13215; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 13216; GFX90A-TGSPLIT-NEXT: s_endpgm 13217; 13218; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_atomicrmw: 13219; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 13220; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13221; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 13222; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 13223; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13224; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 13225; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 13226; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 13227; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 13228; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 13229; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 
13230; GFX940-NOTTGSPLIT-NEXT: s_endpgm 13231; 13232; GFX940-TGSPLIT-LABEL: global_agent_one_as_acq_rel_atomicrmw: 13233; GFX940-TGSPLIT: ; %bb.0: ; %entry 13234; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13235; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 13236; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 13237; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13238; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 13239; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 13240; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 13241; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 13242; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 13243; GFX940-TGSPLIT-NEXT: buffer_inv sc1 13244; GFX940-TGSPLIT-NEXT: s_endpgm 13245; 13246; GFX11-WGP-LABEL: global_agent_one_as_acq_rel_atomicrmw: 13247; GFX11-WGP: ; %bb.0: ; %entry 13248; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 13249; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13250; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 13251; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 13252; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 13253; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 13254; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 13255; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 13256; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 13257; GFX11-WGP-NEXT: buffer_gl1_inv 13258; GFX11-WGP-NEXT: buffer_gl0_inv 13259; GFX11-WGP-NEXT: s_endpgm 13260; 13261; GFX11-CU-LABEL: global_agent_one_as_acq_rel_atomicrmw: 13262; GFX11-CU: ; %bb.0: ; %entry 13263; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 13264; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13265; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 13266; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 13267; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 13268; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 13269; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 13270; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 13271; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 13272; GFX11-CU-NEXT: buffer_gl1_inv 13273; GFX11-CU-NEXT: buffer_gl0_inv 13274; GFX11-CU-NEXT: s_endpgm 13275; 
13276; GFX12-WGP-LABEL: global_agent_one_as_acq_rel_atomicrmw: 13277; GFX12-WGP: ; %bb.0: ; %entry 13278; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 13279; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13280; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 13281; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 13282; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 13283; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 13284; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 13285; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 13286; GFX12-WGP-NEXT: s_wait_storecnt 0x0 13287; GFX12-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV 13288; GFX12-WGP-NEXT: s_wait_storecnt 0x0 13289; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 13290; GFX12-WGP-NEXT: s_endpgm 13291; 13292; GFX12-CU-LABEL: global_agent_one_as_acq_rel_atomicrmw: 13293; GFX12-CU: ; %bb.0: ; %entry 13294; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 13295; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13296; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 13297; GFX12-CU-NEXT: s_wait_kmcnt 0x0 13298; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 13299; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 13300; GFX12-CU-NEXT: s_wait_samplecnt 0x0 13301; GFX12-CU-NEXT: s_wait_loadcnt 0x0 13302; GFX12-CU-NEXT: s_wait_storecnt 0x0 13303; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV 13304; GFX12-CU-NEXT: s_wait_storecnt 0x0 13305; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 13306; GFX12-CU-NEXT: s_endpgm 13307 ptr addrspace(1) %out, i32 %in) { 13308entry: 13309 %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent-one-as") acq_rel 13310 ret void 13311} 13312
; No-return exchange with seq_cst ordering: the kernel below performs an
; `atomicrmw volatile xchg` of %in into the addrspace(1) pointer %out with
; syncscope("agent-one-as"); the old value (%val) is never used.
; Expected shape in every run: the swap is bracketed on both sides.
;   Before the swap: s_waitcnt vmcnt(0) (gfx600, gfx700, gfx90a and the
;   -amdgcn-skip-cache-invalidations run), s_waitcnt vmcnt(0) +
;   s_waitcnt_vscnt null, 0x0 (gfx1010, gfx1100), buffer_wbl2 sc1 +
;   s_waitcnt vmcnt(0) (gfx940), or the four s_wait_*cnt 0x0 instructions
;   (gfx1200).
;   After the swap: a wait (s_waitcnt vmcnt(0), s_waitcnt_vscnt null, 0x0 or
;   s_wait_storecnt 0x0) followed by the invalidate (buffer_wbinvl1,
;   buffer_wbinvl1_vol, buffer_gl1_inv + buffer_gl0_inv, buffer_inv sc1 or
;   global_inv scope:SCOPE_DEV). The -amdgcn-skip-cache-invalidations run
;   keeps the trailing wait but expects no invalidate.
; The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them rather than editing them by hand.
13313define amdgpu_kernel void @global_agent_one_as_seq_cst_atomicrmw( 13314; GFX6-LABEL: global_agent_one_as_seq_cst_atomicrmw: 13315; GFX6: ; %bb.0: ; %entry 13316; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13317; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2 13318; GFX6-NEXT: s_waitcnt lgkmcnt(0) 13319; GFX6-NEXT: s_mov_b32 s11, s5 13320; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 13321; 
GFX6-NEXT: s_mov_b32 s9, 0x100f000 13322; GFX6-NEXT: s_mov_b32 s10, -1 13323; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 13324; GFX6-NEXT: s_mov_b32 s5, s11 13325; GFX6-NEXT: s_mov_b32 s6, s10 13326; GFX6-NEXT: s_mov_b32 s7, s9 13327; GFX6-NEXT: v_mov_b32_e32 v0, s8 13328; GFX6-NEXT: s_waitcnt vmcnt(0) 13329; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 13330; GFX6-NEXT: s_waitcnt vmcnt(0) 13331; GFX6-NEXT: buffer_wbinvl1 13332; GFX6-NEXT: s_endpgm 13333; 13334; GFX7-LABEL: global_agent_one_as_seq_cst_atomicrmw: 13335; GFX7: ; %bb.0: ; %entry 13336; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 13337; GFX7-NEXT: s_load_dword s4, s[8:9], 0x2 13338; GFX7-NEXT: s_waitcnt lgkmcnt(0) 13339; GFX7-NEXT: v_mov_b32_e32 v0, s6 13340; GFX7-NEXT: v_mov_b32_e32 v1, s7 13341; GFX7-NEXT: v_mov_b32_e32 v2, s4 13342; GFX7-NEXT: s_waitcnt vmcnt(0) 13343; GFX7-NEXT: flat_atomic_swap v[0:1], v2 13344; GFX7-NEXT: s_waitcnt vmcnt(0) 13345; GFX7-NEXT: buffer_wbinvl1_vol 13346; GFX7-NEXT: s_endpgm 13347; 13348; GFX10-WGP-LABEL: global_agent_one_as_seq_cst_atomicrmw: 13349; GFX10-WGP: ; %bb.0: ; %entry 13350; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 13351; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13352; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 13353; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 13354; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 13355; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 13356; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 13357; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[4:5] 13358; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 13359; GFX10-WGP-NEXT: buffer_gl1_inv 13360; GFX10-WGP-NEXT: buffer_gl0_inv 13361; GFX10-WGP-NEXT: s_endpgm 13362; 13363; GFX10-CU-LABEL: global_agent_one_as_seq_cst_atomicrmw: 13364; GFX10-CU: ; %bb.0: ; %entry 13365; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 13366; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13367; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 13368; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 13369; GFX10-CU-NEXT: 
v_mov_b32_e32 v1, s6 13370; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 13371; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 13372; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[4:5] 13373; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 13374; GFX10-CU-NEXT: buffer_gl1_inv 13375; GFX10-CU-NEXT: buffer_gl0_inv 13376; GFX10-CU-NEXT: s_endpgm 13377; 13378; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_atomicrmw: 13379; SKIP-CACHE-INV: ; %bb.0: ; %entry 13380; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 13381; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2 13382; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 13383; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 13384; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 13385; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 13386; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 13387; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 13388; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 13389; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 13390; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 13391; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 13392; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 13393; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 13394; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 13395; SKIP-CACHE-INV-NEXT: s_endpgm 13396; 13397; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_atomicrmw: 13398; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 13399; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13400; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13401; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 13402; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13403; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 13404; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 13405; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 13406; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 13407; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 13408; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 13409; 13410; GFX90A-TGSPLIT-LABEL: 
global_agent_one_as_seq_cst_atomicrmw: 13411; GFX90A-TGSPLIT: ; %bb.0: ; %entry 13412; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13413; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13414; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 13415; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13416; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 13417; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 13418; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 13419; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 13420; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 13421; GFX90A-TGSPLIT-NEXT: s_endpgm 13422; 13423; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_atomicrmw: 13424; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 13425; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13426; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 13427; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 13428; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13429; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 13430; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 13431; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 13432; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 13433; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 13434; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 13435; GFX940-NOTTGSPLIT-NEXT: s_endpgm 13436; 13437; GFX940-TGSPLIT-LABEL: global_agent_one_as_seq_cst_atomicrmw: 13438; GFX940-TGSPLIT: ; %bb.0: ; %entry 13439; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13440; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 13441; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 13442; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13443; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 13444; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 13445; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 13446; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 13447; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 13448; GFX940-TGSPLIT-NEXT: buffer_inv sc1 13449; GFX940-TGSPLIT-NEXT: s_endpgm 13450; 13451; GFX11-WGP-LABEL: 
global_agent_one_as_seq_cst_atomicrmw: 13452; GFX11-WGP: ; %bb.0: ; %entry 13453; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 13454; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13455; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 13456; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 13457; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 13458; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 13459; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 13460; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 13461; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 13462; GFX11-WGP-NEXT: buffer_gl1_inv 13463; GFX11-WGP-NEXT: buffer_gl0_inv 13464; GFX11-WGP-NEXT: s_endpgm 13465; 13466; GFX11-CU-LABEL: global_agent_one_as_seq_cst_atomicrmw: 13467; GFX11-CU: ; %bb.0: ; %entry 13468; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 13469; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13470; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 13471; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 13472; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 13473; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 13474; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 13475; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 13476; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 13477; GFX11-CU-NEXT: buffer_gl1_inv 13478; GFX11-CU-NEXT: buffer_gl0_inv 13479; GFX11-CU-NEXT: s_endpgm 13480; 13481; GFX12-WGP-LABEL: global_agent_one_as_seq_cst_atomicrmw: 13482; GFX12-WGP: ; %bb.0: ; %entry 13483; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 13484; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13485; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 13486; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 13487; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 13488; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 13489; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 13490; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 13491; GFX12-WGP-NEXT: s_wait_storecnt 0x0 13492; GFX12-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV 13493; GFX12-WGP-NEXT: s_wait_storecnt 0x0 13494; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 13495; GFX12-WGP-NEXT: s_endpgm 13496; 13497; GFX12-CU-LABEL: 
global_agent_one_as_seq_cst_atomicrmw: 13498; GFX12-CU: ; %bb.0: ; %entry 13499; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 13500; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13501; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 13502; GFX12-CU-NEXT: s_wait_kmcnt 0x0 13503; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 13504; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 13505; GFX12-CU-NEXT: s_wait_samplecnt 0x0 13506; GFX12-CU-NEXT: s_wait_loadcnt 0x0 13507; GFX12-CU-NEXT: s_wait_storecnt 0x0 13508; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV 13509; GFX12-CU-NEXT: s_wait_storecnt 0x0 13510; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 13511; GFX12-CU-NEXT: s_endpgm 13512 ptr addrspace(1) %out, i32 %in) { 13513entry: 13514 %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent-one-as") seq_cst 13515 ret void 13516} 13517 13518define amdgpu_kernel void @global_agent_one_as_acquire_ret_atomicrmw( 13519; GFX6-LABEL: global_agent_one_as_acquire_ret_atomicrmw: 13520; GFX6: ; %bb.0: ; %entry 13521; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13522; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2 13523; GFX6-NEXT: s_waitcnt lgkmcnt(0) 13524; GFX6-NEXT: s_mov_b32 s11, s5 13525; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 13526; GFX6-NEXT: s_mov_b32 s9, 0x100f000 13527; GFX6-NEXT: s_mov_b32 s10, -1 13528; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 13529; GFX6-NEXT: s_mov_b32 s5, s11 13530; GFX6-NEXT: s_mov_b32 s6, s10 13531; GFX6-NEXT: s_mov_b32 s7, s9 13532; GFX6-NEXT: v_mov_b32_e32 v0, s8 13533; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc 13534; GFX6-NEXT: s_waitcnt vmcnt(0) 13535; GFX6-NEXT: buffer_wbinvl1 13536; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 13537; GFX6-NEXT: s_endpgm 13538; 13539; GFX7-LABEL: global_agent_one_as_acquire_ret_atomicrmw: 13540; GFX7: ; %bb.0: ; %entry 13541; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13542; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2 13543; GFX7-NEXT: 
s_waitcnt lgkmcnt(0) 13544; GFX7-NEXT: v_mov_b32_e32 v0, s4 13545; GFX7-NEXT: v_mov_b32_e32 v1, s5 13546; GFX7-NEXT: v_mov_b32_e32 v2, s6 13547; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 13548; GFX7-NEXT: s_waitcnt vmcnt(0) 13549; GFX7-NEXT: buffer_wbinvl1_vol 13550; GFX7-NEXT: v_mov_b32_e32 v0, s4 13551; GFX7-NEXT: v_mov_b32_e32 v1, s5 13552; GFX7-NEXT: flat_store_dword v[0:1], v2 13553; GFX7-NEXT: s_endpgm 13554; 13555; GFX10-WGP-LABEL: global_agent_one_as_acquire_ret_atomicrmw: 13556; GFX10-WGP: ; %bb.0: ; %entry 13557; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 13558; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13559; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 13560; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 13561; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 13562; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 13563; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 13564; GFX10-WGP-NEXT: buffer_gl1_inv 13565; GFX10-WGP-NEXT: buffer_gl0_inv 13566; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 13567; GFX10-WGP-NEXT: s_endpgm 13568; 13569; GFX10-CU-LABEL: global_agent_one_as_acquire_ret_atomicrmw: 13570; GFX10-CU: ; %bb.0: ; %entry 13571; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 13572; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13573; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 13574; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 13575; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 13576; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 13577; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 13578; GFX10-CU-NEXT: buffer_gl1_inv 13579; GFX10-CU-NEXT: buffer_gl0_inv 13580; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 13581; GFX10-CU-NEXT: s_endpgm 13582; 13583; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_ret_atomicrmw: 13584; SKIP-CACHE-INV: ; %bb.0: ; %entry 13585; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 13586; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2 13587; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 13588; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 13589; 
SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 13590; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 13591; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 13592; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 13593; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 13594; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 13595; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 13596; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 13597; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc 13598; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 13599; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 13600; SKIP-CACHE-INV-NEXT: s_endpgm 13601; 13602; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_ret_atomicrmw: 13603; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 13604; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13605; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13606; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 13607; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13608; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 13609; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 13610; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 13611; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 13612; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 13613; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 13614; 13615; GFX90A-TGSPLIT-LABEL: global_agent_one_as_acquire_ret_atomicrmw: 13616; GFX90A-TGSPLIT: ; %bb.0: ; %entry 13617; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13618; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13619; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 13620; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13621; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 13622; GFX90A-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 13623; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 13624; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 13625; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 13626; GFX90A-TGSPLIT-NEXT: s_endpgm 
13627; 13628; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_ret_atomicrmw: 13629; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 13630; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13631; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 13632; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 13633; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13634; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 13635; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 13636; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 13637; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 13638; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 13639; GFX940-NOTTGSPLIT-NEXT: s_endpgm 13640; 13641; GFX940-TGSPLIT-LABEL: global_agent_one_as_acquire_ret_atomicrmw: 13642; GFX940-TGSPLIT: ; %bb.0: ; %entry 13643; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13644; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 13645; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 13646; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13647; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 13648; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 13649; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 13650; GFX940-TGSPLIT-NEXT: buffer_inv sc1 13651; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 13652; GFX940-TGSPLIT-NEXT: s_endpgm 13653; 13654; GFX11-WGP-LABEL: global_agent_one_as_acquire_ret_atomicrmw: 13655; GFX11-WGP: ; %bb.0: ; %entry 13656; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 13657; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13658; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 13659; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 13660; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 13661; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc 13662; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 13663; GFX11-WGP-NEXT: buffer_gl1_inv 13664; GFX11-WGP-NEXT: buffer_gl0_inv 13665; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 13666; GFX11-WGP-NEXT: s_endpgm 13667; 13668; GFX11-CU-LABEL: 
global_agent_one_as_acquire_ret_atomicrmw: 13669; GFX11-CU: ; %bb.0: ; %entry 13670; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 13671; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13672; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 13673; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 13674; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 13675; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc 13676; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 13677; GFX11-CU-NEXT: buffer_gl1_inv 13678; GFX11-CU-NEXT: buffer_gl0_inv 13679; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 13680; GFX11-CU-NEXT: s_endpgm 13681; 13682; GFX12-WGP-LABEL: global_agent_one_as_acquire_ret_atomicrmw: 13683; GFX12-WGP: ; %bb.0: ; %entry 13684; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 13685; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13686; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 13687; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 13688; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 13689; GFX12-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV 13690; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 13691; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 13692; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 13693; GFX12-WGP-NEXT: s_endpgm 13694; 13695; GFX12-CU-LABEL: global_agent_one_as_acquire_ret_atomicrmw: 13696; GFX12-CU: ; %bb.0: ; %entry 13697; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 13698; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13699; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 13700; GFX12-CU-NEXT: s_wait_kmcnt 0x0 13701; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 13702; GFX12-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV 13703; GFX12-CU-NEXT: s_wait_loadcnt 0x0 13704; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 13705; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 13706; GFX12-CU-NEXT: s_endpgm 13707 ptr addrspace(1) %out, i32 %in) { 13708entry: 13709 %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent-one-as") acquire 13710 store i32 %val, ptr addrspace(1) 
%out, align 4 13711 ret void 13712} 13713; Returning volatile xchg on a global (addrspace 1) pointer with acq_rel ordering at syncscope agent-one-as; the atomicrmw result is stored back through %out. 13714define amdgpu_kernel void @global_agent_one_as_acq_rel_ret_atomicrmw( 13715; GFX6-LABEL: global_agent_one_as_acq_rel_ret_atomicrmw: 13716; GFX6: ; %bb.0: ; %entry 13717; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13718; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2 13719; GFX6-NEXT: s_waitcnt lgkmcnt(0) 13720; GFX6-NEXT: s_mov_b32 s11, s5 13721; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 13722; GFX6-NEXT: s_mov_b32 s9, 0x100f000 13723; GFX6-NEXT: s_mov_b32 s10, -1 13724; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 13725; GFX6-NEXT: s_mov_b32 s5, s11 13726; GFX6-NEXT: s_mov_b32 s6, s10 13727; GFX6-NEXT: s_mov_b32 s7, s9 13728; GFX6-NEXT: v_mov_b32_e32 v0, s8 13729; GFX6-NEXT: s_waitcnt vmcnt(0) 13730; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc 13731; GFX6-NEXT: s_waitcnt vmcnt(0) 13732; GFX6-NEXT: buffer_wbinvl1 13733; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 13734; GFX6-NEXT: s_endpgm 13735; 13736; GFX7-LABEL: global_agent_one_as_acq_rel_ret_atomicrmw: 13737; GFX7: ; %bb.0: ; %entry 13738; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13739; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2 13740; GFX7-NEXT: s_waitcnt lgkmcnt(0) 13741; GFX7-NEXT: v_mov_b32_e32 v0, s4 13742; GFX7-NEXT: v_mov_b32_e32 v1, s5 13743; GFX7-NEXT: v_mov_b32_e32 v2, s6 13744; GFX7-NEXT: s_waitcnt vmcnt(0) 13745; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 13746; GFX7-NEXT: s_waitcnt vmcnt(0) 13747; GFX7-NEXT: buffer_wbinvl1_vol 13748; GFX7-NEXT: v_mov_b32_e32 v0, s4 13749; GFX7-NEXT: v_mov_b32_e32 v1, s5 13750; GFX7-NEXT: flat_store_dword v[0:1], v2 13751; GFX7-NEXT: s_endpgm 13752; 13753; GFX10-WGP-LABEL: global_agent_one_as_acq_rel_ret_atomicrmw: 13754; GFX10-WGP: ; %bb.0: ; %entry 13755; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 13756; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13757; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 13758; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 13759;
GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 13760; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 13761; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 13762; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 13763; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 13764; GFX10-WGP-NEXT: buffer_gl1_inv 13765; GFX10-WGP-NEXT: buffer_gl0_inv 13766; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 13767; GFX10-WGP-NEXT: s_endpgm 13768; 13769; GFX10-CU-LABEL: global_agent_one_as_acq_rel_ret_atomicrmw: 13770; GFX10-CU: ; %bb.0: ; %entry 13771; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 13772; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13773; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 13774; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 13775; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 13776; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 13777; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 13778; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 13779; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 13780; GFX10-CU-NEXT: buffer_gl1_inv 13781; GFX10-CU-NEXT: buffer_gl0_inv 13782; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 13783; GFX10-CU-NEXT: s_endpgm 13784; 13785; SKIP-CACHE-INV-LABEL: global_agent_one_as_acq_rel_ret_atomicrmw: 13786; SKIP-CACHE-INV: ; %bb.0: ; %entry 13787; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 13788; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2 13789; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 13790; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 13791; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 13792; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 13793; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 13794; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 13795; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 13796; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 13797; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 13798; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 13799; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 13800; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc 
13801; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 13802; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 13803; SKIP-CACHE-INV-NEXT: s_endpgm 13804; 13805; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_ret_atomicrmw: 13806; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 13807; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13808; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13809; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 13810; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13811; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 13812; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 13813; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 13814; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 13815; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 13816; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 13817; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 13818; 13819; GFX90A-TGSPLIT-LABEL: global_agent_one_as_acq_rel_ret_atomicrmw: 13820; GFX90A-TGSPLIT: ; %bb.0: ; %entry 13821; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13822; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13823; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 13824; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13825; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 13826; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 13827; GFX90A-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 13828; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 13829; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 13830; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 13831; GFX90A-TGSPLIT-NEXT: s_endpgm 13832; 13833; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_ret_atomicrmw: 13834; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 13835; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13836; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 13837; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 13838; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13839; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 13840; 
GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 13841; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 13842; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 13843; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 13844; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 13845; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 13846; GFX940-NOTTGSPLIT-NEXT: s_endpgm 13847; 13848; GFX940-TGSPLIT-LABEL: global_agent_one_as_acq_rel_ret_atomicrmw: 13849; GFX940-TGSPLIT: ; %bb.0: ; %entry 13850; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13851; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 13852; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 13853; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13854; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 13855; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 13856; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 13857; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 13858; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 13859; GFX940-TGSPLIT-NEXT: buffer_inv sc1 13860; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 13861; GFX940-TGSPLIT-NEXT: s_endpgm 13862; 13863; GFX11-WGP-LABEL: global_agent_one_as_acq_rel_ret_atomicrmw: 13864; GFX11-WGP: ; %bb.0: ; %entry 13865; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 13866; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13867; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 13868; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 13869; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 13870; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 13871; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 13872; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc 13873; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 13874; GFX11-WGP-NEXT: buffer_gl1_inv 13875; GFX11-WGP-NEXT: buffer_gl0_inv 13876; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 13877; GFX11-WGP-NEXT: s_endpgm 13878; 13879; GFX11-CU-LABEL: global_agent_one_as_acq_rel_ret_atomicrmw: 13880; GFX11-CU: ; %bb.0: ; %entry 13881; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 13882; GFX11-CU-NEXT: 
s_load_b64 s[0:1], s[4:5], 0x0 13883; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 13884; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 13885; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 13886; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 13887; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 13888; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc 13889; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 13890; GFX11-CU-NEXT: buffer_gl1_inv 13891; GFX11-CU-NEXT: buffer_gl0_inv 13892; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 13893; GFX11-CU-NEXT: s_endpgm 13894; 13895; GFX12-WGP-LABEL: global_agent_one_as_acq_rel_ret_atomicrmw: 13896; GFX12-WGP: ; %bb.0: ; %entry 13897; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 13898; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13899; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 13900; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 13901; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 13902; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 13903; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 13904; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 13905; GFX12-WGP-NEXT: s_wait_storecnt 0x0 13906; GFX12-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV 13907; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 13908; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 13909; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 13910; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 13911; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 13912; GFX12-WGP-NEXT: s_endpgm 13913; 13914; GFX12-CU-LABEL: global_agent_one_as_acq_rel_ret_atomicrmw: 13915; GFX12-CU: ; %bb.0: ; %entry 13916; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 13917; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13918; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 13919; GFX12-CU-NEXT: s_wait_kmcnt 0x0 13920; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 13921; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 13922; GFX12-CU-NEXT: s_wait_samplecnt 0x0 13923; GFX12-CU-NEXT: s_wait_loadcnt 0x0 13924; GFX12-CU-NEXT: s_wait_storecnt 0x0 13925; GFX12-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV 
13926; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 13927; GFX12-CU-NEXT: s_wait_samplecnt 0x0 13928; GFX12-CU-NEXT: s_wait_loadcnt 0x0 13929; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 13930; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 13931; GFX12-CU-NEXT: s_endpgm 13932 ptr addrspace(1) %out, i32 %in) { 13933entry: 13934 %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent-one-as") acq_rel 13935 store i32 %val, ptr addrspace(1) %out, align 4 13936 ret void 13937} 13938; Returning volatile xchg on a global (addrspace 1) pointer with seq_cst ordering at syncscope agent-one-as; the atomicrmw result is stored back through %out. 13939define amdgpu_kernel void @global_agent_one_as_seq_cst_ret_atomicrmw( 13940; GFX6-LABEL: global_agent_one_as_seq_cst_ret_atomicrmw: 13941; GFX6: ; %bb.0: ; %entry 13942; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13943; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2 13944; GFX6-NEXT: s_waitcnt lgkmcnt(0) 13945; GFX6-NEXT: s_mov_b32 s11, s5 13946; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 13947; GFX6-NEXT: s_mov_b32 s9, 0x100f000 13948; GFX6-NEXT: s_mov_b32 s10, -1 13949; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 13950; GFX6-NEXT: s_mov_b32 s5, s11 13951; GFX6-NEXT: s_mov_b32 s6, s10 13952; GFX6-NEXT: s_mov_b32 s7, s9 13953; GFX6-NEXT: v_mov_b32_e32 v0, s8 13954; GFX6-NEXT: s_waitcnt vmcnt(0) 13955; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc 13956; GFX6-NEXT: s_waitcnt vmcnt(0) 13957; GFX6-NEXT: buffer_wbinvl1 13958; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 13959; GFX6-NEXT: s_endpgm 13960; 13961; GFX7-LABEL: global_agent_one_as_seq_cst_ret_atomicrmw: 13962; GFX7: ; %bb.0: ; %entry 13963; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13964; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2 13965; GFX7-NEXT: s_waitcnt lgkmcnt(0) 13966; GFX7-NEXT: v_mov_b32_e32 v0, s4 13967; GFX7-NEXT: v_mov_b32_e32 v1, s5 13968; GFX7-NEXT: v_mov_b32_e32 v2, s6 13969; GFX7-NEXT: s_waitcnt vmcnt(0) 13970; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 13971; GFX7-NEXT: s_waitcnt vmcnt(0) 13972; GFX7-NEXT: buffer_wbinvl1_vol 13973; GFX7-NEXT:
v_mov_b32_e32 v0, s4 13974; GFX7-NEXT: v_mov_b32_e32 v1, s5 13975; GFX7-NEXT: flat_store_dword v[0:1], v2 13976; GFX7-NEXT: s_endpgm 13977; 13978; GFX10-WGP-LABEL: global_agent_one_as_seq_cst_ret_atomicrmw: 13979; GFX10-WGP: ; %bb.0: ; %entry 13980; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 13981; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13982; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 13983; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 13984; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 13985; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 13986; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 13987; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 13988; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 13989; GFX10-WGP-NEXT: buffer_gl1_inv 13990; GFX10-WGP-NEXT: buffer_gl0_inv 13991; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 13992; GFX10-WGP-NEXT: s_endpgm 13993; 13994; GFX10-CU-LABEL: global_agent_one_as_seq_cst_ret_atomicrmw: 13995; GFX10-CU: ; %bb.0: ; %entry 13996; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 13997; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13998; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 13999; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 14000; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 14001; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 14002; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 14003; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 14004; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 14005; GFX10-CU-NEXT: buffer_gl1_inv 14006; GFX10-CU-NEXT: buffer_gl0_inv 14007; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 14008; GFX10-CU-NEXT: s_endpgm 14009; 14010; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_ret_atomicrmw: 14011; SKIP-CACHE-INV: ; %bb.0: ; %entry 14012; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 14013; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2 14014; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 14015; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 14016; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 14017; SKIP-CACHE-INV-NEXT: s_mov_b32 
s5, 0xf000 14018; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 14019; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 14020; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 14021; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 14022; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 14023; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 14024; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 14025; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc 14026; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 14027; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 14028; SKIP-CACHE-INV-NEXT: s_endpgm 14029; 14030; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_ret_atomicrmw: 14031; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 14032; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14033; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14034; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 14035; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14036; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 14037; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 14038; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 14039; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 14040; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 14041; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 14042; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 14043; 14044; GFX90A-TGSPLIT-LABEL: global_agent_one_as_seq_cst_ret_atomicrmw: 14045; GFX90A-TGSPLIT: ; %bb.0: ; %entry 14046; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14047; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14048; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 14049; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14050; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 14051; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 14052; GFX90A-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 14053; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 14054; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 14055; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 14056; 
GFX90A-TGSPLIT-NEXT: s_endpgm 14057; 14058; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_ret_atomicrmw: 14059; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 14060; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14061; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 14062; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 14063; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14064; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 14065; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 14066; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 14067; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 14068; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 14069; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 14070; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 14071; GFX940-NOTTGSPLIT-NEXT: s_endpgm 14072; 14073; GFX940-TGSPLIT-LABEL: global_agent_one_as_seq_cst_ret_atomicrmw: 14074; GFX940-TGSPLIT: ; %bb.0: ; %entry 14075; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14076; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 14077; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 14078; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14079; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 14080; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 14081; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 14082; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 14083; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 14084; GFX940-TGSPLIT-NEXT: buffer_inv sc1 14085; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 14086; GFX940-TGSPLIT-NEXT: s_endpgm 14087; 14088; GFX11-WGP-LABEL: global_agent_one_as_seq_cst_ret_atomicrmw: 14089; GFX11-WGP: ; %bb.0: ; %entry 14090; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 14091; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14092; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 14093; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 14094; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 14095; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 14096; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 
14097; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc 14098; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 14099; GFX11-WGP-NEXT: buffer_gl1_inv 14100; GFX11-WGP-NEXT: buffer_gl0_inv 14101; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 14102; GFX11-WGP-NEXT: s_endpgm 14103; 14104; GFX11-CU-LABEL: global_agent_one_as_seq_cst_ret_atomicrmw: 14105; GFX11-CU: ; %bb.0: ; %entry 14106; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 14107; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14108; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 14109; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 14110; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 14111; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 14112; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 14113; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc 14114; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 14115; GFX11-CU-NEXT: buffer_gl1_inv 14116; GFX11-CU-NEXT: buffer_gl0_inv 14117; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 14118; GFX11-CU-NEXT: s_endpgm 14119; 14120; GFX12-WGP-LABEL: global_agent_one_as_seq_cst_ret_atomicrmw: 14121; GFX12-WGP: ; %bb.0: ; %entry 14122; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 14123; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14124; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 14125; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 14126; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 14127; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 14128; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 14129; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 14130; GFX12-WGP-NEXT: s_wait_storecnt 0x0 14131; GFX12-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV 14132; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 14133; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 14134; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 14135; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 14136; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 14137; GFX12-WGP-NEXT: s_endpgm 14138; 14139; GFX12-CU-LABEL: global_agent_one_as_seq_cst_ret_atomicrmw: 14140; GFX12-CU: ; %bb.0: ; %entry 14141; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 
14142; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14143; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 14144; GFX12-CU-NEXT: s_wait_kmcnt 0x0 14145; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 14146; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 14147; GFX12-CU-NEXT: s_wait_samplecnt 0x0 14148; GFX12-CU-NEXT: s_wait_loadcnt 0x0 14149; GFX12-CU-NEXT: s_wait_storecnt 0x0 14150; GFX12-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV 14151; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 14152; GFX12-CU-NEXT: s_wait_samplecnt 0x0 14153; GFX12-CU-NEXT: s_wait_loadcnt 0x0 14154; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 14155; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 14156; GFX12-CU-NEXT: s_endpgm 14157 ptr addrspace(1) %out, i32 %in) { 14158entry: 14159 %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent-one-as") seq_cst 14160 store i32 %val, ptr addrspace(1) %out, align 4 14161 ret void 14162} 14163 14164define amdgpu_kernel void @global_agent_one_as_monotonic_monotonic_cmpxchg( 14165; GFX6-LABEL: global_agent_one_as_monotonic_monotonic_cmpxchg: 14166; GFX6: ; %bb.0: ; %entry 14167; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 14168; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 14169; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 14170; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 14171; GFX6-NEXT: s_waitcnt lgkmcnt(0) 14172; GFX6-NEXT: s_mov_b32 s12, s5 14173; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 14174; GFX6-NEXT: s_mov_b32 s10, 0x100f000 14175; GFX6-NEXT: s_mov_b32 s11, -1 14176; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 14177; GFX6-NEXT: s_mov_b32 s5, s12 14178; GFX6-NEXT: s_mov_b32 s6, s11 14179; GFX6-NEXT: s_mov_b32 s7, s10 14180; GFX6-NEXT: v_mov_b32_e32 v0, s9 14181; GFX6-NEXT: v_mov_b32_e32 v2, s8 14182; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 14183; GFX6-NEXT: v_mov_b32_e32 v1, v2 14184; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 
14185; GFX6-NEXT: s_endpgm 14186; 14187; GFX7-LABEL: global_agent_one_as_monotonic_monotonic_cmpxchg: 14188; GFX7: ; %bb.0: ; %entry 14189; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 14190; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 14191; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 14192; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 14193; GFX7-NEXT: s_mov_b64 s[10:11], 16 14194; GFX7-NEXT: s_waitcnt lgkmcnt(0) 14195; GFX7-NEXT: s_mov_b32 s4, s8 14196; GFX7-NEXT: s_mov_b32 s5, s9 14197; GFX7-NEXT: s_mov_b32 s9, s10 14198; GFX7-NEXT: s_mov_b32 s8, s11 14199; GFX7-NEXT: s_add_u32 s4, s4, s9 14200; GFX7-NEXT: s_addc_u32 s8, s5, s8 14201; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 14202; GFX7-NEXT: s_mov_b32 s5, s8 14203; GFX7-NEXT: v_mov_b32_e32 v2, s7 14204; GFX7-NEXT: v_mov_b32_e32 v0, s6 14205; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14206; GFX7-NEXT: v_mov_b32_e32 v3, v0 14207; GFX7-NEXT: v_mov_b32_e32 v0, s4 14208; GFX7-NEXT: v_mov_b32_e32 v1, s5 14209; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 14210; GFX7-NEXT: s_endpgm 14211; 14212; GFX10-WGP-LABEL: global_agent_one_as_monotonic_monotonic_cmpxchg: 14213; GFX10-WGP: ; %bb.0: ; %entry 14214; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 14215; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14216; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 14217; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 14218; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 14219; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 14220; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 14221; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14222; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 14223; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 14224; GFX10-WGP-NEXT: s_endpgm 14225; 14226; GFX10-CU-LABEL: global_agent_one_as_monotonic_monotonic_cmpxchg: 14227; GFX10-CU: ; %bb.0: ; %entry 14228; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 14229; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14230; 
GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 14231; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 14232; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 14233; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 14234; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 14235; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14236; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 14237; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 14238; GFX10-CU-NEXT: s_endpgm 14239; 14240; SKIP-CACHE-INV-LABEL: global_agent_one_as_monotonic_monotonic_cmpxchg: 14241; SKIP-CACHE-INV: ; %bb.0: ; %entry 14242; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 14243; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 14244; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 14245; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 14246; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 14247; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 14248; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 14249; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 14250; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 14251; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 14252; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 14253; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 14254; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 14255; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 14256; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 14257; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 14258; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 14259; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 14260; SKIP-CACHE-INV-NEXT: s_endpgm 14261; 14262; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_monotonic_cmpxchg: 14263; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 14264; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14265; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14266; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 14267; 
GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 14268; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14269; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 14270; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 14271; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14272; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 14273; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 14274; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 14275; 14276; GFX90A-TGSPLIT-LABEL: global_agent_one_as_monotonic_monotonic_cmpxchg: 14277; GFX90A-TGSPLIT: ; %bb.0: ; %entry 14278; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14279; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14280; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 14281; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 14282; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14283; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 14284; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 14285; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14286; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 14287; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 14288; GFX90A-TGSPLIT-NEXT: s_endpgm 14289; 14290; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_monotonic_cmpxchg: 14291; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 14292; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14293; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 14294; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 14295; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 14296; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14297; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 14298; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 14299; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14300; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 14301; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 14302; 
GFX940-NOTTGSPLIT-NEXT: s_endpgm 14303; 14304; GFX940-TGSPLIT-LABEL: global_agent_one_as_monotonic_monotonic_cmpxchg: 14305; GFX940-TGSPLIT: ; %bb.0: ; %entry 14306; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14307; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 14308; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 14309; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 14310; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14311; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 14312; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 14313; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14314; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 14315; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 14316; GFX940-TGSPLIT-NEXT: s_endpgm 14317; 14318; GFX11-WGP-LABEL: global_agent_one_as_monotonic_monotonic_cmpxchg: 14319; GFX11-WGP: ; %bb.0: ; %entry 14320; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 14321; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14322; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 14323; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 14324; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 14325; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 14326; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 14327; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14328; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 14329; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 14330; GFX11-WGP-NEXT: s_endpgm 14331; 14332; GFX11-CU-LABEL: global_agent_one_as_monotonic_monotonic_cmpxchg: 14333; GFX11-CU: ; %bb.0: ; %entry 14334; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 14335; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14336; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 14337; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 14338; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 14339; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 14340; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 14341; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14342; 
GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 14343; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 14344; GFX11-CU-NEXT: s_endpgm 14345; 14346; GFX12-WGP-LABEL: global_agent_one_as_monotonic_monotonic_cmpxchg: 14347; GFX12-WGP: ; %bb.0: ; %entry 14348; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 14349; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14350; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 14351; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 14352; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 14353; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 14354; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 14355; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14356; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 14357; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 14358; GFX12-WGP-NEXT: s_endpgm 14359; 14360; GFX12-CU-LABEL: global_agent_one_as_monotonic_monotonic_cmpxchg: 14361; GFX12-CU: ; %bb.0: ; %entry 14362; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 14363; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14364; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 14365; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 14366; GFX12-CU-NEXT: s_wait_kmcnt 0x0 14367; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 14368; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 14369; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14370; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 14371; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 14372; GFX12-CU-NEXT: s_endpgm 14373 ptr addrspace(1) %out, i32 %in, i32 %old) { 14374entry: 14375 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 14376 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic monotonic 14377 ret void 14378} 14379 14380define amdgpu_kernel void @global_agent_one_as_acquire_monotonic_cmpxchg( 14381; GFX6-LABEL: global_agent_one_as_acquire_monotonic_cmpxchg: 14382; GFX6: ; %bb.0: ; %entry 14383; GFX6-NEXT: s_mov_b64 
s[6:7], s[8:9] 14384; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 14385; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 14386; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 14387; GFX6-NEXT: s_waitcnt lgkmcnt(0) 14388; GFX6-NEXT: s_mov_b32 s12, s5 14389; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 14390; GFX6-NEXT: s_mov_b32 s10, 0x100f000 14391; GFX6-NEXT: s_mov_b32 s11, -1 14392; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 14393; GFX6-NEXT: s_mov_b32 s5, s12 14394; GFX6-NEXT: s_mov_b32 s6, s11 14395; GFX6-NEXT: s_mov_b32 s7, s10 14396; GFX6-NEXT: v_mov_b32_e32 v0, s9 14397; GFX6-NEXT: v_mov_b32_e32 v2, s8 14398; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 14399; GFX6-NEXT: v_mov_b32_e32 v1, v2 14400; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 14401; GFX6-NEXT: s_waitcnt vmcnt(0) 14402; GFX6-NEXT: buffer_wbinvl1 14403; GFX6-NEXT: s_endpgm 14404; 14405; GFX7-LABEL: global_agent_one_as_acquire_monotonic_cmpxchg: 14406; GFX7: ; %bb.0: ; %entry 14407; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 14408; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 14409; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 14410; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 14411; GFX7-NEXT: s_mov_b64 s[10:11], 16 14412; GFX7-NEXT: s_waitcnt lgkmcnt(0) 14413; GFX7-NEXT: s_mov_b32 s4, s8 14414; GFX7-NEXT: s_mov_b32 s5, s9 14415; GFX7-NEXT: s_mov_b32 s9, s10 14416; GFX7-NEXT: s_mov_b32 s8, s11 14417; GFX7-NEXT: s_add_u32 s4, s4, s9 14418; GFX7-NEXT: s_addc_u32 s8, s5, s8 14419; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 14420; GFX7-NEXT: s_mov_b32 s5, s8 14421; GFX7-NEXT: v_mov_b32_e32 v2, s7 14422; GFX7-NEXT: v_mov_b32_e32 v0, s6 14423; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14424; GFX7-NEXT: v_mov_b32_e32 v3, v0 14425; GFX7-NEXT: v_mov_b32_e32 v0, s4 14426; GFX7-NEXT: v_mov_b32_e32 v1, s5 14427; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 14428; GFX7-NEXT: s_waitcnt 
vmcnt(0) 14429; GFX7-NEXT: buffer_wbinvl1_vol 14430; GFX7-NEXT: s_endpgm 14431; 14432; GFX10-WGP-LABEL: global_agent_one_as_acquire_monotonic_cmpxchg: 14433; GFX10-WGP: ; %bb.0: ; %entry 14434; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 14435; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14436; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 14437; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 14438; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 14439; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 14440; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 14441; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14442; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 14443; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 14444; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 14445; GFX10-WGP-NEXT: buffer_gl1_inv 14446; GFX10-WGP-NEXT: buffer_gl0_inv 14447; GFX10-WGP-NEXT: s_endpgm 14448; 14449; GFX10-CU-LABEL: global_agent_one_as_acquire_monotonic_cmpxchg: 14450; GFX10-CU: ; %bb.0: ; %entry 14451; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 14452; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14453; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 14454; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 14455; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 14456; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 14457; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 14458; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14459; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 14460; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 14461; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 14462; GFX10-CU-NEXT: buffer_gl1_inv 14463; GFX10-CU-NEXT: buffer_gl0_inv 14464; GFX10-CU-NEXT: s_endpgm 14465; 14466; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_monotonic_cmpxchg: 14467; SKIP-CACHE-INV: ; %bb.0: ; %entry 14468; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 14469; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 14470; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 14471; 
SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 14472; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 14473; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 14474; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 14475; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 14476; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 14477; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 14478; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 14479; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 14480; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 14481; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 14482; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 14483; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 14484; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 14485; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 14486; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 14487; SKIP-CACHE-INV-NEXT: s_endpgm 14488; 14489; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_monotonic_cmpxchg: 14490; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 14491; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14492; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14493; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 14494; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 14495; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14496; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 14497; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 14498; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14499; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 14500; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 14501; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 14502; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 14503; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 14504; 14505; GFX90A-TGSPLIT-LABEL: global_agent_one_as_acquire_monotonic_cmpxchg: 14506; GFX90A-TGSPLIT: ; %bb.0: ; %entry 14507; GFX90A-TGSPLIT-NEXT: 
v_mov_b32_e32 v0, 0 14508; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14509; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 14510; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 14511; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14512; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 14513; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 14514; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14515; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 14516; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 14517; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 14518; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 14519; GFX90A-TGSPLIT-NEXT: s_endpgm 14520; 14521; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_monotonic_cmpxchg: 14522; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 14523; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14524; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 14525; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 14526; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 14527; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14528; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 14529; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 14530; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14531; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 14532; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 14533; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 14534; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 14535; GFX940-NOTTGSPLIT-NEXT: s_endpgm 14536; 14537; GFX940-TGSPLIT-LABEL: global_agent_one_as_acquire_monotonic_cmpxchg: 14538; GFX940-TGSPLIT: ; %bb.0: ; %entry 14539; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14540; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 14541; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 14542; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 14543; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14544; 
GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 14545; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 14546; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14547; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 14548; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 14549; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 14550; GFX940-TGSPLIT-NEXT: buffer_inv sc1 14551; GFX940-TGSPLIT-NEXT: s_endpgm 14552; 14553; GFX11-WGP-LABEL: global_agent_one_as_acquire_monotonic_cmpxchg: 14554; GFX11-WGP: ; %bb.0: ; %entry 14555; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 14556; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14557; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 14558; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 14559; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 14560; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 14561; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 14562; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14563; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 14564; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 14565; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 14566; GFX11-WGP-NEXT: buffer_gl1_inv 14567; GFX11-WGP-NEXT: buffer_gl0_inv 14568; GFX11-WGP-NEXT: s_endpgm 14569; 14570; GFX11-CU-LABEL: global_agent_one_as_acquire_monotonic_cmpxchg: 14571; GFX11-CU: ; %bb.0: ; %entry 14572; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 14573; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14574; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 14575; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 14576; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 14577; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 14578; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 14579; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14580; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 14581; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 14582; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 14583; GFX11-CU-NEXT: buffer_gl1_inv 14584; GFX11-CU-NEXT: 
buffer_gl0_inv 14585; GFX11-CU-NEXT: s_endpgm 14586; 14587; GFX12-WGP-LABEL: global_agent_one_as_acquire_monotonic_cmpxchg: 14588; GFX12-WGP: ; %bb.0: ; %entry 14589; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 14590; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14591; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 14592; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 14593; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 14594; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 14595; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 14596; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14597; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 14598; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 14599; GFX12-WGP-NEXT: s_wait_storecnt 0x0 14600; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 14601; GFX12-WGP-NEXT: s_endpgm 14602; 14603; GFX12-CU-LABEL: global_agent_one_as_acquire_monotonic_cmpxchg: 14604; GFX12-CU: ; %bb.0: ; %entry 14605; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 14606; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14607; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 14608; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 14609; GFX12-CU-NEXT: s_wait_kmcnt 0x0 14610; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 14611; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 14612; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14613; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 14614; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 14615; GFX12-CU-NEXT: s_wait_storecnt 0x0 14616; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 14617; GFX12-CU-NEXT: s_endpgm 14618 ptr addrspace(1) %out, i32 %in, i32 %old) { 14619entry: 14620 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 14621 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire monotonic 14622 ret void 14623} 14624 14625define amdgpu_kernel void @global_agent_one_as_release_monotonic_cmpxchg( 14626; GFX6-LABEL: 
global_agent_one_as_release_monotonic_cmpxchg: 14627; GFX6: ; %bb.0: ; %entry 14628; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 14629; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 14630; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 14631; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 14632; GFX6-NEXT: s_waitcnt lgkmcnt(0) 14633; GFX6-NEXT: s_mov_b32 s12, s5 14634; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 14635; GFX6-NEXT: s_mov_b32 s10, 0x100f000 14636; GFX6-NEXT: s_mov_b32 s11, -1 14637; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 14638; GFX6-NEXT: s_mov_b32 s5, s12 14639; GFX6-NEXT: s_mov_b32 s6, s11 14640; GFX6-NEXT: s_mov_b32 s7, s10 14641; GFX6-NEXT: v_mov_b32_e32 v0, s9 14642; GFX6-NEXT: v_mov_b32_e32 v2, s8 14643; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 14644; GFX6-NEXT: v_mov_b32_e32 v1, v2 14645; GFX6-NEXT: s_waitcnt vmcnt(0) 14646; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 14647; GFX6-NEXT: s_endpgm 14648; 14649; GFX7-LABEL: global_agent_one_as_release_monotonic_cmpxchg: 14650; GFX7: ; %bb.0: ; %entry 14651; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 14652; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 14653; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 14654; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 14655; GFX7-NEXT: s_mov_b64 s[10:11], 16 14656; GFX7-NEXT: s_waitcnt lgkmcnt(0) 14657; GFX7-NEXT: s_mov_b32 s4, s8 14658; GFX7-NEXT: s_mov_b32 s5, s9 14659; GFX7-NEXT: s_mov_b32 s9, s10 14660; GFX7-NEXT: s_mov_b32 s8, s11 14661; GFX7-NEXT: s_add_u32 s4, s4, s9 14662; GFX7-NEXT: s_addc_u32 s8, s5, s8 14663; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 14664; GFX7-NEXT: s_mov_b32 s5, s8 14665; GFX7-NEXT: v_mov_b32_e32 v2, s7 14666; GFX7-NEXT: v_mov_b32_e32 v0, s6 14667; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14668; GFX7-NEXT: v_mov_b32_e32 v3, v0 14669; GFX7-NEXT: v_mov_b32_e32 v0, s4 14670; GFX7-NEXT: v_mov_b32_e32 v1, s5 14671; 
GFX7-NEXT: s_waitcnt vmcnt(0) 14672; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 14673; GFX7-NEXT: s_endpgm 14674; 14675; GFX10-WGP-LABEL: global_agent_one_as_release_monotonic_cmpxchg: 14676; GFX10-WGP: ; %bb.0: ; %entry 14677; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 14678; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14679; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 14680; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 14681; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 14682; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 14683; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 14684; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14685; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 14686; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 14687; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 14688; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 14689; GFX10-WGP-NEXT: s_endpgm 14690; 14691; GFX10-CU-LABEL: global_agent_one_as_release_monotonic_cmpxchg: 14692; GFX10-CU: ; %bb.0: ; %entry 14693; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 14694; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14695; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 14696; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 14697; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 14698; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 14699; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 14700; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14701; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 14702; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 14703; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 14704; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 14705; GFX10-CU-NEXT: s_endpgm 14706; 14707; SKIP-CACHE-INV-LABEL: global_agent_one_as_release_monotonic_cmpxchg: 14708; SKIP-CACHE-INV: ; %bb.0: ; %entry 14709; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 14710; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 14711; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 14712; SKIP-CACHE-INV-NEXT: s_load_dword s4, 
s[2:3], 0x3 14713; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 14714; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 14715; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 14716; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 14717; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 14718; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 14719; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 14720; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 14721; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 14722; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 14723; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 14724; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 14725; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 14726; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 14727; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 14728; SKIP-CACHE-INV-NEXT: s_endpgm 14729; 14730; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_release_monotonic_cmpxchg: 14731; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 14732; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14733; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14734; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 14735; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 14736; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14737; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 14738; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 14739; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14740; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 14741; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 14742; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 14743; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 14744; 14745; GFX90A-TGSPLIT-LABEL: global_agent_one_as_release_monotonic_cmpxchg: 14746; GFX90A-TGSPLIT: ; %bb.0: ; %entry 14747; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14748; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14749; 
GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 14750; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 14751; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14752; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 14753; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 14754; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14755; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 14756; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 14757; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 14758; GFX90A-TGSPLIT-NEXT: s_endpgm 14759; 14760; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_release_monotonic_cmpxchg: 14761; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 14762; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14763; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 14764; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 14765; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 14766; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14767; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 14768; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 14769; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14770; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 14771; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 14772; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 14773; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 14774; GFX940-NOTTGSPLIT-NEXT: s_endpgm 14775; 14776; GFX940-TGSPLIT-LABEL: global_agent_one_as_release_monotonic_cmpxchg: 14777; GFX940-TGSPLIT: ; %bb.0: ; %entry 14778; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14779; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 14780; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 14781; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 14782; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14783; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 14784; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 14785; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed 
$vgpr2 def $vgpr2_vgpr3 killed $exec 14786; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 14787; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 14788; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 14789; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 14790; GFX940-TGSPLIT-NEXT: s_endpgm 14791; 14792; GFX11-WGP-LABEL: global_agent_one_as_release_monotonic_cmpxchg: 14793; GFX11-WGP: ; %bb.0: ; %entry 14794; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 14795; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14796; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 14797; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 14798; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 14799; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 14800; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 14801; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14802; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 14803; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 14804; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 14805; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 14806; GFX11-WGP-NEXT: s_endpgm 14807; 14808; GFX11-CU-LABEL: global_agent_one_as_release_monotonic_cmpxchg: 14809; GFX11-CU: ; %bb.0: ; %entry 14810; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 14811; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14812; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 14813; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 14814; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 14815; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 14816; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 14817; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14818; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 14819; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 14820; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 14821; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 14822; GFX11-CU-NEXT: s_endpgm 14823; 14824; GFX12-WGP-LABEL: global_agent_one_as_release_monotonic_cmpxchg: 14825; GFX12-WGP: ; %bb.0: ; %entry 14826; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 14827; 
GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14828; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 14829; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 14830; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 14831; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 14832; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 14833; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14834; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 14835; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 14836; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 14837; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 14838; GFX12-WGP-NEXT: s_wait_storecnt 0x0 14839; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 14840; GFX12-WGP-NEXT: s_endpgm 14841; 14842; GFX12-CU-LABEL: global_agent_one_as_release_monotonic_cmpxchg: 14843; GFX12-CU: ; %bb.0: ; %entry 14844; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 14845; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14846; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 14847; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 14848; GFX12-CU-NEXT: s_wait_kmcnt 0x0 14849; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 14850; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 14851; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14852; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 14853; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 14854; GFX12-CU-NEXT: s_wait_samplecnt 0x0 14855; GFX12-CU-NEXT: s_wait_loadcnt 0x0 14856; GFX12-CU-NEXT: s_wait_storecnt 0x0 14857; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 14858; GFX12-CU-NEXT: s_endpgm 14859 ptr addrspace(1) %out, i32 %in, i32 %old) { 14860entry: 14861 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 14862 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") release monotonic 14863 ret void 14864} 14865 14866define amdgpu_kernel void @global_agent_one_as_acq_rel_monotonic_cmpxchg( 14867; GFX6-LABEL: global_agent_one_as_acq_rel_monotonic_cmpxchg: 14868; GFX6: ; %bb.0: ; %entry 14869; 
GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 14870; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 14871; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 14872; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 14873; GFX6-NEXT: s_waitcnt lgkmcnt(0) 14874; GFX6-NEXT: s_mov_b32 s12, s5 14875; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 14876; GFX6-NEXT: s_mov_b32 s10, 0x100f000 14877; GFX6-NEXT: s_mov_b32 s11, -1 14878; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 14879; GFX6-NEXT: s_mov_b32 s5, s12 14880; GFX6-NEXT: s_mov_b32 s6, s11 14881; GFX6-NEXT: s_mov_b32 s7, s10 14882; GFX6-NEXT: v_mov_b32_e32 v0, s9 14883; GFX6-NEXT: v_mov_b32_e32 v2, s8 14884; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 14885; GFX6-NEXT: v_mov_b32_e32 v1, v2 14886; GFX6-NEXT: s_waitcnt vmcnt(0) 14887; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 14888; GFX6-NEXT: s_waitcnt vmcnt(0) 14889; GFX6-NEXT: buffer_wbinvl1 14890; GFX6-NEXT: s_endpgm 14891; 14892; GFX7-LABEL: global_agent_one_as_acq_rel_monotonic_cmpxchg: 14893; GFX7: ; %bb.0: ; %entry 14894; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 14895; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 14896; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 14897; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 14898; GFX7-NEXT: s_mov_b64 s[10:11], 16 14899; GFX7-NEXT: s_waitcnt lgkmcnt(0) 14900; GFX7-NEXT: s_mov_b32 s4, s8 14901; GFX7-NEXT: s_mov_b32 s5, s9 14902; GFX7-NEXT: s_mov_b32 s9, s10 14903; GFX7-NEXT: s_mov_b32 s8, s11 14904; GFX7-NEXT: s_add_u32 s4, s4, s9 14905; GFX7-NEXT: s_addc_u32 s8, s5, s8 14906; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 14907; GFX7-NEXT: s_mov_b32 s5, s8 14908; GFX7-NEXT: v_mov_b32_e32 v2, s7 14909; GFX7-NEXT: v_mov_b32_e32 v0, s6 14910; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14911; GFX7-NEXT: v_mov_b32_e32 v3, v0 14912; GFX7-NEXT: v_mov_b32_e32 v0, s4 14913; GFX7-NEXT: v_mov_b32_e32 v1, s5 14914; GFX7-NEXT: 
s_waitcnt vmcnt(0) 14915; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 14916; GFX7-NEXT: s_waitcnt vmcnt(0) 14917; GFX7-NEXT: buffer_wbinvl1_vol 14918; GFX7-NEXT: s_endpgm 14919; 14920; GFX10-WGP-LABEL: global_agent_one_as_acq_rel_monotonic_cmpxchg: 14921; GFX10-WGP: ; %bb.0: ; %entry 14922; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 14923; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14924; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 14925; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 14926; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 14927; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 14928; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 14929; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14930; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 14931; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 14932; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 14933; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 14934; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 14935; GFX10-WGP-NEXT: buffer_gl1_inv 14936; GFX10-WGP-NEXT: buffer_gl0_inv 14937; GFX10-WGP-NEXT: s_endpgm 14938; 14939; GFX10-CU-LABEL: global_agent_one_as_acq_rel_monotonic_cmpxchg: 14940; GFX10-CU: ; %bb.0: ; %entry 14941; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 14942; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14943; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 14944; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 14945; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 14946; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 14947; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 14948; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14949; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 14950; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 14951; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 14952; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 14953; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 14954; GFX10-CU-NEXT: buffer_gl1_inv 14955; GFX10-CU-NEXT: buffer_gl0_inv 14956; GFX10-CU-NEXT: s_endpgm 14957; 14958; SKIP-CACHE-INV-LABEL: 
global_agent_one_as_acq_rel_monotonic_cmpxchg: 14959; SKIP-CACHE-INV: ; %bb.0: ; %entry 14960; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 14961; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 14962; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 14963; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 14964; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 14965; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 14966; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 14967; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 14968; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 14969; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 14970; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 14971; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 14972; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 14973; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 14974; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 14975; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 14976; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 14977; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 14978; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 14979; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 14980; SKIP-CACHE-INV-NEXT: s_endpgm 14981; 14982; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_monotonic_cmpxchg: 14983; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 14984; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14985; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14986; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 14987; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 14988; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14989; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 14990; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 14991; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14992; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 14993; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 14994; 
GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 14995; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 14996; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 14997; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 14998; 14999; GFX90A-TGSPLIT-LABEL: global_agent_one_as_acq_rel_monotonic_cmpxchg: 15000; GFX90A-TGSPLIT: ; %bb.0: ; %entry 15001; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 15002; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 15003; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 15004; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 15005; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15006; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 15007; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 15008; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15009; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 15010; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 15011; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 15012; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 15013; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 15014; GFX90A-TGSPLIT-NEXT: s_endpgm 15015; 15016; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_monotonic_cmpxchg: 15017; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 15018; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 15019; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 15020; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 15021; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 15022; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15023; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 15024; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 15025; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15026; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 15027; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 15028; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 15029; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 15030; GFX940-NOTTGSPLIT-NEXT: s_waitcnt 
vmcnt(0) 15031; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 15032; GFX940-NOTTGSPLIT-NEXT: s_endpgm 15033; 15034; GFX940-TGSPLIT-LABEL: global_agent_one_as_acq_rel_monotonic_cmpxchg: 15035; GFX940-TGSPLIT: ; %bb.0: ; %entry 15036; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 15037; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 15038; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 15039; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 15040; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15041; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 15042; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 15043; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15044; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 15045; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 15046; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 15047; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 15048; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 15049; GFX940-TGSPLIT-NEXT: buffer_inv sc1 15050; GFX940-TGSPLIT-NEXT: s_endpgm 15051; 15052; GFX11-WGP-LABEL: global_agent_one_as_acq_rel_monotonic_cmpxchg: 15053; GFX11-WGP: ; %bb.0: ; %entry 15054; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 15055; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15056; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 15057; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 15058; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 15059; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 15060; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 15061; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15062; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 15063; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 15064; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 15065; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 15066; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 15067; GFX11-WGP-NEXT: buffer_gl1_inv 15068; GFX11-WGP-NEXT: buffer_gl0_inv 15069; GFX11-WGP-NEXT: s_endpgm 15070; 15071; GFX11-CU-LABEL: 
global_agent_one_as_acq_rel_monotonic_cmpxchg: 15072; GFX11-CU: ; %bb.0: ; %entry 15073; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 15074; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15075; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 15076; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 15077; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 15078; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 15079; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 15080; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15081; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 15082; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 15083; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 15084; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 15085; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 15086; GFX11-CU-NEXT: buffer_gl1_inv 15087; GFX11-CU-NEXT: buffer_gl0_inv 15088; GFX11-CU-NEXT: s_endpgm 15089; 15090; GFX12-WGP-LABEL: global_agent_one_as_acq_rel_monotonic_cmpxchg: 15091; GFX12-WGP: ; %bb.0: ; %entry 15092; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 15093; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15094; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 15095; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 15096; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 15097; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 15098; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 15099; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15100; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 15101; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 15102; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 15103; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 15104; GFX12-WGP-NEXT: s_wait_storecnt 0x0 15105; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 15106; GFX12-WGP-NEXT: s_wait_storecnt 0x0 15107; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 15108; GFX12-WGP-NEXT: s_endpgm 15109; 15110; GFX12-CU-LABEL: global_agent_one_as_acq_rel_monotonic_cmpxchg: 15111; GFX12-CU: ; %bb.0: ; %entry 15112; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 15113; GFX12-CU-NEXT: s_load_b64 
s[0:1], s[4:5], 0x0 15114; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 15115; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 15116; GFX12-CU-NEXT: s_wait_kmcnt 0x0 15117; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 15118; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 15119; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15120; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 15121; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 15122; GFX12-CU-NEXT: s_wait_samplecnt 0x0 15123; GFX12-CU-NEXT: s_wait_loadcnt 0x0 15124; GFX12-CU-NEXT: s_wait_storecnt 0x0 15125; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 15126; GFX12-CU-NEXT: s_wait_storecnt 0x0 15127; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 15128; GFX12-CU-NEXT: s_endpgm 15129 ptr addrspace(1) %out, i32 %in, i32 %old) { 15130entry: 15131 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 15132 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel monotonic 15133 ret void 15134} 15135 ; Kernel under test - volatile i32 cmpxchg (compare against %old, store %in) on the addrspace(1) pointer %out + 4 i32 elements (byte offset 16), syncscope agent-one-as, success ordering seq_cst, failure ordering monotonic, result unused. The per-target check lines after the define are autogenerated by utils/update_llc_test_checks.py, so regenerate them with that script instead of editing them by hand. 15136define amdgpu_kernel void @global_agent_one_as_seq_cst_monotonic_cmpxchg( 15137; GFX6-LABEL: global_agent_one_as_seq_cst_monotonic_cmpxchg: 15138; GFX6: ; %bb.0: ; %entry 15139; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 15140; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 15141; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 15142; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 15143; GFX6-NEXT: s_waitcnt lgkmcnt(0) 15144; GFX6-NEXT: s_mov_b32 s12, s5 15145; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 15146; GFX6-NEXT: s_mov_b32 s10, 0x100f000 15147; GFX6-NEXT: s_mov_b32 s11, -1 15148; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 15149; GFX6-NEXT: s_mov_b32 s5, s12 15150; GFX6-NEXT: s_mov_b32 s6, s11 15151; GFX6-NEXT: s_mov_b32 s7, s10 15152; GFX6-NEXT: v_mov_b32_e32 v0, s9 15153; GFX6-NEXT: v_mov_b32_e32 v2, s8 15154; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 15155; GFX6-NEXT: v_mov_b32_e32 v1, v2 
15156; GFX6-NEXT: s_waitcnt vmcnt(0) 15157; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 15158; GFX6-NEXT: s_waitcnt vmcnt(0) 15159; GFX6-NEXT: buffer_wbinvl1 15160; GFX6-NEXT: s_endpgm 15161; 15162; GFX7-LABEL: global_agent_one_as_seq_cst_monotonic_cmpxchg: 15163; GFX7: ; %bb.0: ; %entry 15164; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 15165; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 15166; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 15167; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 15168; GFX7-NEXT: s_mov_b64 s[10:11], 16 15169; GFX7-NEXT: s_waitcnt lgkmcnt(0) 15170; GFX7-NEXT: s_mov_b32 s4, s8 15171; GFX7-NEXT: s_mov_b32 s5, s9 15172; GFX7-NEXT: s_mov_b32 s9, s10 15173; GFX7-NEXT: s_mov_b32 s8, s11 15174; GFX7-NEXT: s_add_u32 s4, s4, s9 15175; GFX7-NEXT: s_addc_u32 s8, s5, s8 15176; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 15177; GFX7-NEXT: s_mov_b32 s5, s8 15178; GFX7-NEXT: v_mov_b32_e32 v2, s7 15179; GFX7-NEXT: v_mov_b32_e32 v0, s6 15180; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15181; GFX7-NEXT: v_mov_b32_e32 v3, v0 15182; GFX7-NEXT: v_mov_b32_e32 v0, s4 15183; GFX7-NEXT: v_mov_b32_e32 v1, s5 15184; GFX7-NEXT: s_waitcnt vmcnt(0) 15185; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 15186; GFX7-NEXT: s_waitcnt vmcnt(0) 15187; GFX7-NEXT: buffer_wbinvl1_vol 15188; GFX7-NEXT: s_endpgm 15189; 15190; GFX10-WGP-LABEL: global_agent_one_as_seq_cst_monotonic_cmpxchg: 15191; GFX10-WGP: ; %bb.0: ; %entry 15192; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 15193; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 15194; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 15195; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 15196; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 15197; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 15198; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 15199; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15200; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 15201; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 
15202; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 15203; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 15204; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 15205; GFX10-WGP-NEXT: buffer_gl1_inv 15206; GFX10-WGP-NEXT: buffer_gl0_inv 15207; GFX10-WGP-NEXT: s_endpgm 15208; 15209; GFX10-CU-LABEL: global_agent_one_as_seq_cst_monotonic_cmpxchg: 15210; GFX10-CU: ; %bb.0: ; %entry 15211; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 15212; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 15213; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 15214; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 15215; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 15216; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 15217; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 15218; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15219; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 15220; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 15221; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 15222; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 15223; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 15224; GFX10-CU-NEXT: buffer_gl1_inv 15225; GFX10-CU-NEXT: buffer_gl0_inv 15226; GFX10-CU-NEXT: s_endpgm 15227; 15228; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_monotonic_cmpxchg: 15229; SKIP-CACHE-INV: ; %bb.0: ; %entry 15230; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 15231; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 15232; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 15233; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 15234; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 15235; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 15236; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 15237; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 15238; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 15239; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 15240; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 15241; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 15242; SKIP-CACHE-INV-NEXT: 
s_mov_b32 s3, s6 15243; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 15244; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 15245; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 15246; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 15247; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 15248; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 15249; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 15250; SKIP-CACHE-INV-NEXT: s_endpgm 15251; 15252; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_monotonic_cmpxchg: 15253; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 15254; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 15255; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 15256; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 15257; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 15258; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15259; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 15260; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 15261; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15262; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 15263; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 15264; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 15265; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 15266; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 15267; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 15268; 15269; GFX90A-TGSPLIT-LABEL: global_agent_one_as_seq_cst_monotonic_cmpxchg: 15270; GFX90A-TGSPLIT: ; %bb.0: ; %entry 15271; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 15272; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 15273; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 15274; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 15275; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15276; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 15277; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 15278; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 
15279; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 15280; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 15281; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 15282; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 15283; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 15284; GFX90A-TGSPLIT-NEXT: s_endpgm 15285; 15286; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_monotonic_cmpxchg: 15287; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 15288; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 15289; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 15290; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 15291; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 15292; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15293; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 15294; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 15295; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15296; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 15297; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 15298; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 15299; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 15300; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 15301; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 15302; GFX940-NOTTGSPLIT-NEXT: s_endpgm 15303; 15304; GFX940-TGSPLIT-LABEL: global_agent_one_as_seq_cst_monotonic_cmpxchg: 15305; GFX940-TGSPLIT: ; %bb.0: ; %entry 15306; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 15307; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 15308; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 15309; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 15310; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15311; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 15312; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 15313; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15314; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 15315; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 15316; GFX940-TGSPLIT-NEXT: 
s_waitcnt vmcnt(0) 15317; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 15318; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 15319; GFX940-TGSPLIT-NEXT: buffer_inv sc1 15320; GFX940-TGSPLIT-NEXT: s_endpgm 15321; 15322; GFX11-WGP-LABEL: global_agent_one_as_seq_cst_monotonic_cmpxchg: 15323; GFX11-WGP: ; %bb.0: ; %entry 15324; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 15325; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15326; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 15327; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 15328; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 15329; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 15330; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 15331; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15332; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 15333; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 15334; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 15335; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 15336; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 15337; GFX11-WGP-NEXT: buffer_gl1_inv 15338; GFX11-WGP-NEXT: buffer_gl0_inv 15339; GFX11-WGP-NEXT: s_endpgm 15340; 15341; GFX11-CU-LABEL: global_agent_one_as_seq_cst_monotonic_cmpxchg: 15342; GFX11-CU: ; %bb.0: ; %entry 15343; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 15344; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15345; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 15346; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 15347; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 15348; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 15349; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 15350; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15351; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 15352; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 15353; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 15354; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 15355; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 15356; GFX11-CU-NEXT: buffer_gl1_inv 15357; GFX11-CU-NEXT: buffer_gl0_inv 15358; GFX11-CU-NEXT: 
s_endpgm 15359; 15360; GFX12-WGP-LABEL: global_agent_one_as_seq_cst_monotonic_cmpxchg: 15361; GFX12-WGP: ; %bb.0: ; %entry 15362; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 15363; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15364; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 15365; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 15366; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 15367; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 15368; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 15369; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15370; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 15371; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 15372; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 15373; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 15374; GFX12-WGP-NEXT: s_wait_storecnt 0x0 15375; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 15376; GFX12-WGP-NEXT: s_wait_storecnt 0x0 15377; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 15378; GFX12-WGP-NEXT: s_endpgm 15379; 15380; GFX12-CU-LABEL: global_agent_one_as_seq_cst_monotonic_cmpxchg: 15381; GFX12-CU: ; %bb.0: ; %entry 15382; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 15383; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15384; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 15385; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 15386; GFX12-CU-NEXT: s_wait_kmcnt 0x0 15387; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 15388; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 15389; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15390; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 15391; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 15392; GFX12-CU-NEXT: s_wait_samplecnt 0x0 15393; GFX12-CU-NEXT: s_wait_loadcnt 0x0 15394; GFX12-CU-NEXT: s_wait_storecnt 0x0 15395; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 15396; GFX12-CU-NEXT: s_wait_storecnt 0x0 15397; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 15398; GFX12-CU-NEXT: s_endpgm 15399 ptr addrspace(1) %out, i32 %in, i32 %old) { 15400entry: 15401 %gep = getelementptr i32, 
ptr addrspace(1) %out, i32 4 15402 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst monotonic 15403 ret void 15404} 15405
; Kernel under test - volatile i32 cmpxchg (compare against %old, store %in) on the addrspace(1) pointer %out + 4 i32 elements (byte offset 16), syncscope agent-one-as, success ordering monotonic, failure ordering acquire, result unused. The per-target check lines after the define are autogenerated by utils/update_llc_test_checks.py, so regenerate them with that script instead of editing them by hand.
15406define amdgpu_kernel void @global_agent_one_as_monotonic_acquire_cmpxchg( 15407; GFX6-LABEL: global_agent_one_as_monotonic_acquire_cmpxchg: 15408; GFX6: ; %bb.0: ; %entry 15409; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 15410; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 15411; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 15412; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 15413; GFX6-NEXT: s_waitcnt lgkmcnt(0) 15414; GFX6-NEXT: s_mov_b32 s12, s5 15415; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 15416; GFX6-NEXT: s_mov_b32 s10, 0x100f000 15417; GFX6-NEXT: s_mov_b32 s11, -1 15418; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 15419; GFX6-NEXT: s_mov_b32 s5, s12 15420; GFX6-NEXT: s_mov_b32 s6, s11 15421; GFX6-NEXT: s_mov_b32 s7, s10 15422; GFX6-NEXT: v_mov_b32_e32 v0, s9 15423; GFX6-NEXT: v_mov_b32_e32 v2, s8 15424; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 15425; GFX6-NEXT: v_mov_b32_e32 v1, v2 15426; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 15427; GFX6-NEXT: s_waitcnt vmcnt(0) 15428; GFX6-NEXT: buffer_wbinvl1 15429; GFX6-NEXT: s_endpgm 15430; 15431; GFX7-LABEL: global_agent_one_as_monotonic_acquire_cmpxchg: 15432; GFX7: ; %bb.0: ; %entry 15433; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 15434; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 15435; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 15436; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 15437; GFX7-NEXT: s_mov_b64 s[10:11], 16 15438; GFX7-NEXT: s_waitcnt lgkmcnt(0) 15439; GFX7-NEXT: s_mov_b32 s4, s8 15440; GFX7-NEXT: s_mov_b32 s5, s9 15441; GFX7-NEXT: s_mov_b32 s9, s10 15442; GFX7-NEXT: s_mov_b32 s8, s11 15443; GFX7-NEXT: s_add_u32 s4, s4, s9 15444; GFX7-NEXT: s_addc_u32 s8, s5, s8 15445; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 15446; GFX7-NEXT: 
s_mov_b32 s5, s8 15447; GFX7-NEXT: v_mov_b32_e32 v2, s7 15448; GFX7-NEXT: v_mov_b32_e32 v0, s6 15449; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15450; GFX7-NEXT: v_mov_b32_e32 v3, v0 15451; GFX7-NEXT: v_mov_b32_e32 v0, s4 15452; GFX7-NEXT: v_mov_b32_e32 v1, s5 15453; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 15454; GFX7-NEXT: s_waitcnt vmcnt(0) 15455; GFX7-NEXT: buffer_wbinvl1_vol 15456; GFX7-NEXT: s_endpgm 15457; 15458; GFX10-WGP-LABEL: global_agent_one_as_monotonic_acquire_cmpxchg: 15459; GFX10-WGP: ; %bb.0: ; %entry 15460; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 15461; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 15462; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 15463; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 15464; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 15465; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 15466; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 15467; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15468; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 15469; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 15470; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 15471; GFX10-WGP-NEXT: buffer_gl1_inv 15472; GFX10-WGP-NEXT: buffer_gl0_inv 15473; GFX10-WGP-NEXT: s_endpgm 15474; 15475; GFX10-CU-LABEL: global_agent_one_as_monotonic_acquire_cmpxchg: 15476; GFX10-CU: ; %bb.0: ; %entry 15477; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 15478; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 15479; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 15480; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 15481; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 15482; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 15483; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 15484; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15485; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 15486; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 15487; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 15488; GFX10-CU-NEXT: buffer_gl1_inv 15489; 
GFX10-CU-NEXT: buffer_gl0_inv 15490; GFX10-CU-NEXT: s_endpgm 15491; 15492; SKIP-CACHE-INV-LABEL: global_agent_one_as_monotonic_acquire_cmpxchg: 15493; SKIP-CACHE-INV: ; %bb.0: ; %entry 15494; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 15495; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 15496; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 15497; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 15498; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 15499; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 15500; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 15501; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 15502; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 15503; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 15504; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 15505; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 15506; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 15507; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 15508; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 15509; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 15510; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 15511; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 15512; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 15513; SKIP-CACHE-INV-NEXT: s_endpgm 15514; 15515; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_acquire_cmpxchg: 15516; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 15517; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 15518; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 15519; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 15520; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 15521; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15522; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 15523; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 15524; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15525; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 15526; 
GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 15527; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 15528; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 15529; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 15530; 15531; GFX90A-TGSPLIT-LABEL: global_agent_one_as_monotonic_acquire_cmpxchg: 15532; GFX90A-TGSPLIT: ; %bb.0: ; %entry 15533; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 15534; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 15535; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 15536; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 15537; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15538; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 15539; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 15540; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15541; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 15542; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 15543; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 15544; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 15545; GFX90A-TGSPLIT-NEXT: s_endpgm 15546; 15547; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_acquire_cmpxchg: 15548; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 15549; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 15550; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 15551; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 15552; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 15553; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15554; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 15555; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 15556; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15557; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 15558; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 15559; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 15560; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 15561; GFX940-NOTTGSPLIT-NEXT: s_endpgm 15562; 15563; GFX940-TGSPLIT-LABEL: 
global_agent_one_as_monotonic_acquire_cmpxchg: 15564; GFX940-TGSPLIT: ; %bb.0: ; %entry 15565; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 15566; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 15567; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 15568; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 15569; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15570; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 15571; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 15572; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15573; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 15574; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 15575; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 15576; GFX940-TGSPLIT-NEXT: buffer_inv sc1 15577; GFX940-TGSPLIT-NEXT: s_endpgm 15578; 15579; GFX11-WGP-LABEL: global_agent_one_as_monotonic_acquire_cmpxchg: 15580; GFX11-WGP: ; %bb.0: ; %entry 15581; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 15582; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15583; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 15584; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 15585; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 15586; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 15587; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 15588; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15589; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 15590; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 15591; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 15592; GFX11-WGP-NEXT: buffer_gl1_inv 15593; GFX11-WGP-NEXT: buffer_gl0_inv 15594; GFX11-WGP-NEXT: s_endpgm 15595; 15596; GFX11-CU-LABEL: global_agent_one_as_monotonic_acquire_cmpxchg: 15597; GFX11-CU: ; %bb.0: ; %entry 15598; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 15599; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15600; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 15601; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 15602; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 15603; GFX11-CU-NEXT: v_mov_b32_e32 
v1, s3 15604; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 15605; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15606; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 15607; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 15608; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 15609; GFX11-CU-NEXT: buffer_gl1_inv 15610; GFX11-CU-NEXT: buffer_gl0_inv 15611; GFX11-CU-NEXT: s_endpgm 15612; 15613; GFX12-WGP-LABEL: global_agent_one_as_monotonic_acquire_cmpxchg: 15614; GFX12-WGP: ; %bb.0: ; %entry 15615; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 15616; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15617; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 15618; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 15619; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 15620; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 15621; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 15622; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15623; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 15624; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 15625; GFX12-WGP-NEXT: s_wait_storecnt 0x0 15626; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 15627; GFX12-WGP-NEXT: s_endpgm 15628; 15629; GFX12-CU-LABEL: global_agent_one_as_monotonic_acquire_cmpxchg: 15630; GFX12-CU: ; %bb.0: ; %entry 15631; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 15632; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15633; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 15634; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 15635; GFX12-CU-NEXT: s_wait_kmcnt 0x0 15636; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 15637; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 15638; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15639; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 15640; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 15641; GFX12-CU-NEXT: s_wait_storecnt 0x0 15642; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 15643; GFX12-CU-NEXT: s_endpgm 15644 ptr addrspace(1) %out, 
i32 %in, i32 %old) { 15645entry: 15646 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 15647 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic acquire 15648 ret void 15649} 15650 15651define amdgpu_kernel void @global_agent_one_as_acquire_acquire_cmpxchg( 15652; GFX6-LABEL: global_agent_one_as_acquire_acquire_cmpxchg: 15653; GFX6: ; %bb.0: ; %entry 15654; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 15655; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 15656; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 15657; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 15658; GFX6-NEXT: s_waitcnt lgkmcnt(0) 15659; GFX6-NEXT: s_mov_b32 s12, s5 15660; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 15661; GFX6-NEXT: s_mov_b32 s10, 0x100f000 15662; GFX6-NEXT: s_mov_b32 s11, -1 15663; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 15664; GFX6-NEXT: s_mov_b32 s5, s12 15665; GFX6-NEXT: s_mov_b32 s6, s11 15666; GFX6-NEXT: s_mov_b32 s7, s10 15667; GFX6-NEXT: v_mov_b32_e32 v0, s9 15668; GFX6-NEXT: v_mov_b32_e32 v2, s8 15669; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 15670; GFX6-NEXT: v_mov_b32_e32 v1, v2 15671; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 15672; GFX6-NEXT: s_waitcnt vmcnt(0) 15673; GFX6-NEXT: buffer_wbinvl1 15674; GFX6-NEXT: s_endpgm 15675; 15676; GFX7-LABEL: global_agent_one_as_acquire_acquire_cmpxchg: 15677; GFX7: ; %bb.0: ; %entry 15678; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 15679; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 15680; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 15681; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 15682; GFX7-NEXT: s_mov_b64 s[10:11], 16 15683; GFX7-NEXT: s_waitcnt lgkmcnt(0) 15684; GFX7-NEXT: s_mov_b32 s4, s8 15685; GFX7-NEXT: s_mov_b32 s5, s9 15686; GFX7-NEXT: s_mov_b32 s9, s10 15687; GFX7-NEXT: s_mov_b32 s8, s11 15688; GFX7-NEXT: s_add_u32 s4, s4, s9 15689; GFX7-NEXT: s_addc_u32 s8, s5, s8 15690; GFX7-NEXT: ; kill: 
def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 15691; GFX7-NEXT: s_mov_b32 s5, s8 15692; GFX7-NEXT: v_mov_b32_e32 v2, s7 15693; GFX7-NEXT: v_mov_b32_e32 v0, s6 15694; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15695; GFX7-NEXT: v_mov_b32_e32 v3, v0 15696; GFX7-NEXT: v_mov_b32_e32 v0, s4 15697; GFX7-NEXT: v_mov_b32_e32 v1, s5 15698; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 15699; GFX7-NEXT: s_waitcnt vmcnt(0) 15700; GFX7-NEXT: buffer_wbinvl1_vol 15701; GFX7-NEXT: s_endpgm 15702; 15703; GFX10-WGP-LABEL: global_agent_one_as_acquire_acquire_cmpxchg: 15704; GFX10-WGP: ; %bb.0: ; %entry 15705; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 15706; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 15707; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 15708; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 15709; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 15710; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 15711; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 15712; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15713; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 15714; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 15715; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 15716; GFX10-WGP-NEXT: buffer_gl1_inv 15717; GFX10-WGP-NEXT: buffer_gl0_inv 15718; GFX10-WGP-NEXT: s_endpgm 15719; 15720; GFX10-CU-LABEL: global_agent_one_as_acquire_acquire_cmpxchg: 15721; GFX10-CU: ; %bb.0: ; %entry 15722; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 15723; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 15724; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 15725; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 15726; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 15727; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 15728; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 15729; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15730; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 15731; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 15732; GFX10-CU-NEXT: s_waitcnt_vscnt 
null, 0x0 15733; GFX10-CU-NEXT: buffer_gl1_inv 15734; GFX10-CU-NEXT: buffer_gl0_inv 15735; GFX10-CU-NEXT: s_endpgm 15736; 15737; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_acquire_cmpxchg: 15738; SKIP-CACHE-INV: ; %bb.0: ; %entry 15739; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 15740; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 15741; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 15742; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 15743; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 15744; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 15745; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 15746; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 15747; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 15748; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 15749; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 15750; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 15751; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 15752; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 15753; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 15754; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 15755; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 15756; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 15757; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 15758; SKIP-CACHE-INV-NEXT: s_endpgm 15759; 15760; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_acquire_cmpxchg: 15761; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 15762; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 15763; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 15764; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 15765; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 15766; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15767; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 15768; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 15769; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15770; 
GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 15771; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 15772; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 15773; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 15774; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 15775; 15776; GFX90A-TGSPLIT-LABEL: global_agent_one_as_acquire_acquire_cmpxchg: 15777; GFX90A-TGSPLIT: ; %bb.0: ; %entry 15778; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 15779; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 15780; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 15781; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 15782; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15783; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 15784; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 15785; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15786; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 15787; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 15788; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 15789; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 15790; GFX90A-TGSPLIT-NEXT: s_endpgm 15791; 15792; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_acquire_cmpxchg: 15793; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 15794; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 15795; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 15796; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 15797; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 15798; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15799; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 15800; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 15801; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15802; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 15803; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 15804; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 15805; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 15806; GFX940-NOTTGSPLIT-NEXT: s_endpgm 
15807; 15808; GFX940-TGSPLIT-LABEL: global_agent_one_as_acquire_acquire_cmpxchg: 15809; GFX940-TGSPLIT: ; %bb.0: ; %entry 15810; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 15811; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 15812; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 15813; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 15814; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15815; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 15816; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 15817; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15818; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 15819; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 15820; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 15821; GFX940-TGSPLIT-NEXT: buffer_inv sc1 15822; GFX940-TGSPLIT-NEXT: s_endpgm 15823; 15824; GFX11-WGP-LABEL: global_agent_one_as_acquire_acquire_cmpxchg: 15825; GFX11-WGP: ; %bb.0: ; %entry 15826; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 15827; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15828; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 15829; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 15830; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 15831; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 15832; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 15833; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15834; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 15835; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 15836; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 15837; GFX11-WGP-NEXT: buffer_gl1_inv 15838; GFX11-WGP-NEXT: buffer_gl0_inv 15839; GFX11-WGP-NEXT: s_endpgm 15840; 15841; GFX11-CU-LABEL: global_agent_one_as_acquire_acquire_cmpxchg: 15842; GFX11-CU: ; %bb.0: ; %entry 15843; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 15844; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15845; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 15846; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 15847; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 15848; 
GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 15849; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 15850; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15851; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 15852; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 15853; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 15854; GFX11-CU-NEXT: buffer_gl1_inv 15855; GFX11-CU-NEXT: buffer_gl0_inv 15856; GFX11-CU-NEXT: s_endpgm 15857; 15858; GFX12-WGP-LABEL: global_agent_one_as_acquire_acquire_cmpxchg: 15859; GFX12-WGP: ; %bb.0: ; %entry 15860; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 15861; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15862; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 15863; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 15864; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 15865; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 15866; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 15867; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15868; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 15869; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 15870; GFX12-WGP-NEXT: s_wait_storecnt 0x0 15871; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 15872; GFX12-WGP-NEXT: s_endpgm 15873; 15874; GFX12-CU-LABEL: global_agent_one_as_acquire_acquire_cmpxchg: 15875; GFX12-CU: ; %bb.0: ; %entry 15876; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 15877; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15878; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 15879; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 15880; GFX12-CU-NEXT: s_wait_kmcnt 0x0 15881; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 15882; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 15883; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15884; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 15885; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 15886; GFX12-CU-NEXT: s_wait_storecnt 0x0 15887; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 15888; GFX12-CU-NEXT: s_endpgm 15889 
; IR under test for @global_agent_one_as_acquire_acquire_cmpxchg (the
; autogenerated per-target assertions precede this point): a volatile cmpxchg
; on global memory (addrspace(1)) at %out + 4 x i32, i.e. byte offset 16,
; using syncscope("agent-one-as") with success ordering acquire and failure
; ordering acquire. %old is the compare value and %in is the new value; the
; cmpxchg result (%val) is not used.
ptr addrspace(1) %out, i32 %in, i32 %old) { 15890entry: 15891 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 15892 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire acquire 15893 ret void 15894} 15895 15896define amdgpu_kernel void @global_agent_one_as_release_acquire_cmpxchg( 15897; GFX6-LABEL: global_agent_one_as_release_acquire_cmpxchg: 15898; GFX6: ; %bb.0: ; %entry 15899; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 15900; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 15901; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 15902; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 15903; GFX6-NEXT: s_waitcnt lgkmcnt(0) 15904; GFX6-NEXT: s_mov_b32 s12, s5 15905; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 15906; GFX6-NEXT: s_mov_b32 s10, 0x100f000 15907; GFX6-NEXT: s_mov_b32 s11, -1 15908; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 15909; GFX6-NEXT: s_mov_b32 s5, s12 15910; GFX6-NEXT: s_mov_b32 s6, s11 15911; GFX6-NEXT: s_mov_b32 s7, s10 15912; GFX6-NEXT: v_mov_b32_e32 v0, s9 15913; GFX6-NEXT: v_mov_b32_e32 v2, s8 15914; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 15915; GFX6-NEXT: v_mov_b32_e32 v1, v2 15916; GFX6-NEXT: s_waitcnt vmcnt(0) 15917; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 15918; GFX6-NEXT: s_waitcnt vmcnt(0) 15919; GFX6-NEXT: buffer_wbinvl1 15920; GFX6-NEXT: s_endpgm 15921; 15922; GFX7-LABEL: global_agent_one_as_release_acquire_cmpxchg: 15923; GFX7: ; %bb.0: ; %entry 15924; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 15925; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 15926; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 15927; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 15928; GFX7-NEXT: s_mov_b64 s[10:11], 16 15929; GFX7-NEXT: s_waitcnt lgkmcnt(0) 15930; GFX7-NEXT: s_mov_b32 s4, s8 15931; GFX7-NEXT: s_mov_b32 s5, s9 15932; GFX7-NEXT: s_mov_b32 s9, s10 15933; GFX7-NEXT: s_mov_b32 s8, s11 15934; GFX7-NEXT: s_add_u32 s4, s4, s9 15935; 
GFX7-NEXT: s_addc_u32 s8, s5, s8 15936; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 15937; GFX7-NEXT: s_mov_b32 s5, s8 15938; GFX7-NEXT: v_mov_b32_e32 v2, s7 15939; GFX7-NEXT: v_mov_b32_e32 v0, s6 15940; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15941; GFX7-NEXT: v_mov_b32_e32 v3, v0 15942; GFX7-NEXT: v_mov_b32_e32 v0, s4 15943; GFX7-NEXT: v_mov_b32_e32 v1, s5 15944; GFX7-NEXT: s_waitcnt vmcnt(0) 15945; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 15946; GFX7-NEXT: s_waitcnt vmcnt(0) 15947; GFX7-NEXT: buffer_wbinvl1_vol 15948; GFX7-NEXT: s_endpgm 15949; 15950; GFX10-WGP-LABEL: global_agent_one_as_release_acquire_cmpxchg: 15951; GFX10-WGP: ; %bb.0: ; %entry 15952; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 15953; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 15954; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 15955; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 15956; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 15957; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 15958; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 15959; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15960; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 15961; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 15962; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 15963; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 15964; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 15965; GFX10-WGP-NEXT: buffer_gl1_inv 15966; GFX10-WGP-NEXT: buffer_gl0_inv 15967; GFX10-WGP-NEXT: s_endpgm 15968; 15969; GFX10-CU-LABEL: global_agent_one_as_release_acquire_cmpxchg: 15970; GFX10-CU: ; %bb.0: ; %entry 15971; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 15972; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 15973; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 15974; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 15975; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 15976; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 15977; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 15978; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 
def $vgpr1_vgpr2 killed $exec 15979; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 15980; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 15981; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 15982; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 15983; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 15984; GFX10-CU-NEXT: buffer_gl1_inv 15985; GFX10-CU-NEXT: buffer_gl0_inv 15986; GFX10-CU-NEXT: s_endpgm 15987; 15988; SKIP-CACHE-INV-LABEL: global_agent_one_as_release_acquire_cmpxchg: 15989; SKIP-CACHE-INV: ; %bb.0: ; %entry 15990; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 15991; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 15992; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 15993; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 15994; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 15995; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 15996; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 15997; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 15998; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 15999; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 16000; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 16001; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 16002; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 16003; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 16004; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 16005; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 16006; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 16007; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 16008; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 16009; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 16010; SKIP-CACHE-INV-NEXT: s_endpgm 16011; 16012; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_release_acquire_cmpxchg: 16013; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 16014; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 16015; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 16016; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 16017; 
GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 16018; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16019; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 16020; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 16021; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16022; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 16023; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 16024; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 16025; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 16026; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 16027; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 16028; 16029; GFX90A-TGSPLIT-LABEL: global_agent_one_as_release_acquire_cmpxchg: 16030; GFX90A-TGSPLIT: ; %bb.0: ; %entry 16031; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 16032; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 16033; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 16034; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 16035; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16036; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 16037; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 16038; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16039; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 16040; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 16041; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 16042; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 16043; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 16044; GFX90A-TGSPLIT-NEXT: s_endpgm 16045; 16046; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_release_acquire_cmpxchg: 16047; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 16048; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 16049; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 16050; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 16051; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 16052; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16053; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 16054; 
GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 16055; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16056; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 16057; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 16058; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 16059; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 16060; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 16061; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 16062; GFX940-NOTTGSPLIT-NEXT: s_endpgm 16063; 16064; GFX940-TGSPLIT-LABEL: global_agent_one_as_release_acquire_cmpxchg: 16065; GFX940-TGSPLIT: ; %bb.0: ; %entry 16066; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 16067; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 16068; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 16069; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 16070; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16071; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 16072; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 16073; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16074; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 16075; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 16076; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 16077; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 16078; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 16079; GFX940-TGSPLIT-NEXT: buffer_inv sc1 16080; GFX940-TGSPLIT-NEXT: s_endpgm 16081; 16082; GFX11-WGP-LABEL: global_agent_one_as_release_acquire_cmpxchg: 16083; GFX11-WGP: ; %bb.0: ; %entry 16084; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 16085; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16086; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 16087; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 16088; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 16089; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 16090; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 16091; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16092; GFX11-WGP-NEXT: 
v_mov_b32_e32 v2, v3 16093; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 16094; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 16095; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 16096; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 16097; GFX11-WGP-NEXT: buffer_gl1_inv 16098; GFX11-WGP-NEXT: buffer_gl0_inv 16099; GFX11-WGP-NEXT: s_endpgm 16100; 16101; GFX11-CU-LABEL: global_agent_one_as_release_acquire_cmpxchg: 16102; GFX11-CU: ; %bb.0: ; %entry 16103; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 16104; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16105; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 16106; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 16107; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 16108; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 16109; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 16110; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16111; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 16112; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 16113; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 16114; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 16115; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 16116; GFX11-CU-NEXT: buffer_gl1_inv 16117; GFX11-CU-NEXT: buffer_gl0_inv 16118; GFX11-CU-NEXT: s_endpgm 16119; 16120; GFX12-WGP-LABEL: global_agent_one_as_release_acquire_cmpxchg: 16121; GFX12-WGP: ; %bb.0: ; %entry 16122; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 16123; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16124; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 16125; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 16126; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 16127; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 16128; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 16129; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16130; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 16131; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 16132; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 16133; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 16134; GFX12-WGP-NEXT: s_wait_storecnt 0x0 16135; GFX12-WGP-NEXT: 
global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 16136; GFX12-WGP-NEXT: s_wait_storecnt 0x0 16137; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 16138; GFX12-WGP-NEXT: s_endpgm 16139; 16140; GFX12-CU-LABEL: global_agent_one_as_release_acquire_cmpxchg: 16141; GFX12-CU: ; %bb.0: ; %entry 16142; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 16143; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16144; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 16145; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 16146; GFX12-CU-NEXT: s_wait_kmcnt 0x0 16147; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 16148; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 16149; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16150; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 16151; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 16152; GFX12-CU-NEXT: s_wait_samplecnt 0x0 16153; GFX12-CU-NEXT: s_wait_loadcnt 0x0 16154; GFX12-CU-NEXT: s_wait_storecnt 0x0 16155; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 16156; GFX12-CU-NEXT: s_wait_storecnt 0x0 16157; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 16158; GFX12-CU-NEXT: s_endpgm 16159
; IR under test for @global_agent_one_as_release_acquire_cmpxchg (the
; autogenerated per-target assertions precede this point): a volatile cmpxchg
; on global memory (addrspace(1)) at %out + 4 x i32, i.e. byte offset 16,
; using syncscope("agent-one-as") with success ordering release and failure
; ordering acquire. %old is the compare value and %in is the new value; the
; cmpxchg result (%val) is not used.
ptr addrspace(1) %out, i32 %in, i32 %old) { 16160entry: 16161 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 16162 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") release acquire 16163 ret void 16164} 16165 16166define amdgpu_kernel void @global_agent_one_as_acq_rel_acquire_cmpxchg( 16167; GFX6-LABEL: global_agent_one_as_acq_rel_acquire_cmpxchg: 16168; GFX6: ; %bb.0: ; %entry 16169; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 16170; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 16171; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 16172; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 16173; GFX6-NEXT: s_waitcnt lgkmcnt(0) 16174; GFX6-NEXT: s_mov_b32 s12, s5 16175; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 16176; GFX6-NEXT: s_mov_b32 s10, 0x100f000 16177; GFX6-NEXT: s_mov_b32 s11, -1 16178; 
GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 16179; GFX6-NEXT: s_mov_b32 s5, s12 16180; GFX6-NEXT: s_mov_b32 s6, s11 16181; GFX6-NEXT: s_mov_b32 s7, s10 16182; GFX6-NEXT: v_mov_b32_e32 v0, s9 16183; GFX6-NEXT: v_mov_b32_e32 v2, s8 16184; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 16185; GFX6-NEXT: v_mov_b32_e32 v1, v2 16186; GFX6-NEXT: s_waitcnt vmcnt(0) 16187; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 16188; GFX6-NEXT: s_waitcnt vmcnt(0) 16189; GFX6-NEXT: buffer_wbinvl1 16190; GFX6-NEXT: s_endpgm 16191; 16192; GFX7-LABEL: global_agent_one_as_acq_rel_acquire_cmpxchg: 16193; GFX7: ; %bb.0: ; %entry 16194; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 16195; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 16196; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 16197; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 16198; GFX7-NEXT: s_mov_b64 s[10:11], 16 16199; GFX7-NEXT: s_waitcnt lgkmcnt(0) 16200; GFX7-NEXT: s_mov_b32 s4, s8 16201; GFX7-NEXT: s_mov_b32 s5, s9 16202; GFX7-NEXT: s_mov_b32 s9, s10 16203; GFX7-NEXT: s_mov_b32 s8, s11 16204; GFX7-NEXT: s_add_u32 s4, s4, s9 16205; GFX7-NEXT: s_addc_u32 s8, s5, s8 16206; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 16207; GFX7-NEXT: s_mov_b32 s5, s8 16208; GFX7-NEXT: v_mov_b32_e32 v2, s7 16209; GFX7-NEXT: v_mov_b32_e32 v0, s6 16210; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16211; GFX7-NEXT: v_mov_b32_e32 v3, v0 16212; GFX7-NEXT: v_mov_b32_e32 v0, s4 16213; GFX7-NEXT: v_mov_b32_e32 v1, s5 16214; GFX7-NEXT: s_waitcnt vmcnt(0) 16215; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 16216; GFX7-NEXT: s_waitcnt vmcnt(0) 16217; GFX7-NEXT: buffer_wbinvl1_vol 16218; GFX7-NEXT: s_endpgm 16219; 16220; GFX10-WGP-LABEL: global_agent_one_as_acq_rel_acquire_cmpxchg: 16221; GFX10-WGP: ; %bb.0: ; %entry 16222; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 16223; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 16224; GFX10-WGP-NEXT: s_load_dword 
s7, s[8:9], 0x8 16225; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 16226; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 16227; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 16228; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 16229; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16230; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 16231; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 16232; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 16233; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 16234; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 16235; GFX10-WGP-NEXT: buffer_gl1_inv 16236; GFX10-WGP-NEXT: buffer_gl0_inv 16237; GFX10-WGP-NEXT: s_endpgm 16238; 16239; GFX10-CU-LABEL: global_agent_one_as_acq_rel_acquire_cmpxchg: 16240; GFX10-CU: ; %bb.0: ; %entry 16241; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 16242; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 16243; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 16244; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 16245; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 16246; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 16247; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 16248; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16249; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 16250; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 16251; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 16252; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 16253; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 16254; GFX10-CU-NEXT: buffer_gl1_inv 16255; GFX10-CU-NEXT: buffer_gl0_inv 16256; GFX10-CU-NEXT: s_endpgm 16257; 16258; SKIP-CACHE-INV-LABEL: global_agent_one_as_acq_rel_acquire_cmpxchg: 16259; SKIP-CACHE-INV: ; %bb.0: ; %entry 16260; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 16261; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 16262; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 16263; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 16264; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 16265; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 16266; 
SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 16267; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 16268; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 16269; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 16270; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 16271; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 16272; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 16273; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 16274; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 16275; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 16276; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 16277; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 16278; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 16279; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 16280; SKIP-CACHE-INV-NEXT: s_endpgm 16281; 16282; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_acquire_cmpxchg: 16283; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 16284; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 16285; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 16286; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 16287; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 16288; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16289; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 16290; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 16291; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16292; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 16293; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 16294; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 16295; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 16296; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 16297; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 16298; 16299; GFX90A-TGSPLIT-LABEL: global_agent_one_as_acq_rel_acquire_cmpxchg: 16300; GFX90A-TGSPLIT: ; %bb.0: ; %entry 16301; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 16302; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 
s[4:5], s[8:9], 0x0 16303; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 16304; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 16305; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16306; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 16307; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 16308; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16309; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 16310; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 16311; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 16312; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 16313; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 16314; GFX90A-TGSPLIT-NEXT: s_endpgm 16315; 16316; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_acquire_cmpxchg: 16317; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 16318; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 16319; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 16320; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 16321; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 16322; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16323; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 16324; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 16325; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16326; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 16327; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 16328; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 16329; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 16330; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 16331; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 16332; GFX940-NOTTGSPLIT-NEXT: s_endpgm 16333; 16334; GFX940-TGSPLIT-LABEL: global_agent_one_as_acq_rel_acquire_cmpxchg: 16335; GFX940-TGSPLIT: ; %bb.0: ; %entry 16336; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 16337; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 16338; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 16339; GFX940-TGSPLIT-NEXT: s_load_dword s2, 
s[4:5], 0xc 16340; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16341; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 16342; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 16343; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16344; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 16345; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 16346; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 16347; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 16348; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 16349; GFX940-TGSPLIT-NEXT: buffer_inv sc1 16350; GFX940-TGSPLIT-NEXT: s_endpgm 16351; 16352; GFX11-WGP-LABEL: global_agent_one_as_acq_rel_acquire_cmpxchg: 16353; GFX11-WGP: ; %bb.0: ; %entry 16354; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 16355; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16356; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 16357; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 16358; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 16359; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 16360; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 16361; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16362; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 16363; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 16364; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 16365; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 16366; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 16367; GFX11-WGP-NEXT: buffer_gl1_inv 16368; GFX11-WGP-NEXT: buffer_gl0_inv 16369; GFX11-WGP-NEXT: s_endpgm 16370; 16371; GFX11-CU-LABEL: global_agent_one_as_acq_rel_acquire_cmpxchg: 16372; GFX11-CU: ; %bb.0: ; %entry 16373; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 16374; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16375; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 16376; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 16377; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 16378; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 16379; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 16380; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 
killed $exec 16381; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 16382; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 16383; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 16384; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 16385; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 16386; GFX11-CU-NEXT: buffer_gl1_inv 16387; GFX11-CU-NEXT: buffer_gl0_inv 16388; GFX11-CU-NEXT: s_endpgm 16389; 16390; GFX12-WGP-LABEL: global_agent_one_as_acq_rel_acquire_cmpxchg: 16391; GFX12-WGP: ; %bb.0: ; %entry 16392; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 16393; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16394; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 16395; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 16396; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 16397; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 16398; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 16399; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16400; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 16401; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 16402; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 16403; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 16404; GFX12-WGP-NEXT: s_wait_storecnt 0x0 16405; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 16406; GFX12-WGP-NEXT: s_wait_storecnt 0x0 16407; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 16408; GFX12-WGP-NEXT: s_endpgm 16409; 16410; GFX12-CU-LABEL: global_agent_one_as_acq_rel_acquire_cmpxchg: 16411; GFX12-CU: ; %bb.0: ; %entry 16412; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 16413; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16414; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 16415; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 16416; GFX12-CU-NEXT: s_wait_kmcnt 0x0 16417; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 16418; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 16419; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16420; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 16421; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 16422; GFX12-CU-NEXT: s_wait_samplecnt 0x0 16423; GFX12-CU-NEXT: 
s_wait_loadcnt 0x0 16424; GFX12-CU-NEXT: s_wait_storecnt 0x0 16425; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 16426; GFX12-CU-NEXT: s_wait_storecnt 0x0 16427; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 16428; GFX12-CU-NEXT: s_endpgm 16429 ptr addrspace(1) %out, i32 %in, i32 %old) { 16430entry: 16431 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 16432 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel acquire 16433 ret void 16434} 16435 16436define amdgpu_kernel void @global_agent_one_as_seq_cst_acquire_cmpxchg( 16437; GFX6-LABEL: global_agent_one_as_seq_cst_acquire_cmpxchg: 16438; GFX6: ; %bb.0: ; %entry 16439; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 16440; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 16441; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 16442; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 16443; GFX6-NEXT: s_waitcnt lgkmcnt(0) 16444; GFX6-NEXT: s_mov_b32 s12, s5 16445; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 16446; GFX6-NEXT: s_mov_b32 s10, 0x100f000 16447; GFX6-NEXT: s_mov_b32 s11, -1 16448; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 16449; GFX6-NEXT: s_mov_b32 s5, s12 16450; GFX6-NEXT: s_mov_b32 s6, s11 16451; GFX6-NEXT: s_mov_b32 s7, s10 16452; GFX6-NEXT: v_mov_b32_e32 v0, s9 16453; GFX6-NEXT: v_mov_b32_e32 v2, s8 16454; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 16455; GFX6-NEXT: v_mov_b32_e32 v1, v2 16456; GFX6-NEXT: s_waitcnt vmcnt(0) 16457; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 16458; GFX6-NEXT: s_waitcnt vmcnt(0) 16459; GFX6-NEXT: buffer_wbinvl1 16460; GFX6-NEXT: s_endpgm 16461; 16462; GFX7-LABEL: global_agent_one_as_seq_cst_acquire_cmpxchg: 16463; GFX7: ; %bb.0: ; %entry 16464; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 16465; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 16466; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 16467; GFX7-NEXT: s_load_dword s6, 
s[4:5], 0x3 16468; GFX7-NEXT: s_mov_b64 s[10:11], 16 16469; GFX7-NEXT: s_waitcnt lgkmcnt(0) 16470; GFX7-NEXT: s_mov_b32 s4, s8 16471; GFX7-NEXT: s_mov_b32 s5, s9 16472; GFX7-NEXT: s_mov_b32 s9, s10 16473; GFX7-NEXT: s_mov_b32 s8, s11 16474; GFX7-NEXT: s_add_u32 s4, s4, s9 16475; GFX7-NEXT: s_addc_u32 s8, s5, s8 16476; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 16477; GFX7-NEXT: s_mov_b32 s5, s8 16478; GFX7-NEXT: v_mov_b32_e32 v2, s7 16479; GFX7-NEXT: v_mov_b32_e32 v0, s6 16480; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16481; GFX7-NEXT: v_mov_b32_e32 v3, v0 16482; GFX7-NEXT: v_mov_b32_e32 v0, s4 16483; GFX7-NEXT: v_mov_b32_e32 v1, s5 16484; GFX7-NEXT: s_waitcnt vmcnt(0) 16485; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 16486; GFX7-NEXT: s_waitcnt vmcnt(0) 16487; GFX7-NEXT: buffer_wbinvl1_vol 16488; GFX7-NEXT: s_endpgm 16489; 16490; GFX10-WGP-LABEL: global_agent_one_as_seq_cst_acquire_cmpxchg: 16491; GFX10-WGP: ; %bb.0: ; %entry 16492; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 16493; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 16494; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 16495; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 16496; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 16497; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 16498; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 16499; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16500; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 16501; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 16502; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 16503; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 16504; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 16505; GFX10-WGP-NEXT: buffer_gl1_inv 16506; GFX10-WGP-NEXT: buffer_gl0_inv 16507; GFX10-WGP-NEXT: s_endpgm 16508; 16509; GFX10-CU-LABEL: global_agent_one_as_seq_cst_acquire_cmpxchg: 16510; GFX10-CU: ; %bb.0: ; %entry 16511; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 16512; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 16513; 
GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 16514; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 16515; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 16516; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 16517; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 16518; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16519; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 16520; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 16521; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 16522; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 16523; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 16524; GFX10-CU-NEXT: buffer_gl1_inv 16525; GFX10-CU-NEXT: buffer_gl0_inv 16526; GFX10-CU-NEXT: s_endpgm 16527; 16528; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_acquire_cmpxchg: 16529; SKIP-CACHE-INV: ; %bb.0: ; %entry 16530; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 16531; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 16532; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 16533; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 16534; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 16535; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 16536; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 16537; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 16538; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 16539; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 16540; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 16541; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 16542; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 16543; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 16544; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 16545; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 16546; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 16547; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 16548; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 16549; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 16550; SKIP-CACHE-INV-NEXT: s_endpgm 16551; 16552; 
GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_acquire_cmpxchg: 16553; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 16554; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 16555; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 16556; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 16557; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 16558; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16559; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 16560; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 16561; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16562; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 16563; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 16564; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 16565; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 16566; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 16567; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 16568; 16569; GFX90A-TGSPLIT-LABEL: global_agent_one_as_seq_cst_acquire_cmpxchg: 16570; GFX90A-TGSPLIT: ; %bb.0: ; %entry 16571; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 16572; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 16573; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 16574; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 16575; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16576; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 16577; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 16578; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16579; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 16580; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 16581; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 16582; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 16583; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 16584; GFX90A-TGSPLIT-NEXT: s_endpgm 16585; 16586; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_acquire_cmpxchg: 16587; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 16588; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 
16589; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 16590; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 16591; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 16592; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16593; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 16594; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 16595; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16596; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 16597; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 16598; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 16599; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 16600; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 16601; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 16602; GFX940-NOTTGSPLIT-NEXT: s_endpgm 16603; 16604; GFX940-TGSPLIT-LABEL: global_agent_one_as_seq_cst_acquire_cmpxchg: 16605; GFX940-TGSPLIT: ; %bb.0: ; %entry 16606; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 16607; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 16608; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 16609; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 16610; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16611; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 16612; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 16613; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16614; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 16615; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 16616; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 16617; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 16618; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 16619; GFX940-TGSPLIT-NEXT: buffer_inv sc1 16620; GFX940-TGSPLIT-NEXT: s_endpgm 16621; 16622; GFX11-WGP-LABEL: global_agent_one_as_seq_cst_acquire_cmpxchg: 16623; GFX11-WGP: ; %bb.0: ; %entry 16624; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 16625; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16626; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 16627; 
GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 16628; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 16629; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 16630; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 16631; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16632; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 16633; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 16634; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 16635; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 16636; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 16637; GFX11-WGP-NEXT: buffer_gl1_inv 16638; GFX11-WGP-NEXT: buffer_gl0_inv 16639; GFX11-WGP-NEXT: s_endpgm 16640; 16641; GFX11-CU-LABEL: global_agent_one_as_seq_cst_acquire_cmpxchg: 16642; GFX11-CU: ; %bb.0: ; %entry 16643; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 16644; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16645; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 16646; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 16647; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 16648; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 16649; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 16650; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16651; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 16652; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 16653; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 16654; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 16655; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 16656; GFX11-CU-NEXT: buffer_gl1_inv 16657; GFX11-CU-NEXT: buffer_gl0_inv 16658; GFX11-CU-NEXT: s_endpgm 16659; 16660; GFX12-WGP-LABEL: global_agent_one_as_seq_cst_acquire_cmpxchg: 16661; GFX12-WGP: ; %bb.0: ; %entry 16662; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 16663; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16664; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 16665; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 16666; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 16667; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 16668; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 16669; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed 
$vgpr1 def $vgpr1_vgpr2 killed $exec 16670; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 16671; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 16672; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 16673; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 16674; GFX12-WGP-NEXT: s_wait_storecnt 0x0 16675; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 16676; GFX12-WGP-NEXT: s_wait_storecnt 0x0 16677; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 16678; GFX12-WGP-NEXT: s_endpgm 16679; 16680; GFX12-CU-LABEL: global_agent_one_as_seq_cst_acquire_cmpxchg: 16681; GFX12-CU: ; %bb.0: ; %entry 16682; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 16683; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16684; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 16685; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 16686; GFX12-CU-NEXT: s_wait_kmcnt 0x0 16687; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 16688; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 16689; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16690; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 16691; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 16692; GFX12-CU-NEXT: s_wait_samplecnt 0x0 16693; GFX12-CU-NEXT: s_wait_loadcnt 0x0 16694; GFX12-CU-NEXT: s_wait_storecnt 0x0 16695; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 16696; GFX12-CU-NEXT: s_wait_storecnt 0x0 16697; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 16698; GFX12-CU-NEXT: s_endpgm 16699 ptr addrspace(1) %out, i32 %in, i32 %old) { 16700entry: 16701 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 16702 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst acquire 16703 ret void 16704} 16705 16706define amdgpu_kernel void @global_agent_one_as_monotonic_seq_cst_cmpxchg( 16707; GFX6-LABEL: global_agent_one_as_monotonic_seq_cst_cmpxchg: 16708; GFX6: ; %bb.0: ; %entry 16709; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 16710; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 16711; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 16712; 
GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 16713; GFX6-NEXT: s_waitcnt lgkmcnt(0) 16714; GFX6-NEXT: s_mov_b32 s12, s5 16715; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 16716; GFX6-NEXT: s_mov_b32 s10, 0x100f000 16717; GFX6-NEXT: s_mov_b32 s11, -1 16718; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 16719; GFX6-NEXT: s_mov_b32 s5, s12 16720; GFX6-NEXT: s_mov_b32 s6, s11 16721; GFX6-NEXT: s_mov_b32 s7, s10 16722; GFX6-NEXT: v_mov_b32_e32 v0, s9 16723; GFX6-NEXT: v_mov_b32_e32 v2, s8 16724; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 16725; GFX6-NEXT: v_mov_b32_e32 v1, v2 16726; GFX6-NEXT: s_waitcnt vmcnt(0) 16727; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 16728; GFX6-NEXT: s_waitcnt vmcnt(0) 16729; GFX6-NEXT: buffer_wbinvl1 16730; GFX6-NEXT: s_endpgm 16731; 16732; GFX7-LABEL: global_agent_one_as_monotonic_seq_cst_cmpxchg: 16733; GFX7: ; %bb.0: ; %entry 16734; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 16735; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 16736; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 16737; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 16738; GFX7-NEXT: s_mov_b64 s[10:11], 16 16739; GFX7-NEXT: s_waitcnt lgkmcnt(0) 16740; GFX7-NEXT: s_mov_b32 s4, s8 16741; GFX7-NEXT: s_mov_b32 s5, s9 16742; GFX7-NEXT: s_mov_b32 s9, s10 16743; GFX7-NEXT: s_mov_b32 s8, s11 16744; GFX7-NEXT: s_add_u32 s4, s4, s9 16745; GFX7-NEXT: s_addc_u32 s8, s5, s8 16746; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 16747; GFX7-NEXT: s_mov_b32 s5, s8 16748; GFX7-NEXT: v_mov_b32_e32 v2, s7 16749; GFX7-NEXT: v_mov_b32_e32 v0, s6 16750; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16751; GFX7-NEXT: v_mov_b32_e32 v3, v0 16752; GFX7-NEXT: v_mov_b32_e32 v0, s4 16753; GFX7-NEXT: v_mov_b32_e32 v1, s5 16754; GFX7-NEXT: s_waitcnt vmcnt(0) 16755; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 16756; GFX7-NEXT: s_waitcnt vmcnt(0) 16757; GFX7-NEXT: buffer_wbinvl1_vol 
16758; GFX7-NEXT: s_endpgm 16759; 16760; GFX10-WGP-LABEL: global_agent_one_as_monotonic_seq_cst_cmpxchg: 16761; GFX10-WGP: ; %bb.0: ; %entry 16762; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 16763; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 16764; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 16765; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 16766; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 16767; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 16768; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 16769; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16770; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 16771; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 16772; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 16773; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 16774; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 16775; GFX10-WGP-NEXT: buffer_gl1_inv 16776; GFX10-WGP-NEXT: buffer_gl0_inv 16777; GFX10-WGP-NEXT: s_endpgm 16778; 16779; GFX10-CU-LABEL: global_agent_one_as_monotonic_seq_cst_cmpxchg: 16780; GFX10-CU: ; %bb.0: ; %entry 16781; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 16782; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 16783; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 16784; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 16785; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 16786; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 16787; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 16788; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16789; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 16790; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 16791; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 16792; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 16793; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 16794; GFX10-CU-NEXT: buffer_gl1_inv 16795; GFX10-CU-NEXT: buffer_gl0_inv 16796; GFX10-CU-NEXT: s_endpgm 16797; 16798; SKIP-CACHE-INV-LABEL: global_agent_one_as_monotonic_seq_cst_cmpxchg: 16799; SKIP-CACHE-INV: ; %bb.0: ; %entry 16800; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 16801; 
SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 16802; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 16803; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 16804; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 16805; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 16806; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 16807; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 16808; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 16809; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 16810; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 16811; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 16812; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 16813; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 16814; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 16815; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 16816; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 16817; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 16818; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 16819; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 16820; SKIP-CACHE-INV-NEXT: s_endpgm 16821; 16822; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_seq_cst_cmpxchg: 16823; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 16824; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 16825; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 16826; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 16827; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 16828; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16829; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 16830; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 16831; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16832; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 16833; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 16834; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 16835; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 16836; GFX90A-NOTTGSPLIT-NEXT: 
buffer_wbinvl1_vol 16837; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 16838; 16839; GFX90A-TGSPLIT-LABEL: global_agent_one_as_monotonic_seq_cst_cmpxchg: 16840; GFX90A-TGSPLIT: ; %bb.0: ; %entry 16841; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 16842; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 16843; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 16844; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 16845; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16846; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 16847; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 16848; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16849; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 16850; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 16851; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 16852; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 16853; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 16854; GFX90A-TGSPLIT-NEXT: s_endpgm 16855; 16856; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_seq_cst_cmpxchg: 16857; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 16858; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 16859; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 16860; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 16861; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 16862; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16863; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 16864; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 16865; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16866; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 16867; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 16868; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 16869; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 16870; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 16871; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 16872; GFX940-NOTTGSPLIT-NEXT: s_endpgm 16873; 16874; GFX940-TGSPLIT-LABEL: 
global_agent_one_as_monotonic_seq_cst_cmpxchg: 16875; GFX940-TGSPLIT: ; %bb.0: ; %entry 16876; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 16877; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 16878; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 16879; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 16880; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16881; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 16882; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 16883; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16884; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 16885; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 16886; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 16887; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 16888; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 16889; GFX940-TGSPLIT-NEXT: buffer_inv sc1 16890; GFX940-TGSPLIT-NEXT: s_endpgm 16891; 16892; GFX11-WGP-LABEL: global_agent_one_as_monotonic_seq_cst_cmpxchg: 16893; GFX11-WGP: ; %bb.0: ; %entry 16894; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 16895; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16896; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 16897; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 16898; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 16899; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 16900; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 16901; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16902; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 16903; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 16904; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 16905; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 16906; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 16907; GFX11-WGP-NEXT: buffer_gl1_inv 16908; GFX11-WGP-NEXT: buffer_gl0_inv 16909; GFX11-WGP-NEXT: s_endpgm 16910; 16911; GFX11-CU-LABEL: global_agent_one_as_monotonic_seq_cst_cmpxchg: 16912; GFX11-CU: ; %bb.0: ; %entry 16913; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 16914; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 
0x0 16915; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 16916; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 16917; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 16918; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 16919; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 16920; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16921; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 16922; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 16923; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 16924; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 16925; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 16926; GFX11-CU-NEXT: buffer_gl1_inv 16927; GFX11-CU-NEXT: buffer_gl0_inv 16928; GFX11-CU-NEXT: s_endpgm 16929; 16930; GFX12-WGP-LABEL: global_agent_one_as_monotonic_seq_cst_cmpxchg: 16931; GFX12-WGP: ; %bb.0: ; %entry 16932; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 16933; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16934; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 16935; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 16936; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 16937; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 16938; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 16939; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16940; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 16941; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 16942; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 16943; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 16944; GFX12-WGP-NEXT: s_wait_storecnt 0x0 16945; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 16946; GFX12-WGP-NEXT: s_wait_storecnt 0x0 16947; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 16948; GFX12-WGP-NEXT: s_endpgm 16949; 16950; GFX12-CU-LABEL: global_agent_one_as_monotonic_seq_cst_cmpxchg: 16951; GFX12-CU: ; %bb.0: ; %entry 16952; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 16953; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16954; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 16955; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 16956; GFX12-CU-NEXT: s_wait_kmcnt 0x0 16957; 
GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 16958; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 16959; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16960; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 16961; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 16962; GFX12-CU-NEXT: s_wait_samplecnt 0x0 16963; GFX12-CU-NEXT: s_wait_loadcnt 0x0 16964; GFX12-CU-NEXT: s_wait_storecnt 0x0 16965; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 16966; GFX12-CU-NEXT: s_wait_storecnt 0x0 16967; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 16968; GFX12-CU-NEXT: s_endpgm 16969 ptr addrspace(1) %out, i32 %in, i32 %old) { 16970entry: 16971 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 16972 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic seq_cst 16973 ret void 16974} 16975 16976define amdgpu_kernel void @global_agent_one_as_acquire_seq_cst_cmpxchg( 16977; GFX6-LABEL: global_agent_one_as_acquire_seq_cst_cmpxchg: 16978; GFX6: ; %bb.0: ; %entry 16979; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 16980; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 16981; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 16982; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 16983; GFX6-NEXT: s_waitcnt lgkmcnt(0) 16984; GFX6-NEXT: s_mov_b32 s12, s5 16985; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 16986; GFX6-NEXT: s_mov_b32 s10, 0x100f000 16987; GFX6-NEXT: s_mov_b32 s11, -1 16988; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 16989; GFX6-NEXT: s_mov_b32 s5, s12 16990; GFX6-NEXT: s_mov_b32 s6, s11 16991; GFX6-NEXT: s_mov_b32 s7, s10 16992; GFX6-NEXT: v_mov_b32_e32 v0, s9 16993; GFX6-NEXT: v_mov_b32_e32 v2, s8 16994; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 16995; GFX6-NEXT: v_mov_b32_e32 v1, v2 16996; GFX6-NEXT: s_waitcnt vmcnt(0) 16997; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 16998; GFX6-NEXT: s_waitcnt vmcnt(0) 16999; GFX6-NEXT: 
buffer_wbinvl1 17000; GFX6-NEXT: s_endpgm 17001; 17002; GFX7-LABEL: global_agent_one_as_acquire_seq_cst_cmpxchg: 17003; GFX7: ; %bb.0: ; %entry 17004; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 17005; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 17006; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 17007; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 17008; GFX7-NEXT: s_mov_b64 s[10:11], 16 17009; GFX7-NEXT: s_waitcnt lgkmcnt(0) 17010; GFX7-NEXT: s_mov_b32 s4, s8 17011; GFX7-NEXT: s_mov_b32 s5, s9 17012; GFX7-NEXT: s_mov_b32 s9, s10 17013; GFX7-NEXT: s_mov_b32 s8, s11 17014; GFX7-NEXT: s_add_u32 s4, s4, s9 17015; GFX7-NEXT: s_addc_u32 s8, s5, s8 17016; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 17017; GFX7-NEXT: s_mov_b32 s5, s8 17018; GFX7-NEXT: v_mov_b32_e32 v2, s7 17019; GFX7-NEXT: v_mov_b32_e32 v0, s6 17020; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17021; GFX7-NEXT: v_mov_b32_e32 v3, v0 17022; GFX7-NEXT: v_mov_b32_e32 v0, s4 17023; GFX7-NEXT: v_mov_b32_e32 v1, s5 17024; GFX7-NEXT: s_waitcnt vmcnt(0) 17025; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 17026; GFX7-NEXT: s_waitcnt vmcnt(0) 17027; GFX7-NEXT: buffer_wbinvl1_vol 17028; GFX7-NEXT: s_endpgm 17029; 17030; GFX10-WGP-LABEL: global_agent_one_as_acquire_seq_cst_cmpxchg: 17031; GFX10-WGP: ; %bb.0: ; %entry 17032; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 17033; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 17034; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 17035; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 17036; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 17037; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 17038; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 17039; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17040; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 17041; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 17042; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 17043; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 17044; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 
17045; GFX10-WGP-NEXT: buffer_gl1_inv 17046; GFX10-WGP-NEXT: buffer_gl0_inv 17047; GFX10-WGP-NEXT: s_endpgm 17048; 17049; GFX10-CU-LABEL: global_agent_one_as_acquire_seq_cst_cmpxchg: 17050; GFX10-CU: ; %bb.0: ; %entry 17051; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 17052; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 17053; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 17054; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 17055; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 17056; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 17057; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 17058; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17059; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 17060; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 17061; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 17062; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 17063; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 17064; GFX10-CU-NEXT: buffer_gl1_inv 17065; GFX10-CU-NEXT: buffer_gl0_inv 17066; GFX10-CU-NEXT: s_endpgm 17067; 17068; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_seq_cst_cmpxchg: 17069; SKIP-CACHE-INV: ; %bb.0: ; %entry 17070; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 17071; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 17072; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 17073; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 17074; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 17075; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 17076; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 17077; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 17078; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 17079; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 17080; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 17081; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 17082; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 17083; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 17084; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 17085; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def 
$vgpr0_vgpr1 killed $exec 17086; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 17087; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 17088; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 17089; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 17090; SKIP-CACHE-INV-NEXT: s_endpgm 17091; 17092; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_seq_cst_cmpxchg: 17093; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 17094; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 17095; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 17096; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 17097; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 17098; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17099; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 17100; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 17101; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17102; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 17103; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 17104; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 17105; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 17106; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 17107; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 17108; 17109; GFX90A-TGSPLIT-LABEL: global_agent_one_as_acquire_seq_cst_cmpxchg: 17110; GFX90A-TGSPLIT: ; %bb.0: ; %entry 17111; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 17112; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 17113; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 17114; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 17115; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17116; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 17117; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 17118; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17119; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 17120; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 17121; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 17122; 
GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 17123; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 17124; GFX90A-TGSPLIT-NEXT: s_endpgm 17125; 17126; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_seq_cst_cmpxchg: 17127; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 17128; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 17129; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 17130; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 17131; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 17132; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17133; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 17134; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 17135; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17136; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 17137; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 17138; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 17139; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 17140; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 17141; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 17142; GFX940-NOTTGSPLIT-NEXT: s_endpgm 17143; 17144; GFX940-TGSPLIT-LABEL: global_agent_one_as_acquire_seq_cst_cmpxchg: 17145; GFX940-TGSPLIT: ; %bb.0: ; %entry 17146; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 17147; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 17148; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 17149; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 17150; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17151; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 17152; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 17153; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17154; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 17155; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 17156; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 17157; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 17158; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 17159; GFX940-TGSPLIT-NEXT: buffer_inv sc1 
17160; GFX940-TGSPLIT-NEXT: s_endpgm 17161; 17162; GFX11-WGP-LABEL: global_agent_one_as_acquire_seq_cst_cmpxchg: 17163; GFX11-WGP: ; %bb.0: ; %entry 17164; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 17165; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 17166; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 17167; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 17168; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 17169; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 17170; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 17171; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17172; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 17173; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 17174; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 17175; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 17176; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 17177; GFX11-WGP-NEXT: buffer_gl1_inv 17178; GFX11-WGP-NEXT: buffer_gl0_inv 17179; GFX11-WGP-NEXT: s_endpgm 17180; 17181; GFX11-CU-LABEL: global_agent_one_as_acquire_seq_cst_cmpxchg: 17182; GFX11-CU: ; %bb.0: ; %entry 17183; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 17184; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 17185; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 17186; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 17187; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 17188; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 17189; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 17190; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17191; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 17192; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 17193; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 17194; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 17195; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 17196; GFX11-CU-NEXT: buffer_gl1_inv 17197; GFX11-CU-NEXT: buffer_gl0_inv 17198; GFX11-CU-NEXT: s_endpgm 17199; 17200; GFX12-WGP-LABEL: global_agent_one_as_acquire_seq_cst_cmpxchg: 17201; GFX12-WGP: ; %bb.0: ; %entry 17202; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 17203; GFX12-WGP-NEXT: 
s_load_b64 s[0:1], s[4:5], 0x0 17204; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 17205; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 17206; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 17207; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 17208; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 17209; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17210; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 17211; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 17212; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 17213; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 17214; GFX12-WGP-NEXT: s_wait_storecnt 0x0 17215; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 17216; GFX12-WGP-NEXT: s_wait_storecnt 0x0 17217; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 17218; GFX12-WGP-NEXT: s_endpgm 17219; 17220; GFX12-CU-LABEL: global_agent_one_as_acquire_seq_cst_cmpxchg: 17221; GFX12-CU: ; %bb.0: ; %entry 17222; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 17223; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 17224; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 17225; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 17226; GFX12-CU-NEXT: s_wait_kmcnt 0x0 17227; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 17228; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 17229; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17230; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 17231; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 17232; GFX12-CU-NEXT: s_wait_samplecnt 0x0 17233; GFX12-CU-NEXT: s_wait_loadcnt 0x0 17234; GFX12-CU-NEXT: s_wait_storecnt 0x0 17235; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 17236; GFX12-CU-NEXT: s_wait_storecnt 0x0 17237; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 17238; GFX12-CU-NEXT: s_endpgm 17239 ptr addrspace(1) %out, i32 %in, i32 %old) { 17240entry: 17241 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 17242 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire seq_cst 17243 ret void 17244} 17245 17246define 
amdgpu_kernel void @global_agent_one_as_release_seq_cst_cmpxchg( 17247; GFX6-LABEL: global_agent_one_as_release_seq_cst_cmpxchg: 17248; GFX6: ; %bb.0: ; %entry 17249; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 17250; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 17251; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 17252; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 17253; GFX6-NEXT: s_waitcnt lgkmcnt(0) 17254; GFX6-NEXT: s_mov_b32 s12, s5 17255; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 17256; GFX6-NEXT: s_mov_b32 s10, 0x100f000 17257; GFX6-NEXT: s_mov_b32 s11, -1 17258; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 17259; GFX6-NEXT: s_mov_b32 s5, s12 17260; GFX6-NEXT: s_mov_b32 s6, s11 17261; GFX6-NEXT: s_mov_b32 s7, s10 17262; GFX6-NEXT: v_mov_b32_e32 v0, s9 17263; GFX6-NEXT: v_mov_b32_e32 v2, s8 17264; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 17265; GFX6-NEXT: v_mov_b32_e32 v1, v2 17266; GFX6-NEXT: s_waitcnt vmcnt(0) 17267; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 17268; GFX6-NEXT: s_waitcnt vmcnt(0) 17269; GFX6-NEXT: buffer_wbinvl1 17270; GFX6-NEXT: s_endpgm 17271; 17272; GFX7-LABEL: global_agent_one_as_release_seq_cst_cmpxchg: 17273; GFX7: ; %bb.0: ; %entry 17274; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 17275; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 17276; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 17277; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 17278; GFX7-NEXT: s_mov_b64 s[10:11], 16 17279; GFX7-NEXT: s_waitcnt lgkmcnt(0) 17280; GFX7-NEXT: s_mov_b32 s4, s8 17281; GFX7-NEXT: s_mov_b32 s5, s9 17282; GFX7-NEXT: s_mov_b32 s9, s10 17283; GFX7-NEXT: s_mov_b32 s8, s11 17284; GFX7-NEXT: s_add_u32 s4, s4, s9 17285; GFX7-NEXT: s_addc_u32 s8, s5, s8 17286; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 17287; GFX7-NEXT: s_mov_b32 s5, s8 17288; GFX7-NEXT: v_mov_b32_e32 v2, s7 17289; GFX7-NEXT: v_mov_b32_e32 v0, s6 17290; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def 
$vgpr2_vgpr3 killed $exec 17291; GFX7-NEXT: v_mov_b32_e32 v3, v0 17292; GFX7-NEXT: v_mov_b32_e32 v0, s4 17293; GFX7-NEXT: v_mov_b32_e32 v1, s5 17294; GFX7-NEXT: s_waitcnt vmcnt(0) 17295; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 17296; GFX7-NEXT: s_waitcnt vmcnt(0) 17297; GFX7-NEXT: buffer_wbinvl1_vol 17298; GFX7-NEXT: s_endpgm 17299; 17300; GFX10-WGP-LABEL: global_agent_one_as_release_seq_cst_cmpxchg: 17301; GFX10-WGP: ; %bb.0: ; %entry 17302; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 17303; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 17304; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 17305; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 17306; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 17307; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 17308; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 17309; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17310; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 17311; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 17312; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 17313; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 17314; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 17315; GFX10-WGP-NEXT: buffer_gl1_inv 17316; GFX10-WGP-NEXT: buffer_gl0_inv 17317; GFX10-WGP-NEXT: s_endpgm 17318; 17319; GFX10-CU-LABEL: global_agent_one_as_release_seq_cst_cmpxchg: 17320; GFX10-CU: ; %bb.0: ; %entry 17321; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 17322; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 17323; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 17324; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 17325; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 17326; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 17327; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 17328; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17329; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 17330; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 17331; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 17332; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 17333; GFX10-CU-NEXT: 
s_waitcnt_vscnt null, 0x0 17334; GFX10-CU-NEXT: buffer_gl1_inv 17335; GFX10-CU-NEXT: buffer_gl0_inv 17336; GFX10-CU-NEXT: s_endpgm 17337; 17338; SKIP-CACHE-INV-LABEL: global_agent_one_as_release_seq_cst_cmpxchg: 17339; SKIP-CACHE-INV: ; %bb.0: ; %entry 17340; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 17341; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 17342; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 17343; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 17344; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 17345; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 17346; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 17347; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 17348; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 17349; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 17350; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 17351; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 17352; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 17353; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 17354; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 17355; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 17356; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 17357; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 17358; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 17359; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 17360; SKIP-CACHE-INV-NEXT: s_endpgm 17361; 17362; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_release_seq_cst_cmpxchg: 17363; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 17364; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 17365; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 17366; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 17367; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 17368; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17369; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 17370; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 17371; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 
killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17372; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 17373; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 17374; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 17375; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 17376; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 17377; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 17378; 17379; GFX90A-TGSPLIT-LABEL: global_agent_one_as_release_seq_cst_cmpxchg: 17380; GFX90A-TGSPLIT: ; %bb.0: ; %entry 17381; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 17382; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 17383; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 17384; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 17385; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17386; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 17387; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 17388; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17389; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 17390; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 17391; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 17392; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 17393; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 17394; GFX90A-TGSPLIT-NEXT: s_endpgm 17395; 17396; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_release_seq_cst_cmpxchg: 17397; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 17398; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 17399; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 17400; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 17401; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 17402; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17403; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 17404; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 17405; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17406; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 17407; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 17408; 
GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 17409; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 17410; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 17411; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 17412; GFX940-NOTTGSPLIT-NEXT: s_endpgm 17413; 17414; GFX940-TGSPLIT-LABEL: global_agent_one_as_release_seq_cst_cmpxchg: 17415; GFX940-TGSPLIT: ; %bb.0: ; %entry 17416; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 17417; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 17418; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 17419; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 17420; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17421; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 17422; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 17423; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17424; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 17425; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 17426; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 17427; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 17428; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 17429; GFX940-TGSPLIT-NEXT: buffer_inv sc1 17430; GFX940-TGSPLIT-NEXT: s_endpgm 17431; 17432; GFX11-WGP-LABEL: global_agent_one_as_release_seq_cst_cmpxchg: 17433; GFX11-WGP: ; %bb.0: ; %entry 17434; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 17435; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 17436; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 17437; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 17438; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 17439; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 17440; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 17441; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17442; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 17443; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 17444; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 17445; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 17446; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 17447; 
GFX11-WGP-NEXT: buffer_gl1_inv 17448; GFX11-WGP-NEXT: buffer_gl0_inv 17449; GFX11-WGP-NEXT: s_endpgm 17450; 17451; GFX11-CU-LABEL: global_agent_one_as_release_seq_cst_cmpxchg: 17452; GFX11-CU: ; %bb.0: ; %entry 17453; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 17454; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 17455; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 17456; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 17457; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 17458; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 17459; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 17460; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17461; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 17462; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 17463; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 17464; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 17465; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 17466; GFX11-CU-NEXT: buffer_gl1_inv 17467; GFX11-CU-NEXT: buffer_gl0_inv 17468; GFX11-CU-NEXT: s_endpgm 17469; 17470; GFX12-WGP-LABEL: global_agent_one_as_release_seq_cst_cmpxchg: 17471; GFX12-WGP: ; %bb.0: ; %entry 17472; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 17473; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 17474; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 17475; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 17476; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 17477; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 17478; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 17479; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17480; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 17481; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 17482; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 17483; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 17484; GFX12-WGP-NEXT: s_wait_storecnt 0x0 17485; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 17486; GFX12-WGP-NEXT: s_wait_storecnt 0x0 17487; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 17488; GFX12-WGP-NEXT: s_endpgm 17489; 17490; GFX12-CU-LABEL: 
global_agent_one_as_release_seq_cst_cmpxchg: 17491; GFX12-CU: ; %bb.0: ; %entry 17492; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 17493; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 17494; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 17495; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 17496; GFX12-CU-NEXT: s_wait_kmcnt 0x0 17497; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 17498; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 17499; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17500; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 17501; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 17502; GFX12-CU-NEXT: s_wait_samplecnt 0x0 17503; GFX12-CU-NEXT: s_wait_loadcnt 0x0 17504; GFX12-CU-NEXT: s_wait_storecnt 0x0 17505; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 17506; GFX12-CU-NEXT: s_wait_storecnt 0x0 17507; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 17508; GFX12-CU-NEXT: s_endpgm 17509 ptr addrspace(1) %out, i32 %in, i32 %old) { 17510entry: 17511 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 17512 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") release seq_cst 17513 ret void 17514} 17515 17516define amdgpu_kernel void @global_agent_one_as_acq_rel_seq_cst_cmpxchg( 17517; GFX6-LABEL: global_agent_one_as_acq_rel_seq_cst_cmpxchg: 17518; GFX6: ; %bb.0: ; %entry 17519; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 17520; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 17521; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 17522; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 17523; GFX6-NEXT: s_waitcnt lgkmcnt(0) 17524; GFX6-NEXT: s_mov_b32 s12, s5 17525; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 17526; GFX6-NEXT: s_mov_b32 s10, 0x100f000 17527; GFX6-NEXT: s_mov_b32 s11, -1 17528; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 17529; GFX6-NEXT: s_mov_b32 s5, s12 17530; GFX6-NEXT: s_mov_b32 s6, s11 17531; GFX6-NEXT: s_mov_b32 s7, s10 17532; GFX6-NEXT: v_mov_b32_e32 v0, s9 17533; 
GFX6-NEXT: v_mov_b32_e32 v2, s8 17534; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 17535; GFX6-NEXT: v_mov_b32_e32 v1, v2 17536; GFX6-NEXT: s_waitcnt vmcnt(0) 17537; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 17538; GFX6-NEXT: s_waitcnt vmcnt(0) 17539; GFX6-NEXT: buffer_wbinvl1 17540; GFX6-NEXT: s_endpgm 17541; 17542; GFX7-LABEL: global_agent_one_as_acq_rel_seq_cst_cmpxchg: 17543; GFX7: ; %bb.0: ; %entry 17544; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 17545; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 17546; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 17547; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 17548; GFX7-NEXT: s_mov_b64 s[10:11], 16 17549; GFX7-NEXT: s_waitcnt lgkmcnt(0) 17550; GFX7-NEXT: s_mov_b32 s4, s8 17551; GFX7-NEXT: s_mov_b32 s5, s9 17552; GFX7-NEXT: s_mov_b32 s9, s10 17553; GFX7-NEXT: s_mov_b32 s8, s11 17554; GFX7-NEXT: s_add_u32 s4, s4, s9 17555; GFX7-NEXT: s_addc_u32 s8, s5, s8 17556; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 17557; GFX7-NEXT: s_mov_b32 s5, s8 17558; GFX7-NEXT: v_mov_b32_e32 v2, s7 17559; GFX7-NEXT: v_mov_b32_e32 v0, s6 17560; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17561; GFX7-NEXT: v_mov_b32_e32 v3, v0 17562; GFX7-NEXT: v_mov_b32_e32 v0, s4 17563; GFX7-NEXT: v_mov_b32_e32 v1, s5 17564; GFX7-NEXT: s_waitcnt vmcnt(0) 17565; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 17566; GFX7-NEXT: s_waitcnt vmcnt(0) 17567; GFX7-NEXT: buffer_wbinvl1_vol 17568; GFX7-NEXT: s_endpgm 17569; 17570; GFX10-WGP-LABEL: global_agent_one_as_acq_rel_seq_cst_cmpxchg: 17571; GFX10-WGP: ; %bb.0: ; %entry 17572; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 17573; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 17574; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 17575; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 17576; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 17577; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 17578; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 17579; GFX10-WGP-NEXT: ; 
kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17580; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 17581; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 17582; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 17583; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 17584; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 17585; GFX10-WGP-NEXT: buffer_gl1_inv 17586; GFX10-WGP-NEXT: buffer_gl0_inv 17587; GFX10-WGP-NEXT: s_endpgm 17588; 17589; GFX10-CU-LABEL: global_agent_one_as_acq_rel_seq_cst_cmpxchg: 17590; GFX10-CU: ; %bb.0: ; %entry 17591; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 17592; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 17593; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 17594; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 17595; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 17596; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 17597; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 17598; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17599; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 17600; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 17601; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 17602; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 17603; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 17604; GFX10-CU-NEXT: buffer_gl1_inv 17605; GFX10-CU-NEXT: buffer_gl0_inv 17606; GFX10-CU-NEXT: s_endpgm 17607; 17608; SKIP-CACHE-INV-LABEL: global_agent_one_as_acq_rel_seq_cst_cmpxchg: 17609; SKIP-CACHE-INV: ; %bb.0: ; %entry 17610; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 17611; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 17612; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 17613; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 17614; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 17615; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 17616; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 17617; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 17618; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 17619; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def 
$sgpr0_sgpr1_sgpr2_sgpr3 17620; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 17621; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 17622; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 17623; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 17624; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 17625; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 17626; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 17627; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 17628; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 17629; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 17630; SKIP-CACHE-INV-NEXT: s_endpgm 17631; 17632; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_seq_cst_cmpxchg: 17633; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 17634; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 17635; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 17636; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 17637; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 17638; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17639; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 17640; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 17641; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17642; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 17643; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 17644; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 17645; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 17646; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 17647; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 17648; 17649; GFX90A-TGSPLIT-LABEL: global_agent_one_as_acq_rel_seq_cst_cmpxchg: 17650; GFX90A-TGSPLIT: ; %bb.0: ; %entry 17651; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 17652; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 17653; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 17654; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 17655; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17656; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 
17657; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 17658; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17659; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 17660; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 17661; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 17662; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 17663; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 17664; GFX90A-TGSPLIT-NEXT: s_endpgm 17665; 17666; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_seq_cst_cmpxchg: 17667; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 17668; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 17669; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 17670; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 17671; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 17672; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17673; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 17674; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 17675; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17676; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 17677; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 17678; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 17679; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 17680; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 17681; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 17682; GFX940-NOTTGSPLIT-NEXT: s_endpgm 17683; 17684; GFX940-TGSPLIT-LABEL: global_agent_one_as_acq_rel_seq_cst_cmpxchg: 17685; GFX940-TGSPLIT: ; %bb.0: ; %entry 17686; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 17687; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 17688; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 17689; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 17690; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17691; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 17692; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 17693; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 
killed $exec 17694; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 17695; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 17696; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 17697; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 17698; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 17699; GFX940-TGSPLIT-NEXT: buffer_inv sc1 17700; GFX940-TGSPLIT-NEXT: s_endpgm 17701; 17702; GFX11-WGP-LABEL: global_agent_one_as_acq_rel_seq_cst_cmpxchg: 17703; GFX11-WGP: ; %bb.0: ; %entry 17704; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 17705; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 17706; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 17707; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 17708; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 17709; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 17710; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 17711; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17712; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 17713; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 17714; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 17715; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 17716; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 17717; GFX11-WGP-NEXT: buffer_gl1_inv 17718; GFX11-WGP-NEXT: buffer_gl0_inv 17719; GFX11-WGP-NEXT: s_endpgm 17720; 17721; GFX11-CU-LABEL: global_agent_one_as_acq_rel_seq_cst_cmpxchg: 17722; GFX11-CU: ; %bb.0: ; %entry 17723; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 17724; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 17725; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 17726; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 17727; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 17728; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 17729; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 17730; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17731; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 17732; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 17733; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 17734; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 17735; 
GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 17736; GFX11-CU-NEXT: buffer_gl1_inv 17737; GFX11-CU-NEXT: buffer_gl0_inv 17738; GFX11-CU-NEXT: s_endpgm 17739; 17740; GFX12-WGP-LABEL: global_agent_one_as_acq_rel_seq_cst_cmpxchg: 17741; GFX12-WGP: ; %bb.0: ; %entry 17742; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 17743; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 17744; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 17745; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 17746; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 17747; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 17748; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 17749; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17750; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 17751; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 17752; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 17753; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 17754; GFX12-WGP-NEXT: s_wait_storecnt 0x0 17755; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 17756; GFX12-WGP-NEXT: s_wait_storecnt 0x0 17757; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 17758; GFX12-WGP-NEXT: s_endpgm 17759; 17760; GFX12-CU-LABEL: global_agent_one_as_acq_rel_seq_cst_cmpxchg: 17761; GFX12-CU: ; %bb.0: ; %entry 17762; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 17763; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 17764; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 17765; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 17766; GFX12-CU-NEXT: s_wait_kmcnt 0x0 17767; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 17768; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 17769; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17770; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 17771; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 17772; GFX12-CU-NEXT: s_wait_samplecnt 0x0 17773; GFX12-CU-NEXT: s_wait_loadcnt 0x0 17774; GFX12-CU-NEXT: s_wait_storecnt 0x0 17775; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV 17776; GFX12-CU-NEXT: s_wait_storecnt 0x0 17777; GFX12-CU-NEXT: global_inv 
scope:SCOPE_DEV 17778; GFX12-CU-NEXT: s_endpgm 17779 ptr addrspace(1) %out, i32 %in, i32 %old) { 17780entry: 17781 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 17782 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel seq_cst 17783 ret void 17784}

; Non-returning volatile cmpxchg on global memory (i32 element 4 of %out) with
; syncscope("agent-one-as") and seq_cst success / seq_cst failure ordering.
; The result pair %val is never used.
define amdgpu_kernel void @global_agent_one_as_seq_cst_seq_cst_cmpxchg(
; GFX6-LABEL: global_agent_one_as_seq_cst_seq_cst_cmpxchg:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_mov_b32 s12, s5
; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
; GFX6-NEXT:    s_mov_b32 s11, -1
; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
; GFX6-NEXT:    s_mov_b32 s5, s12
; GFX6-NEXT:    s_mov_b32 s6, s11
; GFX6-NEXT:    s_mov_b32 s7, s10
; GFX6-NEXT:    v_mov_b32_e32 v0, s9
; GFX6-NEXT:    v_mov_b32_e32 v2, s8
; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX6-NEXT:    v_mov_b32_e32 v1, v2
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_wbinvl1
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: global_agent_one_as_seq_cst_seq_cst_cmpxchg:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
; GFX7-NEXT:    s_mov_b64 s[10:11], 16
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s4, s8
; GFX7-NEXT:    s_mov_b32 s5, s9
; GFX7-NEXT:    s_mov_b32 s9, s10
; GFX7-NEXT:    s_mov_b32 s8, s11
; GFX7-NEXT:    s_add_u32 s4, s4, s9
; GFX7-NEXT:    s_addc_u32 s8, s5, s8
; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
; GFX7-NEXT:    s_mov_b32 s5, s8
; GFX7-NEXT:    v_mov_b32_e32 v2, s7
; GFX7-NEXT:    v_mov_b32_e32 v0, s6
; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX7-NEXT:    v_mov_b32_e32 v3, v0
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    buffer_wbinvl1_vol
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: global_agent_one_as_seq_cst_seq_cst_cmpxchg:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT:    buffer_gl1_inv
; GFX10-WGP-NEXT:    buffer_gl0_inv
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: global_agent_one_as_seq_cst_seq_cst_cmpxchg:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT:    buffer_gl1_inv
; GFX10-CU-NEXT:    buffer_gl0_inv
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_seq_cst_cmpxchg:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT:    s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_seq_cst_cmpxchg:
; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_agent_one_as_seq_cst_seq_cst_cmpxchg:
; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT:    s_endpgm
;
; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_seq_cst_cmpxchg:
; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX940-TGSPLIT-LABEL: global_agent_one_as_seq_cst_seq_cst_cmpxchg:
; GFX940-TGSPLIT:       ; %bb.0: ; %entry
; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
; GFX940-TGSPLIT-NEXT:    s_endpgm
;
; GFX11-WGP-LABEL: global_agent_one_as_seq_cst_seq_cst_cmpxchg:
; GFX11-WGP:       ; %bb.0: ; %entry
; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT:    buffer_gl1_inv
; GFX11-WGP-NEXT:    buffer_gl0_inv
; GFX11-WGP-NEXT:    s_endpgm
;
; GFX11-CU-LABEL: global_agent_one_as_seq_cst_seq_cst_cmpxchg:
; GFX11-CU:       ; %bb.0: ; %entry
; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-CU-NEXT:    buffer_gl1_inv
; GFX11-CU-NEXT:    buffer_gl0_inv
; GFX11-CU-NEXT:    s_endpgm
;
; GFX12-WGP-LABEL: global_agent_one_as_seq_cst_seq_cst_cmpxchg:
; GFX12-WGP:       ; %bb.0: ; %entry
; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
; GFX12-WGP-NEXT:    s_endpgm
;
; GFX12-CU-LABEL: global_agent_one_as_seq_cst_seq_cst_cmpxchg:
; GFX12-CU:       ; %bb.0: ; %entry
; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
; GFX12-CU-NEXT:    s_wait_storecnt 0x0
; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
; GFX12-CU-NEXT:    s_wait_storecnt 0x0
; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT:    s_endpgm
    ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst seq_cst
  ret void
}

; Returning volatile cmpxchg on global memory (i32 element 4 of %out) with
; syncscope("agent-one-as") and monotonic success / monotonic failure ordering.
; Field 0 of the result pair (the loaded value) is stored to %out.
define amdgpu_kernel void @global_agent_one_as_monotonic_monotonic_ret_cmpxchg(
; GFX6-LABEL: global_agent_one_as_monotonic_monotonic_ret_cmpxchg:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_mov_b32 s12, s5
; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
; GFX6-NEXT:    s_mov_b32 s11, -1
; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
; GFX6-NEXT:    s_mov_b32 s5, s12
; GFX6-NEXT:    s_mov_b32 s6, s11
; GFX6-NEXT:    s_mov_b32 s7, s10
; GFX6-NEXT:    v_mov_b32_e32 v0, s9
; GFX6-NEXT:    v_mov_b32_e32 v2, s8
; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX6-NEXT:    v_mov_b32_e32 v1, v2
; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: global_agent_one_as_monotonic_monotonic_ret_cmpxchg:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
; GFX7-NEXT:    s_mov_b64 s[12:13], 16
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, s4
; GFX7-NEXT:    s_mov_b32 s7, s5
; GFX7-NEXT:    s_mov_b32 s11, s12
; GFX7-NEXT:    s_mov_b32 s10, s13
; GFX7-NEXT:    s_add_u32 s6, s6, s11
; GFX7-NEXT:    s_addc_u32 s10, s7, s10
; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
; GFX7-NEXT:    s_mov_b32 s7, s10
; GFX7-NEXT:    v_mov_b32_e32 v2, s9
; GFX7-NEXT:    v_mov_b32_e32 v0, s8
; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX7-NEXT:    v_mov_b32_e32 v3, v0
; GFX7-NEXT:    v_mov_b32_e32 v0, s6
; GFX7-NEXT:    v_mov_b32_e32 v1, s7
; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    flat_store_dword v[0:1], v2
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: global_agent_one_as_monotonic_monotonic_ret_cmpxchg:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: global_agent_one_as_monotonic_monotonic_ret_cmpxchg:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_agent_one_as_monotonic_monotonic_ret_cmpxchg:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SKIP-CACHE-INV-NEXT:    s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_monotonic_ret_cmpxchg:
; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_agent_one_as_monotonic_monotonic_ret_cmpxchg:
; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
; GFX90A-TGSPLIT-NEXT:    s_endpgm
;
; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_monotonic_ret_cmpxchg:
; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX940-TGSPLIT-LABEL: global_agent_one_as_monotonic_monotonic_ret_cmpxchg:
; GFX940-TGSPLIT:       ; %bb.0: ; %entry
; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
; GFX940-TGSPLIT-NEXT:    s_endpgm
;
; GFX11-WGP-LABEL: global_agent_one_as_monotonic_monotonic_ret_cmpxchg:
; GFX11-WGP:       ; %bb.0: ; %entry
; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-WGP-NEXT:    s_endpgm
;
; GFX11-CU-LABEL: global_agent_one_as_monotonic_monotonic_ret_cmpxchg:
; GFX11-CU:       ; %bb.0: ; %entry
; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-CU-NEXT:    s_endpgm
;
; GFX12-WGP-LABEL: global_agent_one_as_monotonic_monotonic_ret_cmpxchg:
; GFX12-WGP:       ; %bb.0: ; %entry
; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX12-WGP-NEXT:    s_endpgm
;
; GFX12-CU-LABEL: global_agent_one_as_monotonic_monotonic_ret_cmpxchg:
; GFX12-CU:       ; %bb.0: ; %entry
; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT:    s_endpgm
    ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic monotonic
  %val0 = extractvalue { i32, i1 } %val, 0
  store i32 %val0, ptr addrspace(1) %out, align 4
  ret void
}

18304define amdgpu_kernel void @global_agent_one_as_acquire_monotonic_ret_cmpxchg( 18305; GFX6-LABEL: global_agent_one_as_acquire_monotonic_ret_cmpxchg: 18306; GFX6: ; %bb.0: ; %entry 18307; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 18308; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 18309; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 18310; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 18311; GFX6-NEXT: s_waitcnt lgkmcnt(0) 18312; GFX6-NEXT: s_mov_b32 s12, s5 18313; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 18314; GFX6-NEXT: s_mov_b32 s10, 0x100f000 18315; GFX6-NEXT: s_mov_b32 s11, -1 18316; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 18317; GFX6-NEXT: s_mov_b32 s5, s12 18318; GFX6-NEXT: s_mov_b32 s6, s11 18319; GFX6-NEXT: s_mov_b32 s7, s10 18320; GFX6-NEXT: v_mov_b32_e32 v0, s9 18321; GFX6-NEXT: v_mov_b32_e32 v2, s8 18322; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 18323; GFX6-NEXT: v_mov_b32_e32 v1, v2 18324; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 18325; GFX6-NEXT: s_waitcnt vmcnt(0) 18326; GFX6-NEXT: buffer_wbinvl1 18327; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 18328; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 18329; GFX6-NEXT: s_endpgm 18330; 18331; GFX7-LABEL: global_agent_one_as_acquire_monotonic_ret_cmpxchg: 18332; GFX7: ; %bb.0: ; %entry 18333; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 18334; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 18335; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 18336; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 18337; GFX7-NEXT: s_mov_b64 s[12:13], 16 18338; GFX7-NEXT: s_waitcnt lgkmcnt(0) 18339; GFX7-NEXT: s_mov_b32 s6, s4 18340; GFX7-NEXT: s_mov_b32 s7, s5 18341; GFX7-NEXT: s_mov_b32 s11, s12 18342; GFX7-NEXT:
s_mov_b32 s10, s13 18343; GFX7-NEXT: s_add_u32 s6, s6, s11 18344; GFX7-NEXT: s_addc_u32 s10, s7, s10 18345; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 18346; GFX7-NEXT: s_mov_b32 s7, s10 18347; GFX7-NEXT: v_mov_b32_e32 v2, s9 18348; GFX7-NEXT: v_mov_b32_e32 v0, s8 18349; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18350; GFX7-NEXT: v_mov_b32_e32 v3, v0 18351; GFX7-NEXT: v_mov_b32_e32 v0, s6 18352; GFX7-NEXT: v_mov_b32_e32 v1, s7 18353; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 18354; GFX7-NEXT: s_waitcnt vmcnt(0) 18355; GFX7-NEXT: buffer_wbinvl1_vol 18356; GFX7-NEXT: v_mov_b32_e32 v0, s4 18357; GFX7-NEXT: v_mov_b32_e32 v1, s5 18358; GFX7-NEXT: flat_store_dword v[0:1], v2 18359; GFX7-NEXT: s_endpgm 18360; 18361; GFX10-WGP-LABEL: global_agent_one_as_acquire_monotonic_ret_cmpxchg: 18362; GFX10-WGP: ; %bb.0: ; %entry 18363; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 18364; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 18365; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 18366; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 18367; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 18368; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 18369; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 18370; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18371; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 18372; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 18373; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 18374; GFX10-WGP-NEXT: buffer_gl1_inv 18375; GFX10-WGP-NEXT: buffer_gl0_inv 18376; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 18377; GFX10-WGP-NEXT: s_endpgm 18378; 18379; GFX10-CU-LABEL: global_agent_one_as_acquire_monotonic_ret_cmpxchg: 18380; GFX10-CU: ; %bb.0: ; %entry 18381; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 18382; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 18383; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 18384; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 18385; GFX10-CU-NEXT: s_waitcnt 
lgkmcnt(0) 18386; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 18387; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 18388; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18389; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 18390; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 18391; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 18392; GFX10-CU-NEXT: buffer_gl1_inv 18393; GFX10-CU-NEXT: buffer_gl0_inv 18394; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 18395; GFX10-CU-NEXT: s_endpgm 18396; 18397; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_monotonic_ret_cmpxchg: 18398; SKIP-CACHE-INV: ; %bb.0: ; %entry 18399; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 18400; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 18401; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 18402; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 18403; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 18404; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 18405; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 18406; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 18407; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 18408; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 18409; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 18410; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 18411; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 18412; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 18413; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 18414; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 18415; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 18416; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 18417; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 18418; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 18419; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 18420; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 18421; SKIP-CACHE-INV-NEXT: s_endpgm 18422; 18423; 
GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_monotonic_ret_cmpxchg: 18424; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 18425; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 18426; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 18427; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 18428; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 18429; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 18430; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 18431; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 18432; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18433; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 18434; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 18435; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 18436; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 18437; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 18438; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 18439; 18440; GFX90A-TGSPLIT-LABEL: global_agent_one_as_acquire_monotonic_ret_cmpxchg: 18441; GFX90A-TGSPLIT: ; %bb.0: ; %entry 18442; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 18443; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 18444; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 18445; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 18446; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 18447; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 18448; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 18449; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18450; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 18451; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 18452; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 18453; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 18454; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 18455; GFX90A-TGSPLIT-NEXT: s_endpgm 18456; 18457; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_monotonic_ret_cmpxchg: 18458; GFX940-NOTTGSPLIT: ; %bb.0: 
; %entry 18459; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 18460; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 18461; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 18462; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 18463; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 18464; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 18465; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 18466; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18467; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 18468; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 18469; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 18470; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 18471; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 18472; GFX940-NOTTGSPLIT-NEXT: s_endpgm 18473; 18474; GFX940-TGSPLIT-LABEL: global_agent_one_as_acquire_monotonic_ret_cmpxchg: 18475; GFX940-TGSPLIT: ; %bb.0: ; %entry 18476; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 18477; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 18478; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 18479; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 18480; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 18481; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 18482; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 18483; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18484; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 18485; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 18486; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 18487; GFX940-TGSPLIT-NEXT: buffer_inv sc1 18488; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 18489; GFX940-TGSPLIT-NEXT: s_endpgm 18490; 18491; GFX11-WGP-LABEL: global_agent_one_as_acquire_monotonic_ret_cmpxchg: 18492; GFX11-WGP: ; %bb.0: ; %entry 18493; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 18494; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 18495; 
GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 18496; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 18497; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 18498; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 18499; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 18500; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18501; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 18502; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 18503; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 18504; GFX11-WGP-NEXT: buffer_gl1_inv 18505; GFX11-WGP-NEXT: buffer_gl0_inv 18506; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 18507; GFX11-WGP-NEXT: s_endpgm 18508; 18509; GFX11-CU-LABEL: global_agent_one_as_acquire_monotonic_ret_cmpxchg: 18510; GFX11-CU: ; %bb.0: ; %entry 18511; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 18512; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 18513; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 18514; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 18515; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 18516; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 18517; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 18518; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18519; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 18520; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 18521; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 18522; GFX11-CU-NEXT: buffer_gl1_inv 18523; GFX11-CU-NEXT: buffer_gl0_inv 18524; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 18525; GFX11-CU-NEXT: s_endpgm 18526; 18527; GFX12-WGP-LABEL: global_agent_one_as_acquire_monotonic_ret_cmpxchg: 18528; GFX12-WGP: ; %bb.0: ; %entry 18529; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 18530; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 18531; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 18532; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 18533; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 18534; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 18535; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 18536; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def 
; NOTE(review): this span holds the tail of the acquire/monotonic cmpxchg test,
; the whole of @global_agent_one_as_acq_rel_monotonic_ret_cmpxchg, and the head
; of the seq_cst/monotonic cmpxchg test.
;
; @global_agent_one_as_acq_rel_monotonic_ret_cmpxchg performs a volatile
; cmpxchg (syncscope "agent-one-as", success ordering acq_rel, failure ordering
; monotonic) on the i32 located four elements past %out, extracts field 0 of
; the { i32, i1 } result and stores it to %out. The 4-element getelementptr
; appears in the expected output as "offset:16" on the atomic; the gfx700
; expectations instead add 16 to the pointer with s_add_u32/s_addc_u32.
;
; Compared with the acquire/monotonic expectations earlier in this span, the
; acq_rel expectations place extra instructions ahead of the atomic: a vmcnt
; wait on gfx90a, buffer_wbl2 sc1 plus a vmcnt wait on gfx940, vmcnt and vscnt
; waits on gfx11, and bvhcnt/samplecnt/loadcnt/storecnt waits on gfx12.
; The skip-cache-invalidations expectations contain no cache invalidate
; instruction after the atomic.
;
; The expectations are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of editing them
; by hand.
$vgpr1_vgpr2 killed $exec 18537; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 18538; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV 18539; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 18540; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 18541; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 18542; GFX12-WGP-NEXT: s_endpgm 18543; 18544; GFX12-CU-LABEL: global_agent_one_as_acquire_monotonic_ret_cmpxchg: 18545; GFX12-CU: ; %bb.0: ; %entry 18546; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 18547; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 18548; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 18549; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 18550; GFX12-CU-NEXT: s_wait_kmcnt 0x0 18551; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 18552; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 18553; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18554; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 18555; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV 18556; GFX12-CU-NEXT: s_wait_loadcnt 0x0 18557; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 18558; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 18559; GFX12-CU-NEXT: s_endpgm 18560 ptr addrspace(1) %out, i32 %in, i32 %old) { 18561entry: 18562 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 18563 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire monotonic 18564 %val0 = extractvalue { i32, i1 } %val, 0 18565 store i32 %val0, ptr addrspace(1) %out, align 4 18566 ret void 18567} 18568 18569define amdgpu_kernel void @global_agent_one_as_acq_rel_monotonic_ret_cmpxchg( 18570; GFX6-LABEL: global_agent_one_as_acq_rel_monotonic_ret_cmpxchg: 18571; GFX6: ; %bb.0: ; %entry 18572; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 18573; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 18574; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 18575; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 18576; GFX6-NEXT: s_waitcnt 
lgkmcnt(0) 18577; GFX6-NEXT: s_mov_b32 s12, s5 18578; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 18579; GFX6-NEXT: s_mov_b32 s10, 0x100f000 18580; GFX6-NEXT: s_mov_b32 s11, -1 18581; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 18582; GFX6-NEXT: s_mov_b32 s5, s12 18583; GFX6-NEXT: s_mov_b32 s6, s11 18584; GFX6-NEXT: s_mov_b32 s7, s10 18585; GFX6-NEXT: v_mov_b32_e32 v0, s9 18586; GFX6-NEXT: v_mov_b32_e32 v2, s8 18587; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 18588; GFX6-NEXT: v_mov_b32_e32 v1, v2 18589; GFX6-NEXT: s_waitcnt vmcnt(0) 18590; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 18591; GFX6-NEXT: s_waitcnt vmcnt(0) 18592; GFX6-NEXT: buffer_wbinvl1 18593; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 18594; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 18595; GFX6-NEXT: s_endpgm 18596; 18597; GFX7-LABEL: global_agent_one_as_acq_rel_monotonic_ret_cmpxchg: 18598; GFX7: ; %bb.0: ; %entry 18599; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 18600; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 18601; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 18602; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 18603; GFX7-NEXT: s_mov_b64 s[12:13], 16 18604; GFX7-NEXT: s_waitcnt lgkmcnt(0) 18605; GFX7-NEXT: s_mov_b32 s6, s4 18606; GFX7-NEXT: s_mov_b32 s7, s5 18607; GFX7-NEXT: s_mov_b32 s11, s12 18608; GFX7-NEXT: s_mov_b32 s10, s13 18609; GFX7-NEXT: s_add_u32 s6, s6, s11 18610; GFX7-NEXT: s_addc_u32 s10, s7, s10 18611; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 18612; GFX7-NEXT: s_mov_b32 s7, s10 18613; GFX7-NEXT: v_mov_b32_e32 v2, s9 18614; GFX7-NEXT: v_mov_b32_e32 v0, s8 18615; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18616; GFX7-NEXT: v_mov_b32_e32 v3, v0 18617; GFX7-NEXT: v_mov_b32_e32 v0, s6 18618; GFX7-NEXT: v_mov_b32_e32 v1, s7 18619; GFX7-NEXT: s_waitcnt vmcnt(0) 18620; GFX7-NEXT: flat_atomic_cmpswap v2, 
v[0:1], v[2:3] glc 18621; GFX7-NEXT: s_waitcnt vmcnt(0) 18622; GFX7-NEXT: buffer_wbinvl1_vol 18623; GFX7-NEXT: v_mov_b32_e32 v0, s4 18624; GFX7-NEXT: v_mov_b32_e32 v1, s5 18625; GFX7-NEXT: flat_store_dword v[0:1], v2 18626; GFX7-NEXT: s_endpgm 18627; 18628; GFX10-WGP-LABEL: global_agent_one_as_acq_rel_monotonic_ret_cmpxchg: 18629; GFX10-WGP: ; %bb.0: ; %entry 18630; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 18631; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 18632; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 18633; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 18634; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 18635; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 18636; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 18637; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18638; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 18639; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 18640; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 18641; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 18642; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 18643; GFX10-WGP-NEXT: buffer_gl1_inv 18644; GFX10-WGP-NEXT: buffer_gl0_inv 18645; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 18646; GFX10-WGP-NEXT: s_endpgm 18647; 18648; GFX10-CU-LABEL: global_agent_one_as_acq_rel_monotonic_ret_cmpxchg: 18649; GFX10-CU: ; %bb.0: ; %entry 18650; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 18651; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 18652; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 18653; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 18654; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 18655; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 18656; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 18657; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18658; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 18659; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 18660; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 18661; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 18662; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 
18663; GFX10-CU-NEXT: buffer_gl1_inv 18664; GFX10-CU-NEXT: buffer_gl0_inv 18665; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 18666; GFX10-CU-NEXT: s_endpgm 18667; 18668; SKIP-CACHE-INV-LABEL: global_agent_one_as_acq_rel_monotonic_ret_cmpxchg: 18669; SKIP-CACHE-INV: ; %bb.0: ; %entry 18670; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 18671; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 18672; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 18673; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 18674; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 18675; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 18676; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 18677; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 18678; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 18679; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 18680; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 18681; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 18682; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 18683; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 18684; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 18685; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 18686; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 18687; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 18688; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 18689; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 18690; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 18691; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 18692; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 18693; SKIP-CACHE-INV-NEXT: s_endpgm 18694; 18695; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_monotonic_ret_cmpxchg: 18696; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 18697; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 18698; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 18699; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 18700; 
GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 18701; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 18702; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 18703; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 18704; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18705; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 18706; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 18707; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 18708; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 18709; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 18710; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 18711; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 18712; 18713; GFX90A-TGSPLIT-LABEL: global_agent_one_as_acq_rel_monotonic_ret_cmpxchg: 18714; GFX90A-TGSPLIT: ; %bb.0: ; %entry 18715; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 18716; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 18717; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 18718; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 18719; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 18720; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 18721; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 18722; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18723; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 18724; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 18725; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 18726; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 18727; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 18728; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 18729; GFX90A-TGSPLIT-NEXT: s_endpgm 18730; 18731; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_monotonic_ret_cmpxchg: 18732; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 18733; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 18734; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 18735; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 18736; 
GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 18737; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 18738; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 18739; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 18740; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18741; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 18742; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 18743; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 18744; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 18745; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 18746; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 18747; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 18748; GFX940-NOTTGSPLIT-NEXT: s_endpgm 18749; 18750; GFX940-TGSPLIT-LABEL: global_agent_one_as_acq_rel_monotonic_ret_cmpxchg: 18751; GFX940-TGSPLIT: ; %bb.0: ; %entry 18752; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 18753; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 18754; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 18755; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 18756; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 18757; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 18758; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 18759; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18760; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 18761; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 18762; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 18763; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 18764; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 18765; GFX940-TGSPLIT-NEXT: buffer_inv sc1 18766; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 18767; GFX940-TGSPLIT-NEXT: s_endpgm 18768; 18769; GFX11-WGP-LABEL: global_agent_one_as_acq_rel_monotonic_ret_cmpxchg: 18770; GFX11-WGP: ; %bb.0: ; %entry 18771; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 18772; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 18773; 
GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 18774; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 18775; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 18776; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 18777; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 18778; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18779; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 18780; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 18781; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 18782; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 18783; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 18784; GFX11-WGP-NEXT: buffer_gl1_inv 18785; GFX11-WGP-NEXT: buffer_gl0_inv 18786; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 18787; GFX11-WGP-NEXT: s_endpgm 18788; 18789; GFX11-CU-LABEL: global_agent_one_as_acq_rel_monotonic_ret_cmpxchg: 18790; GFX11-CU: ; %bb.0: ; %entry 18791; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 18792; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 18793; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 18794; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 18795; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 18796; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 18797; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 18798; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18799; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 18800; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 18801; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 18802; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 18803; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 18804; GFX11-CU-NEXT: buffer_gl1_inv 18805; GFX11-CU-NEXT: buffer_gl0_inv 18806; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 18807; GFX11-CU-NEXT: s_endpgm 18808; 18809; GFX12-WGP-LABEL: global_agent_one_as_acq_rel_monotonic_ret_cmpxchg: 18810; GFX12-WGP: ; %bb.0: ; %entry 18811; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 18812; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 18813; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 18814; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 18815; 
GFX12-WGP-NEXT: s_wait_kmcnt 0x0 18816; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 18817; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 18818; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18819; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 18820; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 18821; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 18822; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 18823; GFX12-WGP-NEXT: s_wait_storecnt 0x0 18824; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV 18825; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 18826; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 18827; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 18828; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 18829; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 18830; GFX12-WGP-NEXT: s_endpgm 18831; 18832; GFX12-CU-LABEL: global_agent_one_as_acq_rel_monotonic_ret_cmpxchg: 18833; GFX12-CU: ; %bb.0: ; %entry 18834; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 18835; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 18836; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 18837; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 18838; GFX12-CU-NEXT: s_wait_kmcnt 0x0 18839; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 18840; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 18841; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18842; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 18843; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 18844; GFX12-CU-NEXT: s_wait_samplecnt 0x0 18845; GFX12-CU-NEXT: s_wait_loadcnt 0x0 18846; GFX12-CU-NEXT: s_wait_storecnt 0x0 18847; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV 18848; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 18849; GFX12-CU-NEXT: s_wait_samplecnt 0x0 18850; GFX12-CU-NEXT: s_wait_loadcnt 0x0 18851; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 18852; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 18853; GFX12-CU-NEXT: s_endpgm 18854 ptr addrspace(1) %out, i32 %in, i32 %old) { 18855entry: 18856 %gep = getelementptr i32, 
ptr addrspace(1) %out, i32 4 18857 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel monotonic 18858 %val0 = extractvalue { i32, i1 } %val, 0 18859 store i32 %val0, ptr addrspace(1) %out, align 4 18860 ret void 18861} 18862 18863define amdgpu_kernel void @global_agent_one_as_seq_cst_monotonic_ret_cmpxchg( 18864; GFX6-LABEL: global_agent_one_as_seq_cst_monotonic_ret_cmpxchg: 18865; GFX6: ; %bb.0: ; %entry 18866; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 18867; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 18868; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 18869; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 18870; GFX6-NEXT: s_waitcnt lgkmcnt(0) 18871; GFX6-NEXT: s_mov_b32 s12, s5 18872; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 18873; GFX6-NEXT: s_mov_b32 s10, 0x100f000 18874; GFX6-NEXT: s_mov_b32 s11, -1 18875; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 18876; GFX6-NEXT: s_mov_b32 s5, s12 18877; GFX6-NEXT: s_mov_b32 s6, s11 18878; GFX6-NEXT: s_mov_b32 s7, s10 18879; GFX6-NEXT: v_mov_b32_e32 v0, s9 18880; GFX6-NEXT: v_mov_b32_e32 v2, s8 18881; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 18882; GFX6-NEXT: v_mov_b32_e32 v1, v2 18883; GFX6-NEXT: s_waitcnt vmcnt(0) 18884; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 18885; GFX6-NEXT: s_waitcnt vmcnt(0) 18886; GFX6-NEXT: buffer_wbinvl1 18887; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 18888; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 18889; GFX6-NEXT: s_endpgm 18890; 18891; GFX7-LABEL: global_agent_one_as_seq_cst_monotonic_ret_cmpxchg: 18892; GFX7: ; %bb.0: ; %entry 18893; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 18894; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 18895; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 18896; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 18897; GFX7-NEXT: s_mov_b64 s[12:13], 16 18898; GFX7-NEXT: s_waitcnt lgkmcnt(0) 18899; 
GFX7-NEXT: s_mov_b32 s6, s4 18900; GFX7-NEXT: s_mov_b32 s7, s5 18901; GFX7-NEXT: s_mov_b32 s11, s12 18902; GFX7-NEXT: s_mov_b32 s10, s13 18903; GFX7-NEXT: s_add_u32 s6, s6, s11 18904; GFX7-NEXT: s_addc_u32 s10, s7, s10 18905; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 18906; GFX7-NEXT: s_mov_b32 s7, s10 18907; GFX7-NEXT: v_mov_b32_e32 v2, s9 18908; GFX7-NEXT: v_mov_b32_e32 v0, s8 18909; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18910; GFX7-NEXT: v_mov_b32_e32 v3, v0 18911; GFX7-NEXT: v_mov_b32_e32 v0, s6 18912; GFX7-NEXT: v_mov_b32_e32 v1, s7 18913; GFX7-NEXT: s_waitcnt vmcnt(0) 18914; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 18915; GFX7-NEXT: s_waitcnt vmcnt(0) 18916; GFX7-NEXT: buffer_wbinvl1_vol 18917; GFX7-NEXT: v_mov_b32_e32 v0, s4 18918; GFX7-NEXT: v_mov_b32_e32 v1, s5 18919; GFX7-NEXT: flat_store_dword v[0:1], v2 18920; GFX7-NEXT: s_endpgm 18921; 18922; GFX10-WGP-LABEL: global_agent_one_as_seq_cst_monotonic_ret_cmpxchg: 18923; GFX10-WGP: ; %bb.0: ; %entry 18924; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 18925; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 18926; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 18927; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 18928; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 18929; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 18930; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 18931; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18932; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 18933; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 18934; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 18935; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 18936; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 18937; GFX10-WGP-NEXT: buffer_gl1_inv 18938; GFX10-WGP-NEXT: buffer_gl0_inv 18939; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 18940; GFX10-WGP-NEXT: s_endpgm 18941; 18942; GFX10-CU-LABEL: global_agent_one_as_seq_cst_monotonic_ret_cmpxchg: 18943; GFX10-CU: ; %bb.0: ; 
%entry 18944; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 18945; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 18946; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 18947; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 18948; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 18949; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 18950; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 18951; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18952; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 18953; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 18954; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 18955; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 18956; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 18957; GFX10-CU-NEXT: buffer_gl1_inv 18958; GFX10-CU-NEXT: buffer_gl0_inv 18959; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 18960; GFX10-CU-NEXT: s_endpgm 18961; 18962; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_monotonic_ret_cmpxchg: 18963; SKIP-CACHE-INV: ; %bb.0: ; %entry 18964; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 18965; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 18966; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 18967; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 18968; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 18969; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 18970; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 18971; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 18972; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 18973; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 18974; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 18975; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 18976; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 18977; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 18978; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 18979; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 18980; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 18981; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 18982; 
SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 18983; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 18984; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 18985; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 18986; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 18987; SKIP-CACHE-INV-NEXT: s_endpgm 18988; 18989; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_monotonic_ret_cmpxchg: 18990; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 18991; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 18992; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 18993; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 18994; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 18995; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 18996; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 18997; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 18998; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18999; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 19000; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 19001; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 19002; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 19003; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 19004; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 19005; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 19006; 19007; GFX90A-TGSPLIT-LABEL: global_agent_one_as_seq_cst_monotonic_ret_cmpxchg: 19008; GFX90A-TGSPLIT: ; %bb.0: ; %entry 19009; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 19010; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 19011; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 19012; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 19013; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 19014; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 19015; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 19016; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 19017; 
GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 19018; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 19019; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 19020; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 19021; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 19022; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 19023; GFX90A-TGSPLIT-NEXT: s_endpgm 19024; 19025; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_monotonic_ret_cmpxchg: 19026; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 19027; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 19028; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 19029; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 19030; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 19031; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 19032; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 19033; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 19034; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 19035; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 19036; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 19037; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 19038; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 19039; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 19040; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 19041; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 19042; GFX940-NOTTGSPLIT-NEXT: s_endpgm 19043; 19044; GFX940-TGSPLIT-LABEL: global_agent_one_as_seq_cst_monotonic_ret_cmpxchg: 19045; GFX940-TGSPLIT: ; %bb.0: ; %entry 19046; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 19047; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 19048; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 19049; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 19050; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 19051; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 19052; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 19053; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 
def $vgpr2_vgpr3 killed $exec 19054; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 19055; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 19056; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 19057; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 19058; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 19059; GFX940-TGSPLIT-NEXT: buffer_inv sc1 19060; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 19061; GFX940-TGSPLIT-NEXT: s_endpgm 19062; 19063; GFX11-WGP-LABEL: global_agent_one_as_seq_cst_monotonic_ret_cmpxchg: 19064; GFX11-WGP: ; %bb.0: ; %entry 19065; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 19066; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 19067; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 19068; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 19069; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 19070; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 19071; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 19072; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 19073; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 19074; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 19075; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 19076; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 19077; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 19078; GFX11-WGP-NEXT: buffer_gl1_inv 19079; GFX11-WGP-NEXT: buffer_gl0_inv 19080; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 19081; GFX11-WGP-NEXT: s_endpgm 19082; 19083; GFX11-CU-LABEL: global_agent_one_as_seq_cst_monotonic_ret_cmpxchg: 19084; GFX11-CU: ; %bb.0: ; %entry 19085; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 19086; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 19087; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 19088; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 19089; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 19090; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 19091; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 19092; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 19093; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 19094; 
GFX11-CU-NEXT: s_waitcnt vmcnt(0) 19095; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 19096; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 19097; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 19098; GFX11-CU-NEXT: buffer_gl1_inv 19099; GFX11-CU-NEXT: buffer_gl0_inv 19100; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 19101; GFX11-CU-NEXT: s_endpgm 19102; 19103; GFX12-WGP-LABEL: global_agent_one_as_seq_cst_monotonic_ret_cmpxchg: 19104; GFX12-WGP: ; %bb.0: ; %entry 19105; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 19106; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 19107; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 19108; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 19109; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 19110; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 19111; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 19112; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 19113; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 19114; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 19115; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 19116; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 19117; GFX12-WGP-NEXT: s_wait_storecnt 0x0 19118; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV 19119; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 19120; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 19121; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 19122; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 19123; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 19124; GFX12-WGP-NEXT: s_endpgm 19125; 19126; GFX12-CU-LABEL: global_agent_one_as_seq_cst_monotonic_ret_cmpxchg: 19127; GFX12-CU: ; %bb.0: ; %entry 19128; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 19129; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 19130; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 19131; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 19132; GFX12-CU-NEXT: s_wait_kmcnt 0x0 19133; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 19134; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 19135; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 
killed $exec
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
    ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst monotonic
  %val0 = extractvalue { i32, i1 } %val, 0
  store i32 %val0, ptr addrspace(1) %out, align 4
  ret void
}

; Kernel under test: a value-returning, volatile cmpxchg on a global
; (addrspace(1)) pointer with syncscope "agent-one-as", success ordering
; monotonic and failure ordering acquire.
;   %out - base pointer; the cmpxchg targets i32 element 4 of it, and the value
;          the cmpxchg loaded is then stored to element 0.
;   %in  - replacement value written when the comparison succeeds.
;   %old - value the memory location is compared against.
; The commented assertion groups between the opening line of the definition and
; its parameter list are generated by utils/update_llc_test_checks.py (one group
; per check prefix used on the RUN lines); regenerate them with that script
; instead of editing them by hand.
define amdgpu_kernel void @global_agent_one_as_monotonic_acquire_ret_cmpxchg(
; GFX6-LABEL: global_agent_one_as_monotonic_acquire_ret_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9]
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s12, s5
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
; GFX6-NEXT: s_mov_b32 s10, 0x100f000
; GFX6-NEXT: s_mov_b32 s11, -1
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
; GFX6-NEXT: s_mov_b32 s5, s12
; GFX6-NEXT: s_mov_b32 s6, s11
; GFX6-NEXT: s_mov_b32 s7, s10
; GFX6-NEXT: v_mov_b32_e32 v0, s9
; GFX6-NEXT: v_mov_b32_e32 v2, s8
; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX6-NEXT: v_mov_b32_e32 v1, v2
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_agent_one_as_monotonic_acquire_ret_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9]
; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2
; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3
; GFX7-NEXT: s_mov_b64 s[12:13], 16
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, s4
; GFX7-NEXT: s_mov_b32 s7, s5
; GFX7-NEXT: s_mov_b32 s11, s12
; GFX7-NEXT: s_mov_b32 s10, s13
; GFX7-NEXT: s_add_u32 s6, s6, s11
; GFX7-NEXT: s_addc_u32 s10, s7, s10
; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
; GFX7-NEXT: s_mov_b32 s7, s10
; GFX7-NEXT: v_mov_b32_e32 v2, s9
; GFX7-NEXT: v_mov_b32_e32 v0, s8
; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX7-NEXT: v_mov_b32_e32 v3, v0
; GFX7-NEXT: v_mov_b32_e32 v0, s6
; GFX7-NEXT: v_mov_b32_e32 v1, s7
; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: flat_store_dword v[0:1], v2
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: global_agent_one_as_monotonic_acquire_ret_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7
; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6
; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3
; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5]
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: global_agent_one_as_monotonic_acquire_ret_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7
; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6
; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5]
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_agent_one_as_monotonic_acquire_ret_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5]
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1
; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1
; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8
; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7
; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_acquire_ret_cmpxchg:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6
; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1
; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5]
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_agent_one_as_monotonic_acquire_ret_cmpxchg:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6
; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1
; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5]
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_acquire_ret_cmpxchg:
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2
; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1
; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1
; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
; GFX940-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX940-TGSPLIT-LABEL: global_agent_one_as_monotonic_acquire_ret_cmpxchg:
; GFX940-TGSPLIT: ; %bb.0: ; %entry
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2
; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1
; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT: buffer_inv sc1
; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
; GFX940-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: global_agent_one_as_monotonic_acquire_ret_cmpxchg:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3
; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2
; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: global_agent_one_as_monotonic_acquire_ret_cmpxchg:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3
; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2
; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: global_agent_one_as_monotonic_acquire_ret_cmpxchg:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3
; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2
; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3
; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV
; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: global_agent_one_as_monotonic_acquire_ret_cmpxchg:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3
; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2
; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
    ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
  ; Address of i32 element 4 of %out; this is the "offset:16" operand seen in
  ; the expected instructions above.
  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic acquire
  ; Field 0 of the cmpxchg result is the loaded value; storing it keeps the
  ; returning form of the atomic in use.
  %val0 = extractvalue { i32, i1 } %val, 0
  store i32 %val0, ptr addrspace(1) %out, align 4
  ret void
}

; Kernel under test: a value-returning, volatile cmpxchg on a global
; (addrspace(1)) pointer with syncscope "agent-one-as", success ordering acquire
; and failure ordering acquire.
;   %out - base pointer; the cmpxchg targets i32 element 4 of it, and the value
;          the cmpxchg loaded is then stored to element 0.
;   %in  - replacement value written when the comparison succeeds.
;   %old - value the memory location is compared against.
; The commented assertion groups between the opening line of the definition and
; its parameter list are generated by utils/update_llc_test_checks.py (one group
; per check prefix used on the RUN lines); regenerate them with that script
; instead of editing them by hand.
define amdgpu_kernel void @global_agent_one_as_acquire_acquire_ret_cmpxchg(
; GFX6-LABEL: global_agent_one_as_acquire_acquire_ret_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9]
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s12, s5
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
; GFX6-NEXT: s_mov_b32 s10, 0x100f000
; GFX6-NEXT: s_mov_b32 s11, -1
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
; GFX6-NEXT: s_mov_b32 s5, s12
; GFX6-NEXT: s_mov_b32 s6, s11
; GFX6-NEXT: s_mov_b32 s7, s10
; GFX6-NEXT: v_mov_b32_e32 v0, s9
; GFX6-NEXT: v_mov_b32_e32 v2, s8
; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX6-NEXT: v_mov_b32_e32 v1, v2
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_agent_one_as_acquire_acquire_ret_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9]
; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2
; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3
; GFX7-NEXT: s_mov_b64 s[12:13], 16
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, s4
; GFX7-NEXT: s_mov_b32 s7, s5
; GFX7-NEXT: s_mov_b32 s11, s12
; GFX7-NEXT: s_mov_b32 s10, s13
; GFX7-NEXT: s_add_u32 s6, s6, s11
; GFX7-NEXT: s_addc_u32 s10, s7, s10
; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
; GFX7-NEXT: s_mov_b32 s7, s10
; GFX7-NEXT: v_mov_b32_e32 v2, s9
; GFX7-NEXT: v_mov_b32_e32 v0, s8
; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX7-NEXT: v_mov_b32_e32 v3, v0
; GFX7-NEXT: v_mov_b32_e32 v0, s6
; GFX7-NEXT: v_mov_b32_e32 v1, s7
; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: flat_store_dword v[0:1], v2
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: global_agent_one_as_acquire_acquire_ret_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7
; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6
; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3
; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5]
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: global_agent_one_as_acquire_acquire_ret_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7
; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6
; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5]
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_acquire_ret_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5]
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1
; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1
; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8
; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7
; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_acquire_ret_cmpxchg:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6
; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1
; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5]
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_agent_one_as_acquire_acquire_ret_cmpxchg:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6
; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1
; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5]
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_acquire_ret_cmpxchg:
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2
; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1
; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1
; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
; GFX940-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX940-TGSPLIT-LABEL: global_agent_one_as_acquire_acquire_ret_cmpxchg:
; GFX940-TGSPLIT: ; %bb.0: ; %entry
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2
; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1
; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT: buffer_inv sc1
; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
; GFX940-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: global_agent_one_as_acquire_acquire_ret_cmpxchg:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3
; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2
; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: global_agent_one_as_acquire_acquire_ret_cmpxchg:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3
; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2
; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: global_agent_one_as_acquire_acquire_ret_cmpxchg:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3
; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2
; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3
; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV
; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: global_agent_one_as_acquire_acquire_ret_cmpxchg:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3
; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2
; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
    ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
  ; Address of i32 element 4 of %out; this is the "offset:16" operand seen in
  ; the expected instructions above.
  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire acquire
  ; Field 0 of the cmpxchg result is the loaded value; storing it keeps the
  ; returning form of the atomic in use.
  %val0 = extractvalue { i32, i1 } %val, 0
  store i32 %val0, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @global_agent_one_as_release_acquire_ret_cmpxchg(
;
GFX6-LABEL: global_agent_one_as_release_acquire_ret_cmpxchg: 19693; GFX6: ; %bb.0: ; %entry 19694; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 19695; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 19696; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 19697; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 19698; GFX6-NEXT: s_waitcnt lgkmcnt(0) 19699; GFX6-NEXT: s_mov_b32 s12, s5 19700; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 19701; GFX6-NEXT: s_mov_b32 s10, 0x100f000 19702; GFX6-NEXT: s_mov_b32 s11, -1 19703; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 19704; GFX6-NEXT: s_mov_b32 s5, s12 19705; GFX6-NEXT: s_mov_b32 s6, s11 19706; GFX6-NEXT: s_mov_b32 s7, s10 19707; GFX6-NEXT: v_mov_b32_e32 v0, s9 19708; GFX6-NEXT: v_mov_b32_e32 v2, s8 19709; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 19710; GFX6-NEXT: v_mov_b32_e32 v1, v2 19711; GFX6-NEXT: s_waitcnt vmcnt(0) 19712; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 19713; GFX6-NEXT: s_waitcnt vmcnt(0) 19714; GFX6-NEXT: buffer_wbinvl1 19715; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 19716; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 19717; GFX6-NEXT: s_endpgm 19718; 19719; GFX7-LABEL: global_agent_one_as_release_acquire_ret_cmpxchg: 19720; GFX7: ; %bb.0: ; %entry 19721; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 19722; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 19723; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 19724; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 19725; GFX7-NEXT: s_mov_b64 s[12:13], 16 19726; GFX7-NEXT: s_waitcnt lgkmcnt(0) 19727; GFX7-NEXT: s_mov_b32 s6, s4 19728; GFX7-NEXT: s_mov_b32 s7, s5 19729; GFX7-NEXT: s_mov_b32 s11, s12 19730; GFX7-NEXT: s_mov_b32 s10, s13 19731; GFX7-NEXT: s_add_u32 s6, s6, s11 19732; GFX7-NEXT: s_addc_u32 s10, s7, s10 19733; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 19734; GFX7-NEXT: s_mov_b32 s7, s10 19735; GFX7-NEXT: v_mov_b32_e32 v2, s9 19736; 
GFX7-NEXT: v_mov_b32_e32 v0, s8 19737; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 19738; GFX7-NEXT: v_mov_b32_e32 v3, v0 19739; GFX7-NEXT: v_mov_b32_e32 v0, s6 19740; GFX7-NEXT: v_mov_b32_e32 v1, s7 19741; GFX7-NEXT: s_waitcnt vmcnt(0) 19742; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 19743; GFX7-NEXT: s_waitcnt vmcnt(0) 19744; GFX7-NEXT: buffer_wbinvl1_vol 19745; GFX7-NEXT: v_mov_b32_e32 v0, s4 19746; GFX7-NEXT: v_mov_b32_e32 v1, s5 19747; GFX7-NEXT: flat_store_dword v[0:1], v2 19748; GFX7-NEXT: s_endpgm 19749; 19750; GFX10-WGP-LABEL: global_agent_one_as_release_acquire_ret_cmpxchg: 19751; GFX10-WGP: ; %bb.0: ; %entry 19752; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 19753; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 19754; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 19755; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 19756; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 19757; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 19758; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 19759; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 19760; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 19761; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 19762; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 19763; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 19764; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 19765; GFX10-WGP-NEXT: buffer_gl1_inv 19766; GFX10-WGP-NEXT: buffer_gl0_inv 19767; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 19768; GFX10-WGP-NEXT: s_endpgm 19769; 19770; GFX10-CU-LABEL: global_agent_one_as_release_acquire_ret_cmpxchg: 19771; GFX10-CU: ; %bb.0: ; %entry 19772; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 19773; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 19774; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 19775; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 19776; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 19777; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 19778; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 19779; GFX10-CU-NEXT: ; kill: def 
$vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 19780; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 19781; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 19782; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 19783; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 19784; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 19785; GFX10-CU-NEXT: buffer_gl1_inv 19786; GFX10-CU-NEXT: buffer_gl0_inv 19787; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 19788; GFX10-CU-NEXT: s_endpgm 19789; 19790; SKIP-CACHE-INV-LABEL: global_agent_one_as_release_acquire_ret_cmpxchg: 19791; SKIP-CACHE-INV: ; %bb.0: ; %entry 19792; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 19793; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 19794; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 19795; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 19796; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 19797; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 19798; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 19799; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 19800; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 19801; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 19802; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 19803; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 19804; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 19805; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 19806; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 19807; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 19808; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 19809; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 19810; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 19811; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 19812; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 19813; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 19814; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 19815; SKIP-CACHE-INV-NEXT: s_endpgm 19816; 19817; 
GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_release_acquire_ret_cmpxchg: 19818; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 19819; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 19820; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 19821; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 19822; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 19823; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 19824; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 19825; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 19826; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 19827; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 19828; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 19829; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 19830; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 19831; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 19832; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 19833; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 19834; 19835; GFX90A-TGSPLIT-LABEL: global_agent_one_as_release_acquire_ret_cmpxchg: 19836; GFX90A-TGSPLIT: ; %bb.0: ; %entry 19837; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 19838; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 19839; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 19840; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 19841; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 19842; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 19843; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 19844; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 19845; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 19846; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 19847; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 19848; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 19849; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 19850; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 19851; GFX90A-TGSPLIT-NEXT: s_endpgm 19852; 19853; 
GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_release_acquire_ret_cmpxchg: 19854; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 19855; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 19856; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 19857; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 19858; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 19859; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 19860; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 19861; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 19862; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 19863; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 19864; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 19865; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 19866; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 19867; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 19868; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 19869; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 19870; GFX940-NOTTGSPLIT-NEXT: s_endpgm 19871; 19872; GFX940-TGSPLIT-LABEL: global_agent_one_as_release_acquire_ret_cmpxchg: 19873; GFX940-TGSPLIT: ; %bb.0: ; %entry 19874; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 19875; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 19876; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 19877; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 19878; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 19879; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 19880; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 19881; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 19882; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 19883; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 19884; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 19885; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 19886; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 19887; GFX940-TGSPLIT-NEXT: buffer_inv sc1 19888; GFX940-TGSPLIT-NEXT: 
global_store_dword v0, v1, s[0:1] sc0 sc1 19889; GFX940-TGSPLIT-NEXT: s_endpgm 19890; 19891; GFX11-WGP-LABEL: global_agent_one_as_release_acquire_ret_cmpxchg: 19892; GFX11-WGP: ; %bb.0: ; %entry 19893; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 19894; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 19895; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 19896; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 19897; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 19898; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 19899; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 19900; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 19901; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 19902; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 19903; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 19904; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 19905; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 19906; GFX11-WGP-NEXT: buffer_gl1_inv 19907; GFX11-WGP-NEXT: buffer_gl0_inv 19908; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 19909; GFX11-WGP-NEXT: s_endpgm 19910; 19911; GFX11-CU-LABEL: global_agent_one_as_release_acquire_ret_cmpxchg: 19912; GFX11-CU: ; %bb.0: ; %entry 19913; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 19914; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 19915; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 19916; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 19917; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 19918; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 19919; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 19920; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 19921; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 19922; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 19923; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 19924; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 19925; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 19926; GFX11-CU-NEXT: buffer_gl1_inv 19927; GFX11-CU-NEXT: buffer_gl0_inv 19928; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 19929; GFX11-CU-NEXT: s_endpgm 19930; 19931; 
GFX12-WGP-LABEL: global_agent_one_as_release_acquire_ret_cmpxchg: 19932; GFX12-WGP: ; %bb.0: ; %entry 19933; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 19934; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 19935; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 19936; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 19937; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 19938; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 19939; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 19940; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 19941; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 19942; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 19943; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 19944; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 19945; GFX12-WGP-NEXT: s_wait_storecnt 0x0 19946; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV 19947; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 19948; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 19949; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 19950; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 19951; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 19952; GFX12-WGP-NEXT: s_endpgm 19953; 19954; GFX12-CU-LABEL: global_agent_one_as_release_acquire_ret_cmpxchg: 19955; GFX12-CU: ; %bb.0: ; %entry 19956; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 19957; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 19958; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 19959; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 19960; GFX12-CU-NEXT: s_wait_kmcnt 0x0 19961; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 19962; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 19963; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 19964; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 19965; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 19966; GFX12-CU-NEXT: s_wait_samplecnt 0x0 19967; GFX12-CU-NEXT: s_wait_loadcnt 0x0 19968; GFX12-CU-NEXT: s_wait_storecnt 0x0 19969; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV 19970; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 19971; 
GFX12-CU-NEXT: s_wait_samplecnt 0x0 19972; GFX12-CU-NEXT: s_wait_loadcnt 0x0 19973; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 19974; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 19975; GFX12-CU-NEXT: s_endpgm 19976 ptr addrspace(1) %out, i32 %in, i32 %old) { 19977entry: 19978 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 19979 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") release acquire 19980 %val0 = extractvalue { i32, i1 } %val, 0 19981 store i32 %val0, ptr addrspace(1) %out, align 4 19982 ret void 19983} 19984 19985define amdgpu_kernel void @global_agent_one_as_acq_rel_acquire_ret_cmpxchg( 19986; GFX6-LABEL: global_agent_one_as_acq_rel_acquire_ret_cmpxchg: 19987; GFX6: ; %bb.0: ; %entry 19988; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 19989; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 19990; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 19991; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 19992; GFX6-NEXT: s_waitcnt lgkmcnt(0) 19993; GFX6-NEXT: s_mov_b32 s12, s5 19994; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 19995; GFX6-NEXT: s_mov_b32 s10, 0x100f000 19996; GFX6-NEXT: s_mov_b32 s11, -1 19997; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 19998; GFX6-NEXT: s_mov_b32 s5, s12 19999; GFX6-NEXT: s_mov_b32 s6, s11 20000; GFX6-NEXT: s_mov_b32 s7, s10 20001; GFX6-NEXT: v_mov_b32_e32 v0, s9 20002; GFX6-NEXT: v_mov_b32_e32 v2, s8 20003; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 20004; GFX6-NEXT: v_mov_b32_e32 v1, v2 20005; GFX6-NEXT: s_waitcnt vmcnt(0) 20006; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 20007; GFX6-NEXT: s_waitcnt vmcnt(0) 20008; GFX6-NEXT: buffer_wbinvl1 20009; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 20010; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 20011; GFX6-NEXT: s_endpgm 20012; 20013; GFX7-LABEL: global_agent_one_as_acq_rel_acquire_ret_cmpxchg: 20014; GFX7: ; 
%bb.0: ; %entry 20015; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 20016; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 20017; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 20018; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 20019; GFX7-NEXT: s_mov_b64 s[12:13], 16 20020; GFX7-NEXT: s_waitcnt lgkmcnt(0) 20021; GFX7-NEXT: s_mov_b32 s6, s4 20022; GFX7-NEXT: s_mov_b32 s7, s5 20023; GFX7-NEXT: s_mov_b32 s11, s12 20024; GFX7-NEXT: s_mov_b32 s10, s13 20025; GFX7-NEXT: s_add_u32 s6, s6, s11 20026; GFX7-NEXT: s_addc_u32 s10, s7, s10 20027; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 20028; GFX7-NEXT: s_mov_b32 s7, s10 20029; GFX7-NEXT: v_mov_b32_e32 v2, s9 20030; GFX7-NEXT: v_mov_b32_e32 v0, s8 20031; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20032; GFX7-NEXT: v_mov_b32_e32 v3, v0 20033; GFX7-NEXT: v_mov_b32_e32 v0, s6 20034; GFX7-NEXT: v_mov_b32_e32 v1, s7 20035; GFX7-NEXT: s_waitcnt vmcnt(0) 20036; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 20037; GFX7-NEXT: s_waitcnt vmcnt(0) 20038; GFX7-NEXT: buffer_wbinvl1_vol 20039; GFX7-NEXT: v_mov_b32_e32 v0, s4 20040; GFX7-NEXT: v_mov_b32_e32 v1, s5 20041; GFX7-NEXT: flat_store_dword v[0:1], v2 20042; GFX7-NEXT: s_endpgm 20043; 20044; GFX10-WGP-LABEL: global_agent_one_as_acq_rel_acquire_ret_cmpxchg: 20045; GFX10-WGP: ; %bb.0: ; %entry 20046; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 20047; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 20048; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 20049; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 20050; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 20051; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 20052; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 20053; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 20054; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 20055; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 20056; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 20057; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 20058; GFX10-WGP-NEXT: 
s_waitcnt vmcnt(0) 20059; GFX10-WGP-NEXT: buffer_gl1_inv 20060; GFX10-WGP-NEXT: buffer_gl0_inv 20061; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 20062; GFX10-WGP-NEXT: s_endpgm 20063; 20064; GFX10-CU-LABEL: global_agent_one_as_acq_rel_acquire_ret_cmpxchg: 20065; GFX10-CU: ; %bb.0: ; %entry 20066; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 20067; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 20068; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 20069; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 20070; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 20071; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 20072; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 20073; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 20074; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 20075; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 20076; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 20077; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 20078; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 20079; GFX10-CU-NEXT: buffer_gl1_inv 20080; GFX10-CU-NEXT: buffer_gl0_inv 20081; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 20082; GFX10-CU-NEXT: s_endpgm 20083; 20084; SKIP-CACHE-INV-LABEL: global_agent_one_as_acq_rel_acquire_ret_cmpxchg: 20085; SKIP-CACHE-INV: ; %bb.0: ; %entry 20086; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 20087; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 20088; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 20089; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 20090; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 20091; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 20092; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 20093; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 20094; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 20095; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 20096; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 20097; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 20098; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 20099; 
SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 20100; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 20101; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 20102; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 20103; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 20104; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 20105; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 20106; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 20107; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 20108; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 20109; SKIP-CACHE-INV-NEXT: s_endpgm 20110; 20111; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_acquire_ret_cmpxchg: 20112; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 20113; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 20114; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 20115; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 20116; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 20117; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 20118; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 20119; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 20120; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20121; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 20122; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 20123; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 20124; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 20125; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 20126; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 20127; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 20128; 20129; GFX90A-TGSPLIT-LABEL: global_agent_one_as_acq_rel_acquire_ret_cmpxchg: 20130; GFX90A-TGSPLIT: ; %bb.0: ; %entry 20131; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 20132; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 20133; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 20134; GFX90A-TGSPLIT-NEXT: 
s_load_dword s6, s[8:9], 0xc 20135; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 20136; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 20137; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 20138; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20139; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 20140; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 20141; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 20142; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 20143; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 20144; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 20145; GFX90A-TGSPLIT-NEXT: s_endpgm 20146; 20147; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_acquire_ret_cmpxchg: 20148; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 20149; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 20150; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 20151; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 20152; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 20153; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 20154; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 20155; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 20156; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20157; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 20158; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 20159; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 20160; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 20161; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 20162; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 20163; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 20164; GFX940-NOTTGSPLIT-NEXT: s_endpgm 20165; 20166; GFX940-TGSPLIT-LABEL: global_agent_one_as_acq_rel_acquire_ret_cmpxchg: 20167; GFX940-TGSPLIT: ; %bb.0: ; %entry 20168; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 20169; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 20170; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 
0x8 20171; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 20172; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 20173; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 20174; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 20175; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20176; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 20177; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 20178; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 20179; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 20180; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 20181; GFX940-TGSPLIT-NEXT: buffer_inv sc1 20182; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 20183; GFX940-TGSPLIT-NEXT: s_endpgm 20184; 20185; GFX11-WGP-LABEL: global_agent_one_as_acq_rel_acquire_ret_cmpxchg: 20186; GFX11-WGP: ; %bb.0: ; %entry 20187; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 20188; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 20189; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 20190; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 20191; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 20192; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 20193; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 20194; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 20195; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 20196; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 20197; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 20198; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 20199; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 20200; GFX11-WGP-NEXT: buffer_gl1_inv 20201; GFX11-WGP-NEXT: buffer_gl0_inv 20202; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 20203; GFX11-WGP-NEXT: s_endpgm 20204; 20205; GFX11-CU-LABEL: global_agent_one_as_acq_rel_acquire_ret_cmpxchg: 20206; GFX11-CU: ; %bb.0: ; %entry 20207; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 20208; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 20209; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 20210; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 20211; 
GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 20212; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 20213; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 20214; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 20215; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 20216; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 20217; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 20218; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 20219; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 20220; GFX11-CU-NEXT: buffer_gl1_inv 20221; GFX11-CU-NEXT: buffer_gl0_inv 20222; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 20223; GFX11-CU-NEXT: s_endpgm 20224; 20225; GFX12-WGP-LABEL: global_agent_one_as_acq_rel_acquire_ret_cmpxchg: 20226; GFX12-WGP: ; %bb.0: ; %entry 20227; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 20228; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 20229; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 20230; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 20231; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 20232; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 20233; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 20234; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 20235; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 20236; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 20237; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 20238; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 20239; GFX12-WGP-NEXT: s_wait_storecnt 0x0 20240; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV 20241; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 20242; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 20243; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 20244; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 20245; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 20246; GFX12-WGP-NEXT: s_endpgm 20247; 20248; GFX12-CU-LABEL: global_agent_one_as_acq_rel_acquire_ret_cmpxchg: 20249; GFX12-CU: ; %bb.0: ; %entry 20250; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 20251; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 20252; GFX12-CU-NEXT: s_load_b32 s3, 
s[4:5], 0x8 20253; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 20254; GFX12-CU-NEXT: s_wait_kmcnt 0x0 20255; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 20256; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 20257; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 20258; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 20259; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 20260; GFX12-CU-NEXT: s_wait_samplecnt 0x0 20261; GFX12-CU-NEXT: s_wait_loadcnt 0x0 20262; GFX12-CU-NEXT: s_wait_storecnt 0x0 20263; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV 20264; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 20265; GFX12-CU-NEXT: s_wait_samplecnt 0x0 20266; GFX12-CU-NEXT: s_wait_loadcnt 0x0 20267; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 20268; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 20269; GFX12-CU-NEXT: s_endpgm 20270 ptr addrspace(1) %out, i32 %in, i32 %old) { 20271entry: 20272 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 20273 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel acquire 20274 %val0 = extractvalue { i32, i1 } %val, 0 20275 store i32 %val0, ptr addrspace(1) %out, align 4 20276 ret void 20277} 20278 20279define amdgpu_kernel void @global_agent_one_as_seq_cst_acquire_ret_cmpxchg( 20280; GFX6-LABEL: global_agent_one_as_seq_cst_acquire_ret_cmpxchg: 20281; GFX6: ; %bb.0: ; %entry 20282; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 20283; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 20284; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 20285; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 20286; GFX6-NEXT: s_waitcnt lgkmcnt(0) 20287; GFX6-NEXT: s_mov_b32 s12, s5 20288; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 20289; GFX6-NEXT: s_mov_b32 s10, 0x100f000 20290; GFX6-NEXT: s_mov_b32 s11, -1 20291; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 20292; GFX6-NEXT: s_mov_b32 s5, s12 20293; GFX6-NEXT: s_mov_b32 s6, s11 20294; GFX6-NEXT: s_mov_b32 s7, 
s10 20295; GFX6-NEXT: v_mov_b32_e32 v0, s9 20296; GFX6-NEXT: v_mov_b32_e32 v2, s8 20297; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 20298; GFX6-NEXT: v_mov_b32_e32 v1, v2 20299; GFX6-NEXT: s_waitcnt vmcnt(0) 20300; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 20301; GFX6-NEXT: s_waitcnt vmcnt(0) 20302; GFX6-NEXT: buffer_wbinvl1 20303; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 20304; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 20305; GFX6-NEXT: s_endpgm 20306; 20307; GFX7-LABEL: global_agent_one_as_seq_cst_acquire_ret_cmpxchg: 20308; GFX7: ; %bb.0: ; %entry 20309; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 20310; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 20311; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 20312; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 20313; GFX7-NEXT: s_mov_b64 s[12:13], 16 20314; GFX7-NEXT: s_waitcnt lgkmcnt(0) 20315; GFX7-NEXT: s_mov_b32 s6, s4 20316; GFX7-NEXT: s_mov_b32 s7, s5 20317; GFX7-NEXT: s_mov_b32 s11, s12 20318; GFX7-NEXT: s_mov_b32 s10, s13 20319; GFX7-NEXT: s_add_u32 s6, s6, s11 20320; GFX7-NEXT: s_addc_u32 s10, s7, s10 20321; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 20322; GFX7-NEXT: s_mov_b32 s7, s10 20323; GFX7-NEXT: v_mov_b32_e32 v2, s9 20324; GFX7-NEXT: v_mov_b32_e32 v0, s8 20325; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20326; GFX7-NEXT: v_mov_b32_e32 v3, v0 20327; GFX7-NEXT: v_mov_b32_e32 v0, s6 20328; GFX7-NEXT: v_mov_b32_e32 v1, s7 20329; GFX7-NEXT: s_waitcnt vmcnt(0) 20330; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 20331; GFX7-NEXT: s_waitcnt vmcnt(0) 20332; GFX7-NEXT: buffer_wbinvl1_vol 20333; GFX7-NEXT: v_mov_b32_e32 v0, s4 20334; GFX7-NEXT: v_mov_b32_e32 v1, s5 20335; GFX7-NEXT: flat_store_dword v[0:1], v2 20336; GFX7-NEXT: s_endpgm 20337; 20338; GFX10-WGP-LABEL: global_agent_one_as_seq_cst_acquire_ret_cmpxchg: 20339; GFX10-WGP: ; %bb.0: ; %entry 20340; GFX10-WGP-NEXT: 
v_mov_b32_e32 v0, 0 20341; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 20342; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 20343; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 20344; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 20345; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 20346; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 20347; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 20348; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 20349; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 20350; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 20351; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 20352; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 20353; GFX10-WGP-NEXT: buffer_gl1_inv 20354; GFX10-WGP-NEXT: buffer_gl0_inv 20355; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 20356; GFX10-WGP-NEXT: s_endpgm 20357; 20358; GFX10-CU-LABEL: global_agent_one_as_seq_cst_acquire_ret_cmpxchg: 20359; GFX10-CU: ; %bb.0: ; %entry 20360; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 20361; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 20362; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 20363; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 20364; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 20365; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 20366; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 20367; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 20368; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 20369; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 20370; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 20371; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 20372; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 20373; GFX10-CU-NEXT: buffer_gl1_inv 20374; GFX10-CU-NEXT: buffer_gl0_inv 20375; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 20376; GFX10-CU-NEXT: s_endpgm 20377; 20378; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_acquire_ret_cmpxchg: 20379; SKIP-CACHE-INV: ; %bb.0: ; %entry 20380; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 20381; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], 
s[2:3], 0x0 20382; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 20383; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 20384; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 20385; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 20386; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 20387; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 20388; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 20389; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 20390; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 20391; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 20392; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 20393; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 20394; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 20395; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 20396; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 20397; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 20398; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 20399; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 20400; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 20401; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 20402; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 20403; SKIP-CACHE-INV-NEXT: s_endpgm 20404; 20405; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_acquire_ret_cmpxchg: 20406; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 20407; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 20408; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 20409; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 20410; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 20411; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 20412; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 20413; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 20414; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20415; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 20416; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt 
vmcnt(0) 20417; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 20418; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 20419; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 20420; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 20421; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 20422; 20423; GFX90A-TGSPLIT-LABEL: global_agent_one_as_seq_cst_acquire_ret_cmpxchg: 20424; GFX90A-TGSPLIT: ; %bb.0: ; %entry 20425; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 20426; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 20427; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 20428; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 20429; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 20430; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 20431; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 20432; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20433; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 20434; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 20435; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 20436; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 20437; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 20438; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 20439; GFX90A-TGSPLIT-NEXT: s_endpgm 20440; 20441; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_acquire_ret_cmpxchg: 20442; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 20443; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 20444; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 20445; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 20446; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 20447; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 20448; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 20449; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 20450; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20451; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 20452; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 20453; 
GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 20454; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 20455; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 20456; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 20457; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 20458; GFX940-NOTTGSPLIT-NEXT: s_endpgm 20459; 20460; GFX940-TGSPLIT-LABEL: global_agent_one_as_seq_cst_acquire_ret_cmpxchg: 20461; GFX940-TGSPLIT: ; %bb.0: ; %entry 20462; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 20463; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 20464; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 20465; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 20466; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 20467; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 20468; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 20469; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20470; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 20471; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 20472; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 20473; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 20474; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 20475; GFX940-TGSPLIT-NEXT: buffer_inv sc1 20476; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 20477; GFX940-TGSPLIT-NEXT: s_endpgm 20478; 20479; GFX11-WGP-LABEL: global_agent_one_as_seq_cst_acquire_ret_cmpxchg: 20480; GFX11-WGP: ; %bb.0: ; %entry 20481; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 20482; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 20483; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 20484; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 20485; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 20486; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 20487; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 20488; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 20489; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 20490; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 20491; GFX11-WGP-NEXT: 
s_waitcnt_vscnt null, 0x0 20492; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 20493; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 20494; GFX11-WGP-NEXT: buffer_gl1_inv 20495; GFX11-WGP-NEXT: buffer_gl0_inv 20496; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 20497; GFX11-WGP-NEXT: s_endpgm 20498; 20499; GFX11-CU-LABEL: global_agent_one_as_seq_cst_acquire_ret_cmpxchg: 20500; GFX11-CU: ; %bb.0: ; %entry 20501; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 20502; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 20503; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 20504; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 20505; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 20506; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 20507; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 20508; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 20509; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 20510; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 20511; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 20512; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 20513; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 20514; GFX11-CU-NEXT: buffer_gl1_inv 20515; GFX11-CU-NEXT: buffer_gl0_inv 20516; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 20517; GFX11-CU-NEXT: s_endpgm 20518; 20519; GFX12-WGP-LABEL: global_agent_one_as_seq_cst_acquire_ret_cmpxchg: 20520; GFX12-WGP: ; %bb.0: ; %entry 20521; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 20522; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 20523; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 20524; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 20525; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 20526; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 20527; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 20528; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 20529; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 20530; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 20531; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 20532; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 20533; GFX12-WGP-NEXT: s_wait_storecnt 0x0 
20534; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV 20535; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 20536; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 20537; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 20538; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 20539; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 20540; GFX12-WGP-NEXT: s_endpgm 20541; 20542; GFX12-CU-LABEL: global_agent_one_as_seq_cst_acquire_ret_cmpxchg: 20543; GFX12-CU: ; %bb.0: ; %entry 20544; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 20545; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 20546; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 20547; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 20548; GFX12-CU-NEXT: s_wait_kmcnt 0x0 20549; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 20550; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 20551; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 20552; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 20553; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 20554; GFX12-CU-NEXT: s_wait_samplecnt 0x0 20555; GFX12-CU-NEXT: s_wait_loadcnt 0x0 20556; GFX12-CU-NEXT: s_wait_storecnt 0x0 20557; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV 20558; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 20559; GFX12-CU-NEXT: s_wait_samplecnt 0x0 20560; GFX12-CU-NEXT: s_wait_loadcnt 0x0 20561; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 20562; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 20563; GFX12-CU-NEXT: s_endpgm 20564 ptr addrspace(1) %out, i32 %in, i32 %old) { 20565entry: 20566 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 20567 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst acquire 20568 %val0 = extractvalue { i32, i1 } %val, 0 20569 store i32 %val0, ptr addrspace(1) %out, align 4 20570 ret void 20571} 20572 20573define amdgpu_kernel void @global_agent_one_as_monotonic_seq_cst_ret_cmpxchg( 20574; GFX6-LABEL: 
global_agent_one_as_monotonic_seq_cst_ret_cmpxchg: 20575; GFX6: ; %bb.0: ; %entry 20576; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 20577; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 20578; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 20579; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 20580; GFX6-NEXT: s_waitcnt lgkmcnt(0) 20581; GFX6-NEXT: s_mov_b32 s12, s5 20582; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 20583; GFX6-NEXT: s_mov_b32 s10, 0x100f000 20584; GFX6-NEXT: s_mov_b32 s11, -1 20585; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 20586; GFX6-NEXT: s_mov_b32 s5, s12 20587; GFX6-NEXT: s_mov_b32 s6, s11 20588; GFX6-NEXT: s_mov_b32 s7, s10 20589; GFX6-NEXT: v_mov_b32_e32 v0, s9 20590; GFX6-NEXT: v_mov_b32_e32 v2, s8 20591; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 20592; GFX6-NEXT: v_mov_b32_e32 v1, v2 20593; GFX6-NEXT: s_waitcnt vmcnt(0) 20594; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 20595; GFX6-NEXT: s_waitcnt vmcnt(0) 20596; GFX6-NEXT: buffer_wbinvl1 20597; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 20598; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 20599; GFX6-NEXT: s_endpgm 20600; 20601; GFX7-LABEL: global_agent_one_as_monotonic_seq_cst_ret_cmpxchg: 20602; GFX7: ; %bb.0: ; %entry 20603; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 20604; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 20605; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 20606; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 20607; GFX7-NEXT: s_mov_b64 s[12:13], 16 20608; GFX7-NEXT: s_waitcnt lgkmcnt(0) 20609; GFX7-NEXT: s_mov_b32 s6, s4 20610; GFX7-NEXT: s_mov_b32 s7, s5 20611; GFX7-NEXT: s_mov_b32 s11, s12 20612; GFX7-NEXT: s_mov_b32 s10, s13 20613; GFX7-NEXT: s_add_u32 s6, s6, s11 20614; GFX7-NEXT: s_addc_u32 s10, s7, s10 20615; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 20616; GFX7-NEXT: s_mov_b32 s7, s10 20617; GFX7-NEXT: v_mov_b32_e32 v2, s9 20618; GFX7-NEXT: 
v_mov_b32_e32 v0, s8 20619; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20620; GFX7-NEXT: v_mov_b32_e32 v3, v0 20621; GFX7-NEXT: v_mov_b32_e32 v0, s6 20622; GFX7-NEXT: v_mov_b32_e32 v1, s7 20623; GFX7-NEXT: s_waitcnt vmcnt(0) 20624; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 20625; GFX7-NEXT: s_waitcnt vmcnt(0) 20626; GFX7-NEXT: buffer_wbinvl1_vol 20627; GFX7-NEXT: v_mov_b32_e32 v0, s4 20628; GFX7-NEXT: v_mov_b32_e32 v1, s5 20629; GFX7-NEXT: flat_store_dword v[0:1], v2 20630; GFX7-NEXT: s_endpgm 20631; 20632; GFX10-WGP-LABEL: global_agent_one_as_monotonic_seq_cst_ret_cmpxchg: 20633; GFX10-WGP: ; %bb.0: ; %entry 20634; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 20635; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 20636; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 20637; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 20638; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 20639; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 20640; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 20641; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 20642; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 20643; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 20644; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 20645; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 20646; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 20647; GFX10-WGP-NEXT: buffer_gl1_inv 20648; GFX10-WGP-NEXT: buffer_gl0_inv 20649; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 20650; GFX10-WGP-NEXT: s_endpgm 20651; 20652; GFX10-CU-LABEL: global_agent_one_as_monotonic_seq_cst_ret_cmpxchg: 20653; GFX10-CU: ; %bb.0: ; %entry 20654; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 20655; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 20656; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 20657; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 20658; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 20659; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 20660; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 20661; GFX10-CU-NEXT: ; kill: def $vgpr1 
killed $vgpr1 def $vgpr1_vgpr2 killed $exec 20662; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 20663; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 20664; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 20665; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 20666; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 20667; GFX10-CU-NEXT: buffer_gl1_inv 20668; GFX10-CU-NEXT: buffer_gl0_inv 20669; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 20670; GFX10-CU-NEXT: s_endpgm 20671; 20672; SKIP-CACHE-INV-LABEL: global_agent_one_as_monotonic_seq_cst_ret_cmpxchg: 20673; SKIP-CACHE-INV: ; %bb.0: ; %entry 20674; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 20675; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 20676; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 20677; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 20678; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 20679; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 20680; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 20681; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 20682; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 20683; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 20684; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 20685; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 20686; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 20687; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 20688; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 20689; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 20690; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 20691; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 20692; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 20693; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 20694; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 20695; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 20696; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 20697; SKIP-CACHE-INV-NEXT: s_endpgm 20698; 20699; 
GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_seq_cst_ret_cmpxchg: 20700; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 20701; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 20702; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 20703; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 20704; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 20705; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 20706; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 20707; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 20708; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20709; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 20710; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 20711; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 20712; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 20713; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 20714; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 20715; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 20716; 20717; GFX90A-TGSPLIT-LABEL: global_agent_one_as_monotonic_seq_cst_ret_cmpxchg: 20718; GFX90A-TGSPLIT: ; %bb.0: ; %entry 20719; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 20720; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 20721; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 20722; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 20723; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 20724; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 20725; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 20726; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20727; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 20728; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 20729; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 20730; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 20731; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 20732; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 20733; GFX90A-TGSPLIT-NEXT: s_endpgm 20734; 20735; 
GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_seq_cst_ret_cmpxchg: 20736; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 20737; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 20738; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 20739; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 20740; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 20741; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 20742; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 20743; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 20744; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20745; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 20746; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 20747; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 20748; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 20749; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 20750; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 20751; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 20752; GFX940-NOTTGSPLIT-NEXT: s_endpgm 20753; 20754; GFX940-TGSPLIT-LABEL: global_agent_one_as_monotonic_seq_cst_ret_cmpxchg: 20755; GFX940-TGSPLIT: ; %bb.0: ; %entry 20756; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 20757; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 20758; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 20759; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 20760; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 20761; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 20762; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 20763; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20764; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 20765; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 20766; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 20767; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 20768; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 20769; GFX940-TGSPLIT-NEXT: buffer_inv sc1 20770; GFX940-TGSPLIT-NEXT: 
global_store_dword v0, v1, s[0:1] sc0 sc1 20771; GFX940-TGSPLIT-NEXT: s_endpgm 20772; 20773; GFX11-WGP-LABEL: global_agent_one_as_monotonic_seq_cst_ret_cmpxchg: 20774; GFX11-WGP: ; %bb.0: ; %entry 20775; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 20776; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 20777; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 20778; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 20779; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 20780; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 20781; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 20782; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 20783; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 20784; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 20785; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 20786; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 20787; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 20788; GFX11-WGP-NEXT: buffer_gl1_inv 20789; GFX11-WGP-NEXT: buffer_gl0_inv 20790; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 20791; GFX11-WGP-NEXT: s_endpgm 20792; 20793; GFX11-CU-LABEL: global_agent_one_as_monotonic_seq_cst_ret_cmpxchg: 20794; GFX11-CU: ; %bb.0: ; %entry 20795; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 20796; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 20797; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 20798; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 20799; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 20800; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 20801; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 20802; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 20803; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 20804; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 20805; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 20806; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 20807; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 20808; GFX11-CU-NEXT: buffer_gl1_inv 20809; GFX11-CU-NEXT: buffer_gl0_inv 20810; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 20811; GFX11-CU-NEXT: s_endpgm 20812; 20813; 
GFX12-WGP-LABEL: global_agent_one_as_monotonic_seq_cst_ret_cmpxchg: 20814; GFX12-WGP: ; %bb.0: ; %entry 20815; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 20816; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 20817; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 20818; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 20819; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 20820; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 20821; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 20822; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 20823; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 20824; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 20825; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 20826; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 20827; GFX12-WGP-NEXT: s_wait_storecnt 0x0 20828; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV 20829; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 20830; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 20831; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 20832; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 20833; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 20834; GFX12-WGP-NEXT: s_endpgm 20835; 20836; GFX12-CU-LABEL: global_agent_one_as_monotonic_seq_cst_ret_cmpxchg: 20837; GFX12-CU: ; %bb.0: ; %entry 20838; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 20839; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 20840; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 20841; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 20842; GFX12-CU-NEXT: s_wait_kmcnt 0x0 20843; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 20844; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 20845; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 20846; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 20847; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 20848; GFX12-CU-NEXT: s_wait_samplecnt 0x0 20849; GFX12-CU-NEXT: s_wait_loadcnt 0x0 20850; GFX12-CU-NEXT: s_wait_storecnt 0x0 20851; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV 20852; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 
20853; GFX12-CU-NEXT: s_wait_samplecnt 0x0 20854; GFX12-CU-NEXT: s_wait_loadcnt 0x0 20855; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 20856; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 20857; GFX12-CU-NEXT: s_endpgm 20858 ptr addrspace(1) %out, i32 %in, i32 %old) { 20859entry: 20860 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 20861 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic seq_cst 20862 %val0 = extractvalue { i32, i1 } %val, 0 20863 store i32 %val0, ptr addrspace(1) %out, align 4 20864 ret void 20865} 20866 20867define amdgpu_kernel void @global_agent_one_as_acquire_seq_cst_ret_cmpxchg( 20868; GFX6-LABEL: global_agent_one_as_acquire_seq_cst_ret_cmpxchg: 20869; GFX6: ; %bb.0: ; %entry 20870; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 20871; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 20872; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 20873; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 20874; GFX6-NEXT: s_waitcnt lgkmcnt(0) 20875; GFX6-NEXT: s_mov_b32 s12, s5 20876; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 20877; GFX6-NEXT: s_mov_b32 s10, 0x100f000 20878; GFX6-NEXT: s_mov_b32 s11, -1 20879; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 20880; GFX6-NEXT: s_mov_b32 s5, s12 20881; GFX6-NEXT: s_mov_b32 s6, s11 20882; GFX6-NEXT: s_mov_b32 s7, s10 20883; GFX6-NEXT: v_mov_b32_e32 v0, s9 20884; GFX6-NEXT: v_mov_b32_e32 v2, s8 20885; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 20886; GFX6-NEXT: v_mov_b32_e32 v1, v2 20887; GFX6-NEXT: s_waitcnt vmcnt(0) 20888; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 20889; GFX6-NEXT: s_waitcnt vmcnt(0) 20890; GFX6-NEXT: buffer_wbinvl1 20891; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 20892; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 20893; GFX6-NEXT: s_endpgm 20894; 20895; GFX7-LABEL: global_agent_one_as_acquire_seq_cst_ret_cmpxchg: 20896; 
GFX7: ; %bb.0: ; %entry 20897; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 20898; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 20899; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 20900; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 20901; GFX7-NEXT: s_mov_b64 s[12:13], 16 20902; GFX7-NEXT: s_waitcnt lgkmcnt(0) 20903; GFX7-NEXT: s_mov_b32 s6, s4 20904; GFX7-NEXT: s_mov_b32 s7, s5 20905; GFX7-NEXT: s_mov_b32 s11, s12 20906; GFX7-NEXT: s_mov_b32 s10, s13 20907; GFX7-NEXT: s_add_u32 s6, s6, s11 20908; GFX7-NEXT: s_addc_u32 s10, s7, s10 20909; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 20910; GFX7-NEXT: s_mov_b32 s7, s10 20911; GFX7-NEXT: v_mov_b32_e32 v2, s9 20912; GFX7-NEXT: v_mov_b32_e32 v0, s8 20913; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20914; GFX7-NEXT: v_mov_b32_e32 v3, v0 20915; GFX7-NEXT: v_mov_b32_e32 v0, s6 20916; GFX7-NEXT: v_mov_b32_e32 v1, s7 20917; GFX7-NEXT: s_waitcnt vmcnt(0) 20918; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 20919; GFX7-NEXT: s_waitcnt vmcnt(0) 20920; GFX7-NEXT: buffer_wbinvl1_vol 20921; GFX7-NEXT: v_mov_b32_e32 v0, s4 20922; GFX7-NEXT: v_mov_b32_e32 v1, s5 20923; GFX7-NEXT: flat_store_dword v[0:1], v2 20924; GFX7-NEXT: s_endpgm 20925; 20926; GFX10-WGP-LABEL: global_agent_one_as_acquire_seq_cst_ret_cmpxchg: 20927; GFX10-WGP: ; %bb.0: ; %entry 20928; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 20929; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 20930; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 20931; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 20932; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 20933; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 20934; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 20935; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 20936; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 20937; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 20938; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 20939; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 20940; 
GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 20941; GFX10-WGP-NEXT: buffer_gl1_inv 20942; GFX10-WGP-NEXT: buffer_gl0_inv 20943; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 20944; GFX10-WGP-NEXT: s_endpgm 20945; 20946; GFX10-CU-LABEL: global_agent_one_as_acquire_seq_cst_ret_cmpxchg: 20947; GFX10-CU: ; %bb.0: ; %entry 20948; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 20949; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 20950; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 20951; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 20952; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 20953; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 20954; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 20955; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 20956; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 20957; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 20958; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 20959; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 20960; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 20961; GFX10-CU-NEXT: buffer_gl1_inv 20962; GFX10-CU-NEXT: buffer_gl0_inv 20963; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 20964; GFX10-CU-NEXT: s_endpgm 20965; 20966; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_seq_cst_ret_cmpxchg: 20967; SKIP-CACHE-INV: ; %bb.0: ; %entry 20968; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 20969; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 20970; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 20971; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 20972; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 20973; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 20974; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 20975; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 20976; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 20977; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 20978; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 20979; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 20980; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 
20981; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 20982; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 20983; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 20984; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 20985; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 20986; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 20987; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 20988; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 20989; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 20990; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 20991; SKIP-CACHE-INV-NEXT: s_endpgm 20992; 20993; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_seq_cst_ret_cmpxchg: 20994; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 20995; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 20996; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 20997; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 20998; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 20999; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 21000; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 21001; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 21002; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 21003; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 21004; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 21005; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 21006; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 21007; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 21008; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 21009; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 21010; 21011; GFX90A-TGSPLIT-LABEL: global_agent_one_as_acquire_seq_cst_ret_cmpxchg: 21012; GFX90A-TGSPLIT: ; %bb.0: ; %entry 21013; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 21014; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 21015; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 21016; 
GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 21017; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 21018; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 21019; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 21020; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 21021; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 21022; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 21023; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 21024; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 21025; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 21026; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 21027; GFX90A-TGSPLIT-NEXT: s_endpgm 21028; 21029; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_seq_cst_ret_cmpxchg: 21030; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 21031; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 21032; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 21033; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 21034; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 21035; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 21036; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 21037; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 21038; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 21039; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 21040; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 21041; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 21042; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 21043; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 21044; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 21045; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 21046; GFX940-NOTTGSPLIT-NEXT: s_endpgm 21047; 21048; GFX940-TGSPLIT-LABEL: global_agent_one_as_acquire_seq_cst_ret_cmpxchg: 21049; GFX940-TGSPLIT: ; %bb.0: ; %entry 21050; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 21051; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 21052; GFX940-TGSPLIT-NEXT: 
s_load_dword s3, s[4:5], 0x8 21053; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 21054; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 21055; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 21056; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 21057; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 21058; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 21059; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 21060; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 21061; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 21062; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 21063; GFX940-TGSPLIT-NEXT: buffer_inv sc1 21064; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 21065; GFX940-TGSPLIT-NEXT: s_endpgm 21066; 21067; GFX11-WGP-LABEL: global_agent_one_as_acquire_seq_cst_ret_cmpxchg: 21068; GFX11-WGP: ; %bb.0: ; %entry 21069; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 21070; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 21071; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 21072; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 21073; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 21074; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 21075; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 21076; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 21077; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 21078; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 21079; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 21080; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 21081; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 21082; GFX11-WGP-NEXT: buffer_gl1_inv 21083; GFX11-WGP-NEXT: buffer_gl0_inv 21084; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 21085; GFX11-WGP-NEXT: s_endpgm 21086; 21087; GFX11-CU-LABEL: global_agent_one_as_acquire_seq_cst_ret_cmpxchg: 21088; GFX11-CU: ; %bb.0: ; %entry 21089; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 21090; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 21091; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 21092; GFX11-CU-NEXT: s_load_b32 s2, 
s[4:5], 0xc 21093; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 21094; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 21095; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 21096; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 21097; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 21098; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 21099; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 21100; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 21101; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 21102; GFX11-CU-NEXT: buffer_gl1_inv 21103; GFX11-CU-NEXT: buffer_gl0_inv 21104; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 21105; GFX11-CU-NEXT: s_endpgm 21106; 21107; GFX12-WGP-LABEL: global_agent_one_as_acquire_seq_cst_ret_cmpxchg: 21108; GFX12-WGP: ; %bb.0: ; %entry 21109; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 21110; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 21111; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 21112; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 21113; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 21114; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 21115; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 21116; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 21117; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 21118; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 21119; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 21120; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 21121; GFX12-WGP-NEXT: s_wait_storecnt 0x0 21122; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV 21123; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 21124; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 21125; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 21126; GFX12-WGP-NEXT: s_endpgm 21127; 21128; GFX12-CU-LABEL: global_agent_one_as_acquire_seq_cst_ret_cmpxchg: 21129; GFX12-CU: ; %bb.0: ; %entry 21130; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 21131; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 21132; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 21133; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 21134; 
GFX12-CU-NEXT: s_wait_kmcnt 0x0 21135; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 21136; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 21137; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 21138; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 21139; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 21140; GFX12-CU-NEXT: s_wait_samplecnt 0x0 21141; GFX12-CU-NEXT: s_wait_loadcnt 0x0 21142; GFX12-CU-NEXT: s_wait_storecnt 0x0 21143; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV 21144; GFX12-CU-NEXT: s_wait_loadcnt 0x0 21145; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 21146; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 21147; GFX12-CU-NEXT: s_endpgm 21148 ptr addrspace(1) %out, i32 %in, i32 %old) { 21149entry: 21150 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 21151 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire seq_cst 21152 %val0 = extractvalue { i32, i1 } %val, 0 21153 store i32 %val0, ptr addrspace(1) %out, align 4 21154 ret void 21155} 21156 21157define amdgpu_kernel void @global_agent_one_as_release_seq_cst_ret_cmpxchg( 21158; GFX6-LABEL: global_agent_one_as_release_seq_cst_ret_cmpxchg: 21159; GFX6: ; %bb.0: ; %entry 21160; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 21161; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 21162; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 21163; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 21164; GFX6-NEXT: s_waitcnt lgkmcnt(0) 21165; GFX6-NEXT: s_mov_b32 s12, s5 21166; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 21167; GFX6-NEXT: s_mov_b32 s10, 0x100f000 21168; GFX6-NEXT: s_mov_b32 s11, -1 21169; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 21170; GFX6-NEXT: s_mov_b32 s5, s12 21171; GFX6-NEXT: s_mov_b32 s6, s11 21172; GFX6-NEXT: s_mov_b32 s7, s10 21173; GFX6-NEXT: v_mov_b32_e32 v0, s9 21174; GFX6-NEXT: v_mov_b32_e32 v2, s8 21175; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 
killed $exec 21176; GFX6-NEXT: v_mov_b32_e32 v1, v2 21177; GFX6-NEXT: s_waitcnt vmcnt(0) 21178; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 21179; GFX6-NEXT: s_waitcnt vmcnt(0) 21180; GFX6-NEXT: buffer_wbinvl1 21181; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 21182; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 21183; GFX6-NEXT: s_endpgm 21184; 21185; GFX7-LABEL: global_agent_one_as_release_seq_cst_ret_cmpxchg: 21186; GFX7: ; %bb.0: ; %entry 21187; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 21188; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 21189; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 21190; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 21191; GFX7-NEXT: s_mov_b64 s[12:13], 16 21192; GFX7-NEXT: s_waitcnt lgkmcnt(0) 21193; GFX7-NEXT: s_mov_b32 s6, s4 21194; GFX7-NEXT: s_mov_b32 s7, s5 21195; GFX7-NEXT: s_mov_b32 s11, s12 21196; GFX7-NEXT: s_mov_b32 s10, s13 21197; GFX7-NEXT: s_add_u32 s6, s6, s11 21198; GFX7-NEXT: s_addc_u32 s10, s7, s10 21199; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 21200; GFX7-NEXT: s_mov_b32 s7, s10 21201; GFX7-NEXT: v_mov_b32_e32 v2, s9 21202; GFX7-NEXT: v_mov_b32_e32 v0, s8 21203; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 21204; GFX7-NEXT: v_mov_b32_e32 v3, v0 21205; GFX7-NEXT: v_mov_b32_e32 v0, s6 21206; GFX7-NEXT: v_mov_b32_e32 v1, s7 21207; GFX7-NEXT: s_waitcnt vmcnt(0) 21208; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 21209; GFX7-NEXT: s_waitcnt vmcnt(0) 21210; GFX7-NEXT: buffer_wbinvl1_vol 21211; GFX7-NEXT: v_mov_b32_e32 v0, s4 21212; GFX7-NEXT: v_mov_b32_e32 v1, s5 21213; GFX7-NEXT: flat_store_dword v[0:1], v2 21214; GFX7-NEXT: s_endpgm 21215; 21216; GFX10-WGP-LABEL: global_agent_one_as_release_seq_cst_ret_cmpxchg: 21217; GFX10-WGP: ; %bb.0: ; %entry 21218; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 21219; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 21220; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 21221; GFX10-WGP-NEXT: 
s_load_dword s6, s[8:9], 0xc 21222; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 21223; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 21224; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 21225; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 21226; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 21227; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 21228; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 21229; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 21230; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 21231; GFX10-WGP-NEXT: buffer_gl1_inv 21232; GFX10-WGP-NEXT: buffer_gl0_inv 21233; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 21234; GFX10-WGP-NEXT: s_endpgm 21235; 21236; GFX10-CU-LABEL: global_agent_one_as_release_seq_cst_ret_cmpxchg: 21237; GFX10-CU: ; %bb.0: ; %entry 21238; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 21239; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 21240; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 21241; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 21242; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 21243; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 21244; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 21245; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 21246; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 21247; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 21248; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 21249; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 21250; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 21251; GFX10-CU-NEXT: buffer_gl1_inv 21252; GFX10-CU-NEXT: buffer_gl0_inv 21253; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 21254; GFX10-CU-NEXT: s_endpgm 21255; 21256; SKIP-CACHE-INV-LABEL: global_agent_one_as_release_seq_cst_ret_cmpxchg: 21257; SKIP-CACHE-INV: ; %bb.0: ; %entry 21258; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 21259; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 21260; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 21261; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 21262; SKIP-CACHE-INV-NEXT: 
s_waitcnt lgkmcnt(0) 21263; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 21264; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 21265; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 21266; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 21267; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 21268; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 21269; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 21270; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 21271; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 21272; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 21273; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 21274; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 21275; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 21276; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 21277; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 21278; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 21279; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 21280; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 21281; SKIP-CACHE-INV-NEXT: s_endpgm 21282; 21283; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_release_seq_cst_ret_cmpxchg: 21284; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 21285; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 21286; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 21287; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 21288; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 21289; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 21290; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 21291; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 21292; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 21293; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 21294; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 21295; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 21296; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 21297; 
GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 21298; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 21299; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 21300; 21301; GFX90A-TGSPLIT-LABEL: global_agent_one_as_release_seq_cst_ret_cmpxchg: 21302; GFX90A-TGSPLIT: ; %bb.0: ; %entry 21303; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 21304; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 21305; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 21306; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 21307; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 21308; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 21309; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 21310; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 21311; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 21312; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 21313; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 21314; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 21315; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 21316; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 21317; GFX90A-TGSPLIT-NEXT: s_endpgm 21318; 21319; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_release_seq_cst_ret_cmpxchg: 21320; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 21321; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 21322; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 21323; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 21324; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 21325; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 21326; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 21327; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 21328; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 21329; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 21330; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 21331; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 21332; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 21333; 
GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 21334; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 21335; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 21336; GFX940-NOTTGSPLIT-NEXT: s_endpgm 21337; 21338; GFX940-TGSPLIT-LABEL: global_agent_one_as_release_seq_cst_ret_cmpxchg: 21339; GFX940-TGSPLIT: ; %bb.0: ; %entry 21340; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 21341; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 21342; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 21343; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 21344; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 21345; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 21346; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 21347; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 21348; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 21349; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 21350; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 21351; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 21352; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 21353; GFX940-TGSPLIT-NEXT: buffer_inv sc1 21354; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 21355; GFX940-TGSPLIT-NEXT: s_endpgm 21356; 21357; GFX11-WGP-LABEL: global_agent_one_as_release_seq_cst_ret_cmpxchg: 21358; GFX11-WGP: ; %bb.0: ; %entry 21359; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 21360; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 21361; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 21362; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 21363; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 21364; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 21365; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 21366; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 21367; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 21368; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 21369; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 21370; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 21371; GFX11-WGP-NEXT: s_waitcnt 
vmcnt(0) 21372; GFX11-WGP-NEXT: buffer_gl1_inv 21373; GFX11-WGP-NEXT: buffer_gl0_inv 21374; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 21375; GFX11-WGP-NEXT: s_endpgm 21376; 21377; GFX11-CU-LABEL: global_agent_one_as_release_seq_cst_ret_cmpxchg: 21378; GFX11-CU: ; %bb.0: ; %entry 21379; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 21380; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 21381; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 21382; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 21383; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 21384; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 21385; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 21386; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 21387; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 21388; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 21389; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 21390; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 21391; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 21392; GFX11-CU-NEXT: buffer_gl1_inv 21393; GFX11-CU-NEXT: buffer_gl0_inv 21394; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 21395; GFX11-CU-NEXT: s_endpgm 21396; 21397; GFX12-WGP-LABEL: global_agent_one_as_release_seq_cst_ret_cmpxchg: 21398; GFX12-WGP: ; %bb.0: ; %entry 21399; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 21400; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 21401; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 21402; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 21403; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 21404; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 21405; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 21406; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 21407; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 21408; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 21409; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 21410; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 21411; GFX12-WGP-NEXT: s_wait_storecnt 0x0 21412; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV 21413; GFX12-WGP-NEXT: 
s_wait_bvhcnt 0x0 21414; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 21415; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 21416; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 21417; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 21418; GFX12-WGP-NEXT: s_endpgm 21419; 21420; GFX12-CU-LABEL: global_agent_one_as_release_seq_cst_ret_cmpxchg: 21421; GFX12-CU: ; %bb.0: ; %entry 21422; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 21423; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 21424; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 21425; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 21426; GFX12-CU-NEXT: s_wait_kmcnt 0x0 21427; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 21428; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 21429; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 21430; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 21431; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 21432; GFX12-CU-NEXT: s_wait_samplecnt 0x0 21433; GFX12-CU-NEXT: s_wait_loadcnt 0x0 21434; GFX12-CU-NEXT: s_wait_storecnt 0x0 21435; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV 21436; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 21437; GFX12-CU-NEXT: s_wait_samplecnt 0x0 21438; GFX12-CU-NEXT: s_wait_loadcnt 0x0 21439; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 21440; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 21441; GFX12-CU-NEXT: s_endpgm 21442 ptr addrspace(1) %out, i32 %in, i32 %old) { 21443entry: 21444 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 21445 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") release seq_cst 21446 %val0 = extractvalue { i32, i1 } %val, 0 21447 store i32 %val0, ptr addrspace(1) %out, align 4 21448 ret void 21449} 21450 21451define amdgpu_kernel void @global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg( 21452; GFX6-LABEL: global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg: 21453; GFX6: ; %bb.0: ; %entry 21454; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 21455; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 21456; 
GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 21457; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 21458; GFX6-NEXT: s_waitcnt lgkmcnt(0) 21459; GFX6-NEXT: s_mov_b32 s12, s5 21460; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 21461; GFX6-NEXT: s_mov_b32 s10, 0x100f000 21462; GFX6-NEXT: s_mov_b32 s11, -1 21463; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 21464; GFX6-NEXT: s_mov_b32 s5, s12 21465; GFX6-NEXT: s_mov_b32 s6, s11 21466; GFX6-NEXT: s_mov_b32 s7, s10 21467; GFX6-NEXT: v_mov_b32_e32 v0, s9 21468; GFX6-NEXT: v_mov_b32_e32 v2, s8 21469; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 21470; GFX6-NEXT: v_mov_b32_e32 v1, v2 21471; GFX6-NEXT: s_waitcnt vmcnt(0) 21472; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 21473; GFX6-NEXT: s_waitcnt vmcnt(0) 21474; GFX6-NEXT: buffer_wbinvl1 21475; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 21476; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 21477; GFX6-NEXT: s_endpgm 21478; 21479; GFX7-LABEL: global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg: 21480; GFX7: ; %bb.0: ; %entry 21481; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 21482; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 21483; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 21484; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 21485; GFX7-NEXT: s_mov_b64 s[12:13], 16 21486; GFX7-NEXT: s_waitcnt lgkmcnt(0) 21487; GFX7-NEXT: s_mov_b32 s6, s4 21488; GFX7-NEXT: s_mov_b32 s7, s5 21489; GFX7-NEXT: s_mov_b32 s11, s12 21490; GFX7-NEXT: s_mov_b32 s10, s13 21491; GFX7-NEXT: s_add_u32 s6, s6, s11 21492; GFX7-NEXT: s_addc_u32 s10, s7, s10 21493; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 21494; GFX7-NEXT: s_mov_b32 s7, s10 21495; GFX7-NEXT: v_mov_b32_e32 v2, s9 21496; GFX7-NEXT: v_mov_b32_e32 v0, s8 21497; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 21498; GFX7-NEXT: v_mov_b32_e32 v3, v0 21499; GFX7-NEXT: v_mov_b32_e32 v0, s6 21500; 
GFX7-NEXT: v_mov_b32_e32 v1, s7 21501; GFX7-NEXT: s_waitcnt vmcnt(0) 21502; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 21503; GFX7-NEXT: s_waitcnt vmcnt(0) 21504; GFX7-NEXT: buffer_wbinvl1_vol 21505; GFX7-NEXT: v_mov_b32_e32 v0, s4 21506; GFX7-NEXT: v_mov_b32_e32 v1, s5 21507; GFX7-NEXT: flat_store_dword v[0:1], v2 21508; GFX7-NEXT: s_endpgm 21509; 21510; GFX10-WGP-LABEL: global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg: 21511; GFX10-WGP: ; %bb.0: ; %entry 21512; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 21513; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 21514; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 21515; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 21516; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 21517; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 21518; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 21519; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 21520; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 21521; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 21522; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 21523; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 21524; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 21525; GFX10-WGP-NEXT: buffer_gl1_inv 21526; GFX10-WGP-NEXT: buffer_gl0_inv 21527; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 21528; GFX10-WGP-NEXT: s_endpgm 21529; 21530; GFX10-CU-LABEL: global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg: 21531; GFX10-CU: ; %bb.0: ; %entry 21532; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 21533; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 21534; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 21535; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 21536; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 21537; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 21538; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 21539; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 21540; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 21541; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 21542; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 21543; 
GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 21544; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 21545; GFX10-CU-NEXT: buffer_gl1_inv 21546; GFX10-CU-NEXT: buffer_gl0_inv 21547; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 21548; GFX10-CU-NEXT: s_endpgm 21549; 21550; SKIP-CACHE-INV-LABEL: global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg: 21551; SKIP-CACHE-INV: ; %bb.0: ; %entry 21552; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 21553; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 21554; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 21555; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 21556; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 21557; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 21558; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 21559; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 21560; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 21561; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 21562; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 21563; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 21564; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 21565; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 21566; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 21567; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 21568; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 21569; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 21570; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 21571; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 21572; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 21573; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 21574; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 21575; SKIP-CACHE-INV-NEXT: s_endpgm 21576; 21577; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg: 21578; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 21579; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 21580; 
GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 21581; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 21582; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 21583; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 21584; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 21585; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 21586; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 21587; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 21588; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 21589; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 21590; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 21591; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 21592; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 21593; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 21594; 21595; GFX90A-TGSPLIT-LABEL: global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg: 21596; GFX90A-TGSPLIT: ; %bb.0: ; %entry 21597; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 21598; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 21599; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 21600; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 21601; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 21602; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 21603; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 21604; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 21605; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 21606; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 21607; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 21608; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 21609; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 21610; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 21611; GFX90A-TGSPLIT-NEXT: s_endpgm 21612; 21613; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg: 21614; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 21615; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 21616; 
GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 21617; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 21618; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 21619; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 21620; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 21621; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 21622; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 21623; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 21624; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 21625; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 21626; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 21627; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 21628; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 21629; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 21630; GFX940-NOTTGSPLIT-NEXT: s_endpgm 21631; 21632; GFX940-TGSPLIT-LABEL: global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg: 21633; GFX940-TGSPLIT: ; %bb.0: ; %entry 21634; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 21635; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 21636; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 21637; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 21638; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 21639; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 21640; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 21641; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 21642; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 21643; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 21644; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 21645; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 21646; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 21647; GFX940-TGSPLIT-NEXT: buffer_inv sc1 21648; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 21649; GFX940-TGSPLIT-NEXT: s_endpgm 21650; 21651; GFX11-WGP-LABEL: global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg: 21652; GFX11-WGP: ; %bb.0: ; 
%entry 21653; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 21654; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 21655; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 21656; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 21657; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 21658; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 21659; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 21660; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 21661; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 21662; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 21663; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 21664; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 21665; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 21666; GFX11-WGP-NEXT: buffer_gl1_inv 21667; GFX11-WGP-NEXT: buffer_gl0_inv 21668; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 21669; GFX11-WGP-NEXT: s_endpgm 21670; 21671; GFX11-CU-LABEL: global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg: 21672; GFX11-CU: ; %bb.0: ; %entry 21673; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 21674; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 21675; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 21676; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 21677; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 21678; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 21679; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 21680; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 21681; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 21682; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 21683; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 21684; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 21685; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 21686; GFX11-CU-NEXT: buffer_gl1_inv 21687; GFX11-CU-NEXT: buffer_gl0_inv 21688; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 21689; GFX11-CU-NEXT: s_endpgm 21690; 21691; GFX12-WGP-LABEL: global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg: 21692; GFX12-WGP: ; %bb.0: ; %entry 21693; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 21694; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 
21695; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 21696; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 21697; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 21698; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 21699; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 21700; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 21701; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 21702; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 21703; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 21704; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 21705; GFX12-WGP-NEXT: s_wait_storecnt 0x0 21706; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV 21707; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 21708; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 21709; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 21710; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV 21711; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 21712; GFX12-WGP-NEXT: s_endpgm 21713; 21714; GFX12-CU-LABEL: global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg: 21715; GFX12-CU: ; %bb.0: ; %entry 21716; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 21717; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 21718; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 21719; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 21720; GFX12-CU-NEXT: s_wait_kmcnt 0x0 21721; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 21722; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 21723; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 21724; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 21725; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 21726; GFX12-CU-NEXT: s_wait_samplecnt 0x0 21727; GFX12-CU-NEXT: s_wait_loadcnt 0x0 21728; GFX12-CU-NEXT: s_wait_storecnt 0x0 21729; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV 21730; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 21731; GFX12-CU-NEXT: s_wait_samplecnt 0x0 21732; GFX12-CU-NEXT: s_wait_loadcnt 0x0 21733; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV 21734; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 21735; 
GFX12-CU-NEXT: s_endpgm 21736 ptr addrspace(1) %out, i32 %in, i32 %old) { 21737entry: 21738 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 21739 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel seq_cst 21740 %val0 = extractvalue { i32, i1 } %val, 0 21741 store i32 %val0, ptr addrspace(1) %out, align 4 21742 ret void 21743} 21744
; Returning cmpxchg at "agent-one-as" scope with seq_cst success and failure
; orderings on a global (addrspace(1)) pointer. The i32 four elements past %out
; is compared against %old and replaced by %in on a match; the value that was
; loaded is then written to %out with a plain store.
; The run that passes -amdgcn-skip-cache-invalidations expects no cache
; invalidate after the atomic, and only a single vmcnt wait before the store.
define amdgpu_kernel void @global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX6-LABEL: global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_mov_b32 s12, s5
; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
; GFX6-NEXT:    s_mov_b32 s11, -1
; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
; GFX6-NEXT:    s_mov_b32 s5, s12
; GFX6-NEXT:    s_mov_b32 s6, s11
; GFX6-NEXT:    s_mov_b32 s7, s10
; GFX6-NEXT:    v_mov_b32_e32 v0, s9
; GFX6-NEXT:    v_mov_b32_e32 v2, s8
; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX6-NEXT:    v_mov_b32_e32 v1, v2
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_wbinvl1
; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
; GFX7-NEXT:    s_mov_b64 s[12:13], 16
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, s4
; GFX7-NEXT:    s_mov_b32 s7, s5
; GFX7-NEXT:    s_mov_b32 s11, s12
; GFX7-NEXT:    s_mov_b32 s10, s13
; GFX7-NEXT:    s_add_u32 s6, s6, s11
; GFX7-NEXT:    s_addc_u32 s10, s7, s10
; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
; GFX7-NEXT:    s_mov_b32 s7, s10
; GFX7-NEXT:    v_mov_b32_e32 v2, s9
; GFX7-NEXT:    v_mov_b32_e32 v0, s8
; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX7-NEXT:    v_mov_b32_e32 v3, v0
; GFX7-NEXT:    v_mov_b32_e32 v0, s6
; GFX7-NEXT:    v_mov_b32_e32 v1, s7
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    buffer_wbinvl1_vol
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    flat_store_dword v[0:1], v2
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT:    buffer_gl1_inv
; GFX10-WGP-NEXT:    buffer_gl0_inv
; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
; GFX10-CU-NEXT:    buffer_gl1_inv
; GFX10-CU-NEXT:    buffer_gl0_inv
; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SKIP-CACHE-INV-NEXT:    s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
; GFX90A-TGSPLIT-NEXT:    s_endpgm
;
; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX940-TGSPLIT-LABEL: global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX940-TGSPLIT:       ; %bb.0: ; %entry
; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
; GFX940-TGSPLIT-NEXT:    s_endpgm
;
; GFX11-WGP-LABEL: global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX11-WGP:       ; %bb.0: ; %entry
; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT:    buffer_gl1_inv
; GFX11-WGP-NEXT:    buffer_gl0_inv
; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-WGP-NEXT:    s_endpgm
;
; GFX11-CU-LABEL: global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX11-CU:       ; %bb.0: ; %entry
; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
; GFX11-CU-NEXT:    buffer_gl1_inv
; GFX11-CU-NEXT:    buffer_gl0_inv
; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-CU-NEXT:    s_endpgm
;
; GFX12-WGP-LABEL: global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX12-WGP:       ; %bb.0: ; %entry
; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX12-WGP-NEXT:    s_endpgm
;
; GFX12-CU-LABEL: global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX12-CU:       ; %bb.0: ; %entry
; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
; GFX12-CU-NEXT:    s_wait_storecnt 0x0
; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT:    s_endpgm
    ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst seq_cst
  %val0 = extractvalue { i32, i1 } %val, 0
  store i32 %val0, ptr addrspace(1) %out, align 4
  ret void
}